merge from master
diff --git a/.gitignore b/.gitignore
index 503b769..0517ac2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,8 +12,12 @@
teststore2
dev1
dev2
+dev3
+dev4
derby.log
hadoop-conf-tmp
metastore_db
teststore
output
+tmp
+dist
diff --git a/algebricks/algebricks-common/pom.xml b/algebricks/algebricks-common/pom.xml
index 87f7004..a6279b9 100644
--- a/algebricks/algebricks-common/pom.xml
+++ b/algebricks/algebricks-common/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,7 +41,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
diff --git a/algebricks/algebricks-compiler/pom.xml b/algebricks/algebricks-compiler/pom.xml
index dbacd2f..f276439 100644
--- a/algebricks/algebricks-compiler/pom.xml
+++ b/algebricks/algebricks-compiler/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,12 +41,12 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-rewriter</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-core</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
diff --git a/algebricks/algebricks-core/pom.xml b/algebricks/algebricks-core/pom.xml
index 0fd538e..ac8cd89 100644
--- a/algebricks/algebricks-core/pom.xml
+++ b/algebricks/algebricks-core/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,27 +41,27 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-rtree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-runtime</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/AssignPOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/AssignPOperator.java
index aea04b2..55da00e 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/AssignPOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/AssignPOperator.java
@@ -38,6 +38,8 @@
public class AssignPOperator extends AbstractPhysicalOperator {
+ private boolean flushFramesRapidly;
+
@Override
public PhysicalOperatorTag getOperatorTag() {
return PhysicalOperatorTag.ASSIGN;
@@ -76,7 +78,8 @@
// TODO push projections into the operator
int[] projectionList = JobGenHelper.projectAllVariables(opSchema);
- AssignRuntimeFactory runtime = new AssignRuntimeFactory(outColumns, evalFactories, projectionList);
+ AssignRuntimeFactory runtime = new AssignRuntimeFactory(outColumns, evalFactories, projectionList,
+ flushFramesRapidly);
// contribute one Asterix framewriter
RecordDescriptor recDesc = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), opSchema, context);
@@ -92,4 +95,8 @@
return true;
}
+ public void setRapidFrameFlush(boolean flushFramesRapidly) {
+ this.flushFramesRapidly = flushFramesRapidly;
+ }
+
}
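The new setRapidFrameFlush hook is intended to be called during plan-to-job translation, before contributeRuntimeOperator builds the AssignRuntimeFactory. A minimal caller sketch, assuming the usual Algebricks accessors and a hypothetical assignOp variable (only setRapidFrameFlush itself comes from this change):

    // Hypothetical jobgen-side code enabling rapid frame flushing on an assign.
    AssignPOperator assignPhysOp = (AssignPOperator) assignOp.getPhysicalOperator();
    // Push each output frame downstream as soon as the matching input frame has been
    // consumed, instead of waiting for the output frame to fill up.
    assignPhysOp.setRapidFrameFlush(true);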
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/IndexInsertDeletePOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/IndexInsertDeletePOperator.java
index 2f9417b..879f2a9 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/IndexInsertDeletePOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/IndexInsertDeletePOperator.java
@@ -83,6 +83,7 @@
scanVariables.add(new LogicalVariable(-1));
IPhysicalPropertiesVector r = dataSourceIndex.getDataSource().getPropertiesProvider()
.computePropertiesVector(scanVariables);
+ r.getLocalProperties().clear();
IPhysicalPropertiesVector[] requirements = new IPhysicalPropertiesVector[1];
requirements[0] = r;
return new PhysicalRequirements(requirements, IPartitioningRequirementsCoordinator.NO_COORDINATION);
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/InsertDeletePOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/InsertDeletePOperator.java
index d8b7c33..c85bfb9 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/InsertDeletePOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/InsertDeletePOperator.java
@@ -73,6 +73,7 @@
scanVariables.addAll(keys);
scanVariables.add(new LogicalVariable(-1));
IPhysicalPropertiesVector r = dataSource.getPropertiesProvider().computePropertiesVector(scanVariables);
+ r.getLocalProperties().clear();
IPhysicalPropertiesVector[] requirements = new IPhysicalPropertiesVector[1];
requirements[0] = r;
return new PhysicalRequirements(requirements, IPartitioningRequirementsCoordinator.NO_COORDINATION);
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/prettyprint/LogicalOperatorPrettyPrintVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/prettyprint/LogicalOperatorPrettyPrintVisitor.java
index 9c5de9c..49ec269 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/prettyprint/LogicalOperatorPrettyPrintVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/prettyprint/LogicalOperatorPrettyPrintVisitor.java
@@ -349,7 +349,7 @@
@Override
public String visitExtensionOperator(ExtensionOperator op, Integer indent) throws AlgebricksException {
StringBuilder buffer = new StringBuilder();
- addIndent(buffer, indent).append("statistics collection");
+ addIndent(buffer, indent).append(op.toString());
return buffer.toString();
}
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/properties/AsterixNodeGroupDomain.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/properties/DefaultNodeGroupDomain.java
similarity index 75%
rename from algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/properties/AsterixNodeGroupDomain.java
rename to algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/properties/DefaultNodeGroupDomain.java
index ab2cde2..226f02c 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/properties/AsterixNodeGroupDomain.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/properties/DefaultNodeGroupDomain.java
@@ -14,21 +14,17 @@
*/
package edu.uci.ics.hyracks.algebricks.core.algebra.properties;
-public class AsterixNodeGroupDomain implements INodeDomain {
+public class DefaultNodeGroupDomain implements INodeDomain {
private String groupName;
- public AsterixNodeGroupDomain(String groupName) {
+ public DefaultNodeGroupDomain(String groupName) {
this.groupName = groupName;
}
@Override
public boolean sameAs(INodeDomain domain) {
- if (!(domain instanceof AsterixNodeGroupDomain)) {
- return false;
- }
- AsterixNodeGroupDomain dom2 = (AsterixNodeGroupDomain) domain;
- return groupName.equals(dom2.groupName);
+ return true;
}
@Override
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobBuilder.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobBuilder.java
index eb1e99c..1fe6b15 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobBuilder.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobBuilder.java
@@ -26,6 +26,8 @@
import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.IHyracksJobBuilder;
import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode;
import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
import edu.uci.ics.hyracks.algebricks.runtime.operators.meta.AlgebricksMetaOperatorDescriptor;
import edu.uci.ics.hyracks.api.dataflow.ConnectorDescriptorId;
@@ -73,6 +75,11 @@
if (pc != null) {
pcForMicroOps.put(op, pc);
}
+ AbstractLogicalOperator logicalOp = (AbstractLogicalOperator) op;
+ if (logicalOp.getExecutionMode() == ExecutionMode.UNPARTITIONED && pc == null) {
+ AlgebricksPartitionConstraint apc = new AlgebricksCountPartitionConstraint(1);
+ pcForMicroOps.put(logicalOp, apc);
+ }
}
@Override
diff --git a/algebricks/algebricks-data/pom.xml b/algebricks/algebricks-data/pom.xml
index 613d692..b61a701 100644
--- a/algebricks/algebricks-data/pom.xml
+++ b/algebricks/algebricks-data/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,12 +41,12 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
diff --git a/algebricks/algebricks-examples/piglet-example/pom.xml b/algebricks/algebricks-examples/piglet-example/pom.xml
index 3c1588d..8297f4d 100644
--- a/algebricks/algebricks-examples/piglet-example/pom.xml
+++ b/algebricks/algebricks-examples/piglet-example/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-examples</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -121,7 +121,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>junit</groupId>
diff --git a/algebricks/algebricks-examples/pom.xml b/algebricks/algebricks-examples/pom.xml
index 0376c13..31b7f2c 100644
--- a/algebricks/algebricks-examples/pom.xml
+++ b/algebricks/algebricks-examples/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<modules>
diff --git a/algebricks/algebricks-rewriter/pom.xml b/algebricks/algebricks-rewriter/pom.xml
index d81a479..e66486f 100644
--- a/algebricks/algebricks-rewriter/pom.xml
+++ b/algebricks/algebricks-rewriter/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,7 +41,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-core</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/EliminateGroupByEmptyKeyRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/EliminateGroupByEmptyKeyRule.java
new file mode 100644
index 0000000..e93fdd1
--- /dev/null
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/EliminateGroupByEmptyKeyRule.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.algebricks.rewriter.rules;
+
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * This rule lifts the aggregate operator out of a group-by operator
+ * if the group-by operator groups on an empty key, i.e., its list of group-by variables is empty.
+ *
+ * @author yingyib
+ */
+public class EliminateGroupByEmptyKeyRule implements IAlgebraicRewriteRule {
+
+ @Override
+ public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+ return false;
+ }
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+ throws AlgebricksException {
+ AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+ if (op.getOperatorTag() != LogicalOperatorTag.GROUP) {
+ return false;
+ }
+ GroupByOperator groupOp = (GroupByOperator) op;
+ List<LogicalVariable> groupVars = groupOp.getGbyVarList();
+ if (groupVars.size() > 0) {
+ return false;
+ }
+ List<ILogicalPlan> nestedPlans = groupOp.getNestedPlans();
+ if (nestedPlans.size() > 1) {
+ return false;
+ }
+ ILogicalPlan nestedPlan = nestedPlans.get(0);
+ if (nestedPlan.getRoots().size() > 1) {
+ return false;
+ }
+ Mutable<ILogicalOperator> topOpRef = nestedPlan.getRoots().get(0);
+ ILogicalOperator topOp = nestedPlan.getRoots().get(0).getValue();
+ Mutable<ILogicalOperator> nestedTupleSourceRef = getNestedTupleSourceReference(topOpRef);
+ /**
+ * connect nested top op into the plan
+ */
+ opRef.setValue(topOp);
+ /**
+ * connect child op into the plan
+ */
+ nestedTupleSourceRef.setValue(groupOp.getInputs().get(0).getValue());
+ return true;
+ }
+
+ private Mutable<ILogicalOperator> getNestedTupleSourceReference(Mutable<ILogicalOperator> nestedTopOperatorRef) {
+ Mutable<ILogicalOperator> currentOpRef = nestedTopOperatorRef;
+ while (currentOpRef.getValue().getInputs() != null && currentOpRef.getValue().getInputs().size() > 0) {
+ currentOpRef = currentOpRef.getValue().getInputs().get(0);
+ }
+ return currentOpRef;
+ }
+
+}
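To make the rewrite concrete, a schematic before/after (plan shapes are illustrative, not actual pretty-printer output): a group-by with an empty key list and a single nested plan rooted at an aggregate,

    group-by () {
      aggregate [$a] <- [f(...)]
        nested-tuple-source
    }
      <child>

is rewritten so that the nested plan's top operator is wired directly onto the group-by's former input:

    aggregate [$a] <- [f(...)]
      <child>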
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/EnforceStructuralPropertiesRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/EnforceStructuralPropertiesRule.java
index 9292e07..98606f1 100644
--- a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/EnforceStructuralPropertiesRule.java
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/EnforceStructuralPropertiesRule.java
@@ -59,7 +59,7 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.StableSortPOperator;
import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor;
import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.PlanPrettyPrinter;
-import edu.uci.ics.hyracks.algebricks.core.algebra.properties.AsterixNodeGroupDomain;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.DefaultNodeGroupDomain;
import edu.uci.ics.hyracks.algebricks.core.algebra.properties.FunctionalDependency;
import edu.uci.ics.hyracks.algebricks.core.algebra.properties.ILocalStructuralProperty;
import edu.uci.ics.hyracks.algebricks.core.algebra.properties.ILocalStructuralProperty.PropertyType;
@@ -85,7 +85,7 @@
public class EnforceStructuralPropertiesRule implements IAlgebraicRewriteRule {
- private static final INodeDomain DEFAULT_DOMAIN = new AsterixNodeGroupDomain("__DEFAULT");
+ private static final INodeDomain DEFAULT_DOMAIN = new DefaultNodeGroupDomain("__DEFAULT");
private PhysicalOptimizationConfig physicalOptimizationConfig;
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/LeftOuterJoinToInnerJoinRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/LeftOuterJoinToInnerJoinRule.java
new file mode 100644
index 0000000..247c10d
--- /dev/null
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/LeftOuterJoinToInnerJoinRule.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.algebricks.rewriter.rules;
+
+import org.apache.commons.lang3.mutable.Mutable;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.ListSet;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * This rule converts a left outer join into an inner join when possible.
+ *
+ * The specific pattern this rule fires on is:
+ * select not(is-null($v)) // $v is from the right branch of the left outer join below
+ * left-outer-join
+ *
+ * The pattern will be rewritten to:
+ * inner-join
+ *
+ * @author yingyib
+ */
+public class LeftOuterJoinToInnerJoinRule implements IAlgebraicRewriteRule {
+
+ @Override
+ public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+ return false;
+ }
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+ throws AlgebricksException {
+ AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+ if (op.getOperatorTag() != LogicalOperatorTag.SELECT) {
+ return false;
+ }
+ Mutable<ILogicalOperator> op2Ref = op.getInputs().get(0);
+ AbstractLogicalOperator op2 = (AbstractLogicalOperator) op2Ref.getValue();
+ if (op2.getOperatorTag() != LogicalOperatorTag.LEFTOUTERJOIN) {
+ return false;
+ }
+ SelectOperator selectOp = (SelectOperator) op;
+ LeftOuterJoinOperator joinOp = (LeftOuterJoinOperator) op2;
+ ILogicalExpression condition = selectOp.getCondition().getValue();
+ if (condition.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+ return false;
+ }
+ ScalarFunctionCallExpression func = (ScalarFunctionCallExpression) condition;
+ /** check if the filter condition on top of the LOJ is not(is-null($v)), where $v is from the right child of LOJ */
+ if (!convertible(func, joinOp)) {
+ return false;
+ }
+ ILogicalOperator newJoin = new InnerJoinOperator(joinOp.getCondition(), joinOp.getInputs().get(0), joinOp
+ .getInputs().get(1));
+ opRef.setValue(newJoin);
+ context.computeAndSetTypeEnvironmentForOperator(newJoin);
+ return true;
+ }
+
+ /**
+ * Check whether the condition is not(is-null(var)) and var comes from the right branch of the join.
+ */
+ private boolean convertible(ScalarFunctionCallExpression func, LeftOuterJoinOperator join)
+ throws AlgebricksException {
+ if (func.getFunctionIdentifier() != AlgebricksBuiltinFunctions.NOT) {
+ return false;
+ }
+ ILogicalExpression arg = func.getArguments().get(0).getValue();
+ if (arg.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+ return false;
+ }
+ ScalarFunctionCallExpression func2 = (ScalarFunctionCallExpression) arg;
+ if (func2.getFunctionIdentifier() != AlgebricksBuiltinFunctions.IS_NULL) {
+ return false;
+ }
+ ILogicalExpression arg2 = func2.getArguments().get(0).getValue();
+ if (arg2.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
+ return false;
+ }
+ VariableReferenceExpression varExpr = (VariableReferenceExpression) arg2;
+ LogicalVariable var = varExpr.getVariableReference();
+ ListSet<LogicalVariable> leftVars = new ListSet<LogicalVariable>();
+ ListSet<LogicalVariable> rightVars = new ListSet<LogicalVariable>();
+ VariableUtilities.getLiveVariables(join.getInputs().get(0).getValue(), leftVars);
+ VariableUtilities.getLiveVariables(join.getInputs().get(1).getValue(), rightVars);
+ if (!rightVars.contains(var)) {
+ return false;
+ }
+ return true;
+ }
+
+}
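Since both new rules implement IAlgebraicRewriteRule, wiring them in is a one-line addition to a rewrite rule collection. A sketch, assuming the embedding compiler keeps its logical rules in a plain list (the list and its name are not part of this change; imports: java.util.List, java.util.LinkedList, plus the two rule classes):

    // Hypothetical rule-set assembly in a compiler that embeds Algebricks.
    List<IAlgebraicRewriteRule> logicalRules = new LinkedList<IAlgebraicRewriteRule>();
    logicalRules.add(new EliminateGroupByEmptyKeyRule());
    // Must run while select(not(is-null($v))) still sits directly on the outer join.
    logicalRules.add(new LeftOuterJoinToInnerJoinRule());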
diff --git a/algebricks/algebricks-runtime/pom.xml b/algebricks/algebricks-runtime/pom.xml
index 31b25be..570afb4 100644
--- a/algebricks/algebricks-runtime/pom.xml
+++ b/algebricks/algebricks-runtime/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,27 +41,27 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-rtree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-data</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
diff --git a/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/base/AbstractOneInputOneOutputOneFramePushRuntime.java b/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/base/AbstractOneInputOneOutputOneFramePushRuntime.java
index 082e98a..6c1dd5e 100644
--- a/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/base/AbstractOneInputOneOutputOneFramePushRuntime.java
+++ b/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/base/AbstractOneInputOneOutputOneFramePushRuntime.java
@@ -43,6 +43,10 @@
}
protected void appendToFrameFromTupleBuilder(ArrayTupleBuilder tb) throws HyracksDataException {
+ appendToFrameFromTupleBuilder(tb, false);
+ }
+
+ protected void appendToFrameFromTupleBuilder(ArrayTupleBuilder tb, boolean flushFrame) throws HyracksDataException {
if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
FrameUtils.flushFrame(frame, writer);
appender.reset(frame, true);
@@ -51,6 +55,10 @@
"Could not write frame (AbstractOneInputOneOutputOneFramePushRuntime.appendToFrameFromTupleBuilder).");
}
}
+ if (flushFrame) {
+ FrameUtils.flushFrame(frame, writer);
+ appender.reset(frame, true);
+ }
}
protected void appendProjectionToFrame(int tIndex, int[] projectionList) throws HyracksDataException {
diff --git a/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/std/AssignRuntimeFactory.java b/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/std/AssignRuntimeFactory.java
index fa99c15..fb889ea 100644
--- a/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/std/AssignRuntimeFactory.java
+++ b/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/std/AssignRuntimeFactory.java
@@ -36,6 +36,7 @@
private int[] outColumns;
private IScalarEvaluatorFactory[] evalFactories;
+ private final boolean flushFramesRapidly;
/**
* @param outColumns
@@ -46,9 +47,15 @@
*/
public AssignRuntimeFactory(int[] outColumns, IScalarEvaluatorFactory[] evalFactories, int[] projectionList) {
+ this(outColumns, evalFactories, projectionList, false);
+ }
+
+ public AssignRuntimeFactory(int[] outColumns, IScalarEvaluatorFactory[] evalFactories, int[] projectionList,
+ boolean flushFramesRapidly) {
super(projectionList);
this.outColumns = outColumns;
this.evalFactories = evalFactories;
+ this.flushFramesRapidly = flushFramesRapidly;
}
@Override
@@ -107,9 +114,22 @@
public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
tAccess.reset(buffer);
int nTuple = tAccess.getTupleCount();
- for (int t = 0; t < nTuple; t++) {
- tRef.reset(tAccess, t);
- produceTuple(tupleBuilder, tAccess, t, tRef);
+ int t = 0;
+ if (nTuple > 1) {
+ for (; t < nTuple - 1; t++) {
+ tRef.reset(tAccess, t);
+ produceTuple(tupleBuilder, tAccess, t, tRef);
+ appendToFrameFromTupleBuilder(tupleBuilder);
+ }
+ }
+
+ tRef.reset(tAccess, t);
+ produceTuple(tupleBuilder, tAccess, t, tRef);
+ if (flushFramesRapidly) {
+ // Whenever all the tuples in the incoming frame have been consumed, the assign operator
+ // will push its frame to the next operator; i.e., it won't wait until the frame gets full.
+ appendToFrameFromTupleBuilder(tupleBuilder, true);
+ } else {
appendToFrameFromTupleBuilder(tupleBuilder);
}
}
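The four-argument constructor above is the one AssignPOperator now invokes; used directly it looks like the following sketch, where outColumns, projectionList, and evalFactories stand in for whatever the compiled plan produced:

    // flushFramesRapidly = true: once the last tuple of each incoming frame has been
    // appended, the output frame is flushed immediately rather than when it is full.
    int[] outColumns = new int[] { 1 };
    int[] projectionList = new int[] { 0, 1 };
    AssignRuntimeFactory assign =
            new AssignRuntimeFactory(outColumns, evalFactories, projectionList, true);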
diff --git a/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/std/StreamProjectRuntimeFactory.java b/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/std/StreamProjectRuntimeFactory.java
index 7f10948..3e87f31 100644
--- a/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/std/StreamProjectRuntimeFactory.java
+++ b/algebricks/algebricks-runtime/src/main/java/edu/uci/ics/hyracks/algebricks/runtime/operators/std/StreamProjectRuntimeFactory.java
@@ -34,8 +34,7 @@
}
public StreamProjectRuntimeFactory(int[] projectionList) {
- super(projectionList);
- this.flushFramesRapidly = false;
+ this(projectionList, false);
}
@Override
@@ -66,8 +65,10 @@
int nTuple = tAccess.getTupleCount();
int t = 0;
- for (; t < nTuple - 1; t++) {
- appendProjectionToFrame(t, projectionList);
+ if (nTuple > 1) {
+ for (; t < nTuple - 1; t++) {
+ appendProjectionToFrame(t, projectionList);
+ }
}
if (flushFramesRapidly) {
// Whenever all the tuples in the incoming frame have been consumed, the project operator
@@ -76,10 +77,8 @@
} else {
appendProjectionToFrame(t, projectionList);
}
-
}
};
}
-
}
diff --git a/algebricks/algebricks-tests/pom.xml b/algebricks/algebricks-tests/pom.xml
index 271083d..4d7abad 100644
--- a/algebricks/algebricks-tests/pom.xml
+++ b/algebricks/algebricks-tests/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -99,7 +99,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>junit</groupId>
@@ -110,17 +110,17 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
diff --git a/algebricks/pom.xml b/algebricks/pom.xml
index 774b07d..427e29b 100644
--- a/algebricks/pom.xml
+++ b/algebricks/pom.xml
@@ -16,7 +16,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<packaging>pom</packaging>
<name>algebricks</name>
diff --git a/hivesterix/build.sh b/hivesterix/build.sh
new file mode 100644
index 0000000..8f61559
--- /dev/null
+++ b/hivesterix/build.sh
@@ -0,0 +1,12 @@
+rm -rf dist
+mkdir dist
+
+hadoop_versions=(0.20.2 0.23.1 0.23.6 1.0.4 cdh-4.1 cdh-4.2)
+cd ../
+for v in ${hadoop_versions[@]}
+do
+ #echo mvn clean package -DskipTests=true -Dhadoop=${v}
+ mvn clean package -DskipTests=true -Dhadoop=${v}
+ #echo mv hivesterix/hivesterix-dist/target/hivesterix-dist-*-binary-assembly.zip hivesterix/dist/hivesterix-dist-binary-assembly-hdfs-${v}.zip
+ mv hivesterix/hivesterix-dist/target/hivesterix-dist-*-binary-assembly.zip hivesterix/dist/hivesterix-dist-binary-assembly-hdfs-${v}.zip
+done
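To build against a single Hadoop version instead of the whole matrix, the same invocation can be run by hand from the repository root, e.g. mvn clean package -DskipTests=true -Dhadoop=1.0.4.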
diff --git a/hivesterix/hivesterix-common/pom.xml b/hivesterix/hivesterix-common/pom.xml
index 7741193..fe9271b 100644
--- a/hivesterix/hivesterix-common/pom.xml
+++ b/hivesterix/hivesterix-common/pom.xml
@@ -1,18 +1,13 @@
<?xml version="1.0"?>
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>hivesterix-common</artifactId>
@@ -21,7 +16,7 @@
<parent>
<artifactId>hivesterix</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -44,30 +39,54 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-hdfs-core</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-serde</artifactId>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
</dependencies>
+
+ <repositories>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>cdh-build</id>
+ <url>https://repository.cloudera.com/content/groups/cdh-build</url>
+ </repository>
+ </repositories>
</project>
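Note on the hive-exec coordinates: Hive artifacts of this era moved on Maven Central from the org.apache.hadoop.hive groupId to org.apache.hive, so the bump to 0.11.0 requires the new groupId as well. The cdh-build repository is presumably added because the Hadoop profiles exercised by build.sh include the cdh-4.1/cdh-4.2 variants, whose artifacts are hosted by Cloudera.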
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
index 783004c..2ec3811 100644
--- a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
@@ -12,212 +12,217 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-
-public class ExpressionTranslator {
-
- public static Object getHiveExpression(ILogicalExpression expr, IVariableTypeEnvironment env) throws Exception {
- if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
- /**
- * function expression
- */
- AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
- IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
- FunctionIdentifier fid = funcInfo.getFunctionIdentifier();
-
- if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
- Object info = ((HiveFunctionInfo) funcInfo).getInfo();
- ExprNodeFieldDesc desc = (ExprNodeFieldDesc) info;
- return new ExprNodeFieldDesc(desc.getTypeInfo(), desc.getDesc(), desc.getFieldName(), desc.getIsList());
- }
-
- if (fid.getName().equals(ExpressionConstant.NULL)) {
- return new ExprNodeNullDesc();
- }
-
- /**
- * argument expressions: translate argument expressions recursively
- * first, this logic is shared in scalar, aggregation and unnesting
- * function
- */
- List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
- List<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
- for (Mutable<ILogicalExpression> argument : arguments) {
- /**
- * parameters could not be aggregate function desc
- */
- ExprNodeDesc parameter = (ExprNodeDesc) getHiveExpression(argument.getValue(), env);
- parameters.add(parameter);
- }
-
- /**
- * get expression
- */
- if (funcExpr instanceof ScalarFunctionCallExpression) {
- String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getHiveFunctionName(fid);
- GenericUDF udf;
- if (udfName != null) {
- /**
- * get corresponding function info for built-in functions
- */
- FunctionInfo fInfo = FunctionRegistry.getFunctionInfo(udfName);
- udf = fInfo.getGenericUDF();
-
- int inputSize = parameters.size();
- List<ExprNodeDesc> currentDescs = new ArrayList<ExprNodeDesc>();
-
- // generate expression tree if necessary
- while (inputSize > 2) {
- int pairs = inputSize / 2;
- for (int i = 0; i < pairs; i++) {
- List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>();
- descs.add(parameters.get(2 * i));
- descs.add(parameters.get(2 * i + 1));
- ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, descs);
- currentDescs.add(desc);
- }
-
- if (inputSize % 2 != 0) {
- // List<ExprNodeDesc> descs = new
- // ArrayList<ExprNodeDesc>();
- // ExprNodeDesc lastExpr =
- // currentDescs.remove(currentDescs.size() - 1);
- // descs.add(lastExpr);
- currentDescs.add(parameters.get(inputSize - 1));
- // ExprNodeDesc desc =
- // ExprNodeGenericFuncDesc.newInstance(udf, descs);
- // currentDescs.add(desc);
- }
- inputSize = currentDescs.size();
- parameters.clear();
- parameters.addAll(currentDescs);
- currentDescs.clear();
- }
-
- } else {
- Object secondInfo = ((HiveFunctionInfo) funcInfo).getInfo();
- if (secondInfo != null) {
-
- /**
- * for GenericUDFBridge: we should not call get type of
- * this hive expression, because parameters may have
- * been changed!
- */
- ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) ((HiveFunctionInfo) funcInfo)
- .getInfo();
- udf = hiveExpr.getGenericUDF();
- } else {
- /**
- * for other generic UDF
- */
- Class<?> udfClass;
- try {
- udfClass = Class.forName(fid.getName());
- udf = (GenericUDF) udfClass.newInstance();
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- }
- }
- /**
- * get hive generic function expression
- */
- ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, parameters);
- return desc;
- } else if (funcExpr instanceof AggregateFunctionCallExpression) {
- /**
- * hive aggregation info
- */
- AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
- .getInfo();
- /**
- * set parameters
- */
- aggregateDesc.setParameters((ArrayList<ExprNodeDesc>) parameters);
-
- List<TypeInfo> originalParameterTypeInfos = new ArrayList<TypeInfo>();
- for (ExprNodeDesc parameter : parameters) {
- if (parameter.getTypeInfo() instanceof StructTypeInfo) {
- originalParameterTypeInfos.add(TypeInfoFactory.doubleTypeInfo);
- } else
- originalParameterTypeInfos.add(parameter.getTypeInfo());
- }
-
- GenericUDAFEvaluator eval = FunctionRegistry.getGenericUDAFEvaluator(
- aggregateDesc.getGenericUDAFName(), originalParameterTypeInfos, aggregateDesc.getDistinct(),
- false);
-
- AggregationDesc newAggregateDesc = new AggregationDesc(aggregateDesc.getGenericUDAFName(), eval,
- aggregateDesc.getParameters(), aggregateDesc.getDistinct(), aggregateDesc.getMode());
- return newAggregateDesc;
- } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
- /**
- * type inference for UDTF function
- */
- UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
- String funcName = hiveDesc.getUDTFName();
- FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
- GenericUDTF udtf = fi.getGenericUDTF();
- UDTFDesc desc = new UDTFDesc(udtf);
- return desc;
- } else {
- throw new IllegalStateException("unrecognized function expression " + expr.getClass().getName());
- }
- } else if ((expr.getExpressionTag() == LogicalExpressionTag.VARIABLE)) {
- /**
- * get type for variable in the environment
- */
- VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
- LogicalVariable var = varExpr.getVariableReference();
- TypeInfo typeInfo = (TypeInfo) env.getVarType(var);
- ExprNodeDesc desc = new ExprNodeColumnDesc(typeInfo, var.toString(), "", false);
- return desc;
- } else if ((expr.getExpressionTag() == LogicalExpressionTag.CONSTANT)) {
- /**
- * get expression for constant in the environment
- */
- ConstantExpression varExpr = (ConstantExpression) expr;
- Object value = ((HivesterixConstantValue) varExpr.getValue()).getObject();
- ExprNodeDesc desc = new ExprNodeConstantDesc(value);
- return desc;
- } else {
- throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
- }
- }
-}
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+
+public class ExpressionTranslator {
+
+ public static Object getHiveExpression(ILogicalExpression expr, IVariableTypeEnvironment env) throws Exception {
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ /**
+ * function expression
+ */
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+ IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
+ FunctionIdentifier fid = funcInfo.getFunctionIdentifier();
+
+ if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
+ Object info = ((HiveFunctionInfo) funcInfo).getInfo();
+ ExprNodeFieldDesc desc = (ExprNodeFieldDesc) info;
+ return new ExprNodeFieldDesc(desc.getTypeInfo(), desc.getDesc(), desc.getFieldName(), desc.getIsList());
+ }
+
+ if (fid.getName().equals(ExpressionConstant.NULL)) {
+ return new ExprNodeNullDesc();
+ }
+
+ /**
+ * argument expressions: translate argument expressions recursively
+ * first, this logic is shared in scalar, aggregation and unnesting
+ * function
+ */
+ List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
+ List<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
+ for (Mutable<ILogicalExpression> argument : arguments) {
+ /**
+ * parameters could not be aggregate function desc
+ */
+ ExprNodeDesc parameter = (ExprNodeDesc) getHiveExpression(argument.getValue(), env);
+ parameters.add(parameter);
+ }
+
+ /**
+ * get expression
+ */
+ if (funcExpr instanceof ScalarFunctionCallExpression) {
+ String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getHiveFunctionName(fid);
+ GenericUDF udf;
+ if (udfName != null) {
+ /**
+ * get corresponding function info for built-in functions
+ */
+ FunctionInfo fInfo = FunctionRegistry.getFunctionInfo(udfName);
+ udf = fInfo.getGenericUDF();
+
+ int inputSize = parameters.size();
+ List<ExprNodeDesc> currentDescs = new ArrayList<ExprNodeDesc>();
+
+ // generate expression tree if necessary
+ while (inputSize > 2) {
+ int pairs = inputSize / 2;
+ for (int i = 0; i < pairs; i++) {
+ List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>();
+ descs.add(parameters.get(2 * i));
+ descs.add(parameters.get(2 * i + 1));
+ ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, descs);
+ currentDescs.add(desc);
+ }
+
+ if (inputSize % 2 != 0) {
+ // List<ExprNodeDesc> descs = new
+ // ArrayList<ExprNodeDesc>();
+ // ExprNodeDesc lastExpr =
+ // currentDescs.remove(currentDescs.size() - 1);
+ // descs.add(lastExpr);
+ currentDescs.add(parameters.get(inputSize - 1));
+ // ExprNodeDesc desc =
+ // ExprNodeGenericFuncDesc.newInstance(udf, descs);
+ // currentDescs.add(desc);
+ }
+ inputSize = currentDescs.size();
+ parameters.clear();
+ parameters.addAll(currentDescs);
+ currentDescs.clear();
+ }
+
+ } else {
+ Object secondInfo = ((HiveFunctionInfo) funcInfo).getInfo();
+ if (secondInfo != null) {
+
+ /**
+ * for GenericUDFBridge: we should not call get type of
+ * this hive expression, because parameters may have
+ * been changed!
+ */
+ ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) ((HiveFunctionInfo) funcInfo)
+ .getInfo();
+ udf = hiveExpr.getGenericUDF();
+ } else {
+ /**
+ * for other generic UDF
+ */
+ Class<?> udfClass;
+ try {
+ udfClass = Class.forName(fid.getName());
+ udf = (GenericUDF) udfClass.newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ }
+ }
+ /**
+ * get hive generic function expression
+ */
+ ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, parameters);
+ return desc;
+ } else if (funcExpr instanceof AggregateFunctionCallExpression) {
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
+ .getInfo();
+ /**
+ * set parameters
+ */
+ aggregateDesc.setParameters((ArrayList<ExprNodeDesc>) parameters);
+
+ List<TypeInfo> originalParameterTypeInfos = new ArrayList<TypeInfo>();
+ for (ExprNodeDesc parameter : parameters) {
+ if (parameter.getTypeInfo() instanceof StructTypeInfo) {
+ originalParameterTypeInfos.add(TypeInfoFactory.doubleTypeInfo);
+ } else
+ originalParameterTypeInfos.add(parameter.getTypeInfo());
+ }
+
+ List<ObjectInspector> originalParameterOIs = new ArrayList<ObjectInspector>();
+ for (TypeInfo type : originalParameterTypeInfos) {
+ originalParameterOIs.add(LazyUtils.getLazyObjectInspectorFromTypeInfo(type, false));
+ }
+ GenericUDAFEvaluator eval = FunctionRegistry.getGenericUDAFEvaluator(
+ aggregateDesc.getGenericUDAFName(), originalParameterOIs, aggregateDesc.getDistinct(), false);
+
+ AggregationDesc newAggregateDesc = new AggregationDesc(aggregateDesc.getGenericUDAFName(), eval,
+ aggregateDesc.getParameters(), aggregateDesc.getDistinct(), aggregateDesc.getMode());
+ return newAggregateDesc;
+ } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
+ /**
+ * type inference for UDTF function
+ */
+ UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
+ String funcName = hiveDesc.getUDTFName();
+ FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
+ GenericUDTF udtf = fi.getGenericUDTF();
+ UDTFDesc desc = new UDTFDesc(udtf);
+ return desc;
+ } else {
+ throw new IllegalStateException("unrecognized function expression " + expr.getClass().getName());
+ }
+ } else if ((expr.getExpressionTag() == LogicalExpressionTag.VARIABLE)) {
+ /**
+ * get type for variable in the environment
+ */
+ VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
+ LogicalVariable var = varExpr.getVariableReference();
+ TypeInfo typeInfo = (TypeInfo) env.getVarType(var);
+ ExprNodeDesc desc = new ExprNodeColumnDesc(typeInfo, var.toString(), "", false);
+ return desc;
+ } else if ((expr.getExpressionTag() == LogicalExpressionTag.CONSTANT)) {
+ /**
+ * get expression for constant in the environment
+ */
+ ConstantExpression varExpr = (ConstantExpression) expr;
+ Object value = ((HivesterixConstantValue) varExpr.getValue()).getObject();
+ ExprNodeDesc desc = new ExprNodeConstantDesc(value);
+ return desc;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+}
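The only substantive change in this otherwise re-encoded file is the evaluator lookup: Hive 0.11's FunctionRegistry.getGenericUDAFEvaluator resolves against a list of ObjectInspectors rather than the TypeInfos that Hive 0.7 accepted, so the new code first maps each parameter TypeInfo to a lazy ObjectInspector via LazyUtils.getLazyObjectInspectorFromTypeInfo before performing the lookup.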
diff --git a/hivesterix/hivesterix-dist/pom.xml b/hivesterix/hivesterix-dist/pom.xml
index 8ecdfe1..917487f 100644
--- a/hivesterix/hivesterix-dist/pom.xml
+++ b/hivesterix/hivesterix-dist/pom.xml
@@ -1,27 +1,22 @@
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>hivesterix-dist</artifactId>
<name>hivesterix-dist</name>
<parent>
- <groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>hivesterix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
- </parent>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix</artifactId>
+ <version>0.2.10-SNAPSHOT</version>
+ </parent>
<dependencies>
<dependency>
@@ -32,298 +27,37 @@
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>4.8.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>args4j</groupId>
- <artifactId>args4j</artifactId>
- <version>2.0.12</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.json</groupId>
- <artifactId>json</artifactId>
- <version>20090211</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-servlet</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>jline</groupId>
- <artifactId>jline</artifactId>
- <version>0.9.94</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-core</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-connectionpool</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-enhancer</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-rdbms</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-dbcp</groupId>
- <artifactId>commons-dbcp</artifactId>
- <version>1.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-pool</groupId>
- <artifactId>commons-pool</artifactId>
- <version>1.5.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-collections</groupId>
- <artifactId>commons-collections</artifactId>
- <version>3.2.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- <version>2.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>javax</groupId>
- <artifactId>jdo2-api</artifactId>
- <version>2.3-ec</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.facebook</groupId>
- <artifactId>libfb303</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>cli</artifactId>
- <version>1.2</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache</groupId>
- <artifactId>log4j</artifactId>
- <version>1.2.15</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>antlr-runtime</artifactId>
- <version>3.0.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-cli</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-common</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-hwi</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-jdbc</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-metastore</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-service</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-shims</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-serde</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-cli</groupId>
- <artifactId>commons-cli</artifactId>
- <version>1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- <version>1.1.1</version>
- <type>jar</type>
- <classifier>api</classifier>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- <version>r06</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>stringtemplate</artifactId>
- <version>3.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.derby</groupId>
- <artifactId>derby</artifactId>
- <version>10.8.1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <version>0.20.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-translator</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-optimizer</artifactId>
- <version>0.2.7-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.90.3</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
@@ -474,6 +208,23 @@
</executions>
</plugin>
<plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>2.2-beta-5</version>
+ <executions>
+ <execution>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>attached</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>2.5</version>
<configuration>
diff --git a/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml b/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
index aeb3fb4..e2da26a 100755
--- a/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
+++ b/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
@@ -1,17 +1,12 @@
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<assembly>
<id>binary-assembly</id>
<formats>
@@ -21,20 +16,30 @@
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
- <directory>target/appassembler/bin</directory>
+ <directory>src/main/resources/conf</directory>
+ <outputDirectory>conf</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>src/main/resources/scripts</directory>
<outputDirectory>bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>target/appassembler/lib</directory>
<outputDirectory>lib</outputDirectory>
+ <includes>
+ <include>*.jar</include>
+ </includes>
+ <fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>target</directory>
<outputDirectory>lib</outputDirectory>
<includes>
- <include>*.jar</include>
+ <include>a-hive-patch.jar</include>
</includes>
+ <fileMode>0755</fileMode>
</fileSet>
</fileSets>
-</assembly>
+</assembly>
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
index d3bcaca..7b88de4 100644
--- a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
+++ b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
@@ -36,16 +36,20 @@
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.MoveTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.mapred.Reporter;
import edu.uci.ics.hivesterix.common.config.ConfUtil;
import edu.uci.ics.hivesterix.logical.expression.HiveExpressionTypeComputer;
@@ -214,7 +218,6 @@
// get all leave Ops
getLeaves(rootOps, leaveOps);
-
HiveAlgebricksTranslator translator = new HiveAlgebricksTranslator();
try {
translator.translate(rootOps, null, aliasToPath);
@@ -222,7 +225,7 @@
ILogicalPlan plan = translator.genLogicalPlan();
if (plan.getRoots() != null && plan.getRoots().size() > 0 && plan.getRoots().get(0).getValue() != null) {
- translator.printOperators();
+ //translator.printOperators();
ILogicalPlanAndMetadata planAndMetadata = new HiveLogicalPlanAndMetaData(plan,
translator.getMetadataProvider());
@@ -238,7 +241,7 @@
StringBuilder buffer = new StringBuilder();
PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
String planStr = buffer.toString();
- System.out.println(planStr);
+ LOG.info(planStr);
if (planPrinter != null)
planPrinter.print(planStr);
@@ -377,6 +380,12 @@
// remove map-reduce branches in condition task
ConditionalTask condition = (ConditionalTask) task;
List<Task<? extends Serializable>> branches = condition.getListTasks();
+ for (Task branch : branches) {
+ if (branch instanceof MoveTask) {
+ //return articulateMapReduceOperators(branch, rootOps, aliasToPath, rootTasks);
+ return null;
+ }
+ }
for (int i = branches.size() - 1; i >= 0; i--) {
Task branch = branches.get(i);
if (branch instanceof MapRedTask) {
@@ -396,7 +405,7 @@
MapRedTask mrtask = (MapRedTask) task;
MapredWork work = (MapredWork) mrtask.getWork();
- HashMap<String, Operator<? extends Serializable>> operators = work.getAliasToWork();
+ HashMap<String, Operator<? extends OperatorDesc>> operators = work.getAliasToWork();
Set entries = operators.entrySet();
Iterator<Entry<String, Operator>> iterator = entries.iterator();
@@ -414,7 +423,7 @@
// get map local work
MapredLocalWork localWork = work.getMapLocalWork();
if (localWork != null) {
- HashMap<String, Operator<? extends Serializable>> localOperators = localWork.getAliasToWork();
+ HashMap<String, Operator<? extends OperatorDesc>> localOperators = localWork.getAliasToWork();
Set localEntries = localOperators.entrySet();
Iterator<Entry<String, Operator>> localIterator = localEntries.iterator();
@@ -479,9 +488,9 @@
for (Operator childMap : childMapOps) {
if (childMap instanceof TableScanOperator) {
TableScanDesc topDesc = (TableScanDesc) childMap.getConf();
- if (topDesc == null)
+ if (topDesc == null || topDesc.getAlias() == null) {
mapChildren.add(childMap);
- else {
+ } else {
rootOps.add(childMap);
}
} else {
@@ -501,9 +510,14 @@
}
i = 0;
for (Operator child : mapChildren) {
- if (child.getParentOperators() == null || child.getParentOperators().size() == 0)
+ if (child.getParentOperators() == null || child.getParentOperators().size() == 0) {
child.setParentOperators(new ArrayList<Operator>());
- child.getParentOperators().add(leafs.get(i));
+ }
+ if (i < leafs.size()) {
+ if (child.getParentOperators().size() == 0) {
+ child.getParentOperators().add(leafs.get(i));
+ }
+ }
i++;
}
}
@@ -603,10 +617,10 @@
String specPath = desc.getDirName();
DynamicPartitionCtx dpCtx = desc.getDynPartCtx();
// for 0.7.0
- fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);
+ //fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);
// for 0.8.0
- // Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx,
- // desc);
+ //Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx, desc);
+ Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx, desc, Reporter.NULL);
}
}
}
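
Aside on the change above: articulateMapReduceOperators now gives up on a ConditionalTask as soon as any of its branches is a MoveTask, returning null rather than translating it. A minimal, self-contained Java sketch of that branch scan — the task types here are illustrative stand-ins, not Hive's classes:

import java.util.Arrays;
import java.util.List;

class PlanTask {}                          // stand-in for Task<? extends Serializable>
class MoveTaskSketch extends PlanTask {}   // stand-in for MoveTask
class MapRedTaskSketch extends PlanTask {} // stand-in for MapRedTask

public class BranchScan {
    // Mirrors the new early exit: one move branch disqualifies the whole conditional.
    static boolean containsMoveBranch(List<PlanTask> branches) {
        for (PlanTask branch : branches) {
            if (branch instanceof MoveTaskSketch) {
                return true;
            }
        }
        return false;
    }

    public static void main(String[] args) {
        List<PlanTask> branches = Arrays.asList(new MapRedTaskSketch(), new MoveTaskSketch());
        System.out.println(containsMoveBranch(branches)); // prints: true
    }
}
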
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
index 4ef74e9..64a3f12 100644
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
@@ -1,17 +1,3 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -42,11 +28,13 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
+import java.util.concurrent.ConcurrentLinkedQueue;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
@@ -62,6 +50,7 @@
import org.apache.hadoop.hive.ql.exec.ExecDriver;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.MoveTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.StatsTask;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -86,23 +75,22 @@
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.ErrorMsg;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
+import org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
-import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
@@ -112,6 +100,7 @@
import org.apache.hadoop.hive.ql.plan.ConditionalResolver;
import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.processors.CommandProcessor;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
@@ -127,15 +116,18 @@
import edu.uci.ics.hivesterix.runtime.exec.HyracksExecutionEngine;
import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;
-@SuppressWarnings({ "deprecation", "unused" })
+@SuppressWarnings({ "deprecation", "unchecked", "rawtypes" })
public class Driver implements CommandProcessor {
+ // hivesterix
+ private IExecutionEngine engine;
+ private boolean hivesterix = false;
+ private Set<Task> executedConditionalTsks = new HashSet<Task>();
+
static final private Log LOG = LogFactory.getLog(Driver.class.getName());
static final private LogHelper console = new LogHelper(LOG);
- // hive-sterix
- private IExecutionEngine engine;
- private boolean hivesterix = false;
+ private static final Object compileMonitor = new Object();
private int maxRows = 100;
ByteStream.Output bos = new ByteStream.Output();
@@ -152,23 +144,57 @@
// A limit on the number of threads that can be launched
private int maxthreads;
- private final int sleeptime = 2000;
-
+ private static final int SLEEP_TIME = 2000;
protected int tryCount = Integer.MAX_VALUE;
- private int checkLockManager() {
+ /**
+ * for backwards compatibility with current tests
+ */
+ public Driver(HiveConf conf) {
+ this.conf = conf;
+
+ }
+
+ public Driver() {
+ if (SessionState.get() != null) {
+ conf = SessionState.get().getConf();
+ }
+
+ // hivesterix
+ engine = new HyracksExecutionEngine(conf);
+ }
+
+ // hivesterix: plan printer
+ public Driver(HiveConf conf, PrintWriter planPrinter) {
+ this.conf = conf;
+ engine = new HyracksExecutionEngine(conf, planPrinter);
+ }
+
+ public void clear() {
+ this.hivesterix = false;
+ this.executedConditionalTsks.clear();
+ }
+
+ private boolean checkLockManager() {
boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
- if (supportConcurrency && (hiveLockMgr == null)) {
+ if (!supportConcurrency) {
+ return false;
+ }
+ if ((hiveLockMgr == null)) {
try {
setLockManager();
} catch (SemanticException e) {
errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
SQLState = ErrorMsg.findSQLState(e.getMessage());
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (12);
+ return false;
}
}
- return (0);
+ // the reason we set the lock manager for the ctx here is that each
+ // query has its own ctx object. The hiveLockMgr is shared across the
+ // same instance of Driver, which can run multiple queries.
+ ctx.setHiveLockMgr(hiveLockMgr);
+ return hiveLockMgr != null;
}
private void setLockManager() throws SemanticException {
@@ -183,6 +209,16 @@
hiveLockMgr = (HiveLockManager) ReflectionUtils.newInstance(conf.getClassByName(lockMgr), conf);
hiveLockMgr.setContext(new HiveLockManagerCtx(conf));
} catch (Exception e) {
+ // set hiveLockMgr to null just in case this invalid manager got set to
+ // next query's ctx.
+ if (hiveLockMgr != null) {
+ try {
+ hiveLockMgr.close();
+ } catch (LockException e1) {
+ // nothing we can do here
+ }
+ hiveLockMgr = null;
+ }
throw new SemanticException(ErrorMsg.LOCKMGR_NOT_INITIALIZED.getMsg() + e.getMessage());
}
}
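
Aside: checkLockManager now returns a boolean instead of an error code, lazily creates the shared manager, and attaches it to the per-query ctx, while setLockManager nulls the field out on failure so a half-initialized manager is never reused. A hedged sketch of that lifecycle with illustrative names — the real code reflectively instantiates the class named by hive.lock.manager and wraps failures in SemanticException:

public class LockManagerGuard {
    interface LockManager { void close() throws Exception; }

    private LockManager lockMgr;

    boolean ensureLockManager(boolean concurrencyEnabled) {
        if (!concurrencyEnabled) {
            return false; // no locking needed at all
        }
        if (lockMgr == null) {
            try {
                lockMgr = createManager();
            } catch (Exception e) {
                discardManager(); // don't leak a half-initialized manager to the next query
                return false;
            }
        }
        return lockMgr != null;
    }

    private LockManager createManager() {
        return new LockManager() { public void close() {} };
    }

    private void discardManager() {
        if (lockMgr != null) {
            try { lockMgr.close(); } catch (Exception ignored) {}
            lockMgr = null;
        }
    }

    public static void main(String[] args) {
        LockManagerGuard g = new LockManagerGuard();
        System.out.println(g.ensureLockManager(true));  // true: manager created lazily
        System.out.println(g.ensureLockManager(false)); // false: concurrency disabled
    }
}
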
@@ -230,10 +266,8 @@
} else if (sem.getFetchTask() != null) {
FetchTask ft = sem.getFetchTask();
TableDesc td = ft.getTblDesc();
- // partitioned tables don't have tableDesc set on the FetchTask.
- // Instead
- // they have a list of PartitionDesc objects, each with a table
- // desc.
+ // partitioned tables don't have tableDesc set on the FetchTask. Instead
+ // they have a list of PartitionDesc objects, each with a table desc.
// Let's
// try to fetch the desc for the first partition and use its
// deserializer.
@@ -320,59 +354,102 @@
}
/**
- * for backwards compatibility with current tests
- */
- public Driver(HiveConf conf) {
- this.conf = conf;
-
- // hivesterix
- engine = new HyracksExecutionEngine(conf);
- }
-
- public Driver() {
- if (SessionState.get() != null) {
- conf = SessionState.get().getConf();
- }
-
- // hivesterix
- engine = new HyracksExecutionEngine(conf);
- }
-
- // hivesterix: plan printer
- public Driver(HiveConf conf, PrintWriter planPrinter) {
- this.conf = conf;
- engine = new HyracksExecutionEngine(conf, planPrinter);
- }
-
- public void clear() {
- this.hivesterix = false;
- }
-
- /**
- * Compile a new query. Any currently-planned query associated with this
- * Driver is discarded.
+ * Compile a new query. Any currently-planned query associated with this Driver is discarded.
*
* @param command
* The SQL query to compile.
*/
public int compile(String command) {
+ return compile(command, true);
+ }
+
+ /**
+ * Hold state variables specific to each query being executed that may not
+ * be consistent in the overall SessionState
+ */
+ private static class QueryState {
+ private HiveOperation op;
+ private String cmd;
+ private boolean init = false;
+
+ /**
+ * Initialize the queryState with the query state variables
+ */
+ public void init(HiveOperation op, String cmd) {
+ this.op = op;
+ this.cmd = cmd;
+ this.init = true;
+ }
+
+ public boolean isInitialized() {
+ return this.init;
+ }
+
+ public HiveOperation getOp() {
+ return this.op;
+ }
+
+ public String getCmd() {
+ return this.cmd;
+ }
+ }
+
+ public void saveSession(QueryState qs) {
+ SessionState oldss = SessionState.get();
+ if (oldss != null && oldss.getHiveOperation() != null) {
+ qs.init(oldss.getHiveOperation(), oldss.getCmd());
+ }
+ }
+
+ public void restoreSession(QueryState qs) {
+ SessionState ss = SessionState.get();
+ if (ss != null && qs != null && qs.isInitialized()) {
+ ss.setCmd(qs.getCmd());
+ ss.setCommandType(qs.getOp());
+ }
+ }
+
+ /**
+ * Compile a new query, optionally resetting the taskID counter. Not resetting the counter
+ * is useful for generating re-entrant QL queries.
+ *
+ * @param command
+ * The HiveQL query to compile
+ * @param resetTaskIds
+ * Resets taskID counter if true.
+ * @return 0 for ok
+ */
+ public int compile(String command, boolean resetTaskIds) {
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.COMPILE);
+
+ // holder for the parent command type/string when executing reentrant queries
+ QueryState queryState = new QueryState();
+
if (plan != null) {
close();
plan = null;
}
- TaskFactory.resetId();
+ if (resetTaskIds) {
+ TaskFactory.resetId();
+ }
+ saveSession(queryState);
try {
command = new VariableSubstitution().substitute(conf, command);
ctx = new Context(conf);
+ ctx.setTryCount(getTryCount());
+ ctx.setCmd(command);
+ ctx.setHDFSCleanup(true);
ParseDriver pd = new ParseDriver();
ASTNode tree = pd.parse(command, ctx);
tree = ParseUtils.findRootNonNullToken(tree);
BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
- List<AbstractSemanticAnalyzerHook> saHooks = getSemanticAnalyzerHooks();
+ List<AbstractSemanticAnalyzerHook> saHooks = getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK,
+ AbstractSemanticAnalyzerHook.class);
// Do semantic analysis and plan generation
if (saHooks != null) {
@@ -382,6 +459,7 @@
tree = hook.preAnalyze(hookCtx, tree);
}
sem.analyze(tree, ctx);
+ hookCtx.update(sem);
for (AbstractSemanticAnalyzerHook hook : saHooks) {
hook.postAnalyze(hookCtx, sem.getRootTasks());
}
@@ -394,19 +472,10 @@
// validate the plan
sem.validate();
- plan = new QueryPlan(command, sem);
- // initialize FetchTask right here
- if (plan.getFetchTask() != null) {
- plan.getFetchTask().initialize(conf, plan, null);
- }
-
- // get the output schema
- schema = getSchema(sem, conf);
+ plan = new QueryPlan(command, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN));
// test Only - serialize the query plan and deserialize it
- if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
-
- Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+ if ("true".equalsIgnoreCase(System.getProperty("test.serialize.qplan"))) {
String queryPlanFileName = ctx.getLocalScratchDir(true) + Path.SEPARATOR_CHAR + "queryplan.xml";
LOG.info("query plan = " + queryPlanFileName);
@@ -431,17 +500,24 @@
plan.getFetchTask().initialize(conf, plan, null);
}
- // do the authorization check
+ // get the output schema
+ schema = getSchema(sem, conf);
+
+ // do the authorization check
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
try {
- // doAuthorization(sem);
+ perfLogger.PerfLogBegin(LOG, PerfLogger.DO_AUTHORIZATION);
+ doAuthorization(sem);
} catch (AuthorizationException authExp) {
console.printError("Authorization failed:" + authExp.getMessage()
+ ". Use show grant to get more details.");
return 403;
+ } finally {
+ perfLogger.PerfLogEnd(LOG, PerfLogger.DO_AUTHORIZATION);
}
}
+ // restore state after we're done executing a specific query
// hyracks run
if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
int engineRet = engine.compileJob(sem.getRootTasks());
@@ -450,21 +526,19 @@
}
}
return 0;
- } catch (SemanticException e) {
- errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (10);
- } catch (ParseException e) {
- errorMessage = "FAILED: Parse Error: " + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (11);
} catch (Exception e) {
- errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (12);
+ ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
+ errorMessage = "FAILED: " + e.getClass().getSimpleName();
+ if (error != ErrorMsg.GENERIC_ERROR) {
+ errorMessage += " [Error " + error.getErrorCode() + "]:";
+ }
+ errorMessage += " " + e.getMessage();
+ SQLState = error.getSQLState();
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return error.getErrorCode();
+ } finally {
+ perfLogger.PerfLogEnd(LOG, PerfLogger.COMPILE);
+ restoreSession(queryState);
}
}
@@ -479,13 +553,13 @@
ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
} else {
- // if (op.equals(HiveOperation.IMPORT)) {
- // ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
- // if (!isa.existsTable()) {
- ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
- HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
- // }
- // }
+ if (op.equals(HiveOperation.IMPORT)) {
+ ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
+ if (!isa.existsTable()) {
+ ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
+ HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
+ }
+ }
}
if (outputs != null && outputs.size() > 0) {
for (WriteEntity write : outputs) {
@@ -513,8 +587,8 @@
Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
for (ReadEntity read : inputs) {
- if (read.getPartition() != null) {
- Table tbl = read.getTable();
+ Table tbl = read.getTable();
+ if ((read.getPartition() != null) || (tbl.isPartitioned())) {
String tblName = tbl.getTableName();
if (tableUsePartLevelAuth.get(tblName) == null) {
boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE"
@@ -533,9 +607,9 @@
ParseContext parseCtx = querySem.getParseContext();
Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();
- for (Map.Entry<String, Operator<? extends Serializable>> topOpMap : querySem.getParseContext()
+ for (Map.Entry<String, Operator<? extends OperatorDesc>> topOpMap : querySem.getParseContext()
.getTopOps().entrySet()) {
- Operator<? extends Serializable> topOp = topOpMap.getValue();
+ Operator<? extends OperatorDesc> topOp = topOpMap.getValue();
if (topOp instanceof TableScanOperator && tsoTopMap.containsKey(topOp)) {
TableScanOperator tableScanOp = (TableScanOperator) topOp;
Table tbl = tsoTopMap.get(tableScanOp);
@@ -551,7 +625,10 @@
cols.add(columns.get(i).getName());
}
}
- if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName())) {
+ // the map may not contain all sources, since the input list may have been optimized out
+ // or be non-existent, though such sources may still be referenced by the TableScanOperator;
+ // if it's null then the partition probably doesn't exist, so fall back to table permission
+ if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
String alias_id = topOpMap.getKey();
PrunedPartitionList partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
parseCtx.getOpToPartPruner().get(topOp), parseCtx.getConf(), alias_id,
@@ -582,30 +659,28 @@
// cache the results for table authorization
Set<String> tableAuthChecked = new HashSet<String>();
for (ReadEntity read : inputs) {
- Table tbl = null;
+ Table tbl = read.getTable();
if (read.getPartition() != null) {
- tbl = read.getPartition().getTable();
+ Partition partition = read.getPartition();
+ tbl = partition.getTable();
// use partition level authorization
- if (tableUsePartLevelAuth.get(tbl.getTableName())) {
- List<String> cols = part2Cols.get(read.getPartition());
+ if (tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
+ List<String> cols = part2Cols.get(partition);
if (cols != null && cols.size() > 0) {
- ss.getAuthorizer().authorize(read.getPartition().getTable(), read.getPartition(), cols,
+ ss.getAuthorizer().authorize(partition.getTable(), partition, cols,
op.getInputRequiredPrivileges(), null);
} else {
- ss.getAuthorizer().authorize(read.getPartition(), op.getInputRequiredPrivileges(), null);
+ ss.getAuthorizer().authorize(partition, op.getInputRequiredPrivileges(), null);
}
continue;
}
- } else if (read.getTable() != null) {
- tbl = read.getTable();
}
- // if we reach here, it means it needs to do a table
- // authorization
- // check, and the table authorization may already happened
- // because of other
+ // if we reach here, it means it needs to do a table authorization
+ // check, and the table authorization may already happened because of other
// partitions
- if (tbl != null && !tableAuthChecked.contains(tbl.getTableName())) {
+ if (tbl != null && !tableAuthChecked.contains(tbl.getTableName())
+ && !(tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE)) {
List<String> cols = tab2Cols.get(tbl);
if (cols != null && cols.size() > 0) {
ss.getAuthorizer().authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null);
@@ -632,16 +707,15 @@
* @param p
* The partition to be locked
* @param mode
- * The mode of the lock (SHARED/EXCLUSIVE) Get the list of
- * objects to be locked. If a partition needs to be locked (in
- * any mode), all its parents should also be locked in SHARED
- * mode.
+ * The mode of the lock (SHARED/EXCLUSIVE). Get the list of objects to be locked. If a
+ * partition needs to be locked (in any mode), all its parents should also be locked in
+ * SHARED mode.
**/
private List<HiveLockObj> getLockObjects(Table t, Partition p, HiveLockMode mode) throws SemanticException {
List<HiveLockObj> locks = new LinkedList<HiveLockObj>();
HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
- .currentTimeMillis()), "IMPLICIT");
+ .currentTimeMillis()), "IMPLICIT", plan.getQueryStr());
if (t != null) {
locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
@@ -665,16 +739,20 @@
name = p.getName().split("@")[2];
}
- String partName = name;
String partialName = "";
String[] partns = name.split("/");
int len = p instanceof DummyPartition ? partns.length : partns.length - 1;
+ Map<String, String> partialSpec = new LinkedHashMap<String, String>();
for (int idx = 0; idx < len; idx++) {
String partn = partns[idx];
partialName += partn;
+ String[] nameValue = partn.split("=");
+ assert (nameValue.length == 2);
+ partialSpec.put(nameValue[0], nameValue[1]);
try {
locks.add(new HiveLockObj(new HiveLockObject(new DummyPartition(p.getTable(), p.getTable()
- .getDbName() + "/" + p.getTable().getTableName() + "/" + partialName), lockData), mode));
+ .getDbName() + "/" + p.getTable().getTableName() + "/" + partialName, partialSpec),
+ lockData), mode));
partialName += "/";
} catch (HiveException e) {
throw new SemanticException(e.getMessage());
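
Aside: the getLockObjects change rebuilds a partial partition spec from the "name=value/name=value" prefix of the partition path, preserving component order, before constructing each DummyPartition. A self-contained sketch of that parsing step (the assert mirrors the one in the patch):

import java.util.LinkedHashMap;
import java.util.Map;

public class PartialSpec {
    static Map<String, String> parse(String partialName) {
        // LinkedHashMap keeps the partition-column order of the path.
        Map<String, String> spec = new LinkedHashMap<String, String>();
        for (String component : partialName.split("/")) {
            String[] nameValue = component.split("=");
            assert nameValue.length == 2 : "expected name=value, got " + component;
            spec.put(nameValue[0], nameValue[1]);
        }
        return spec;
    }

    public static void main(String[] args) {
        System.out.println(parse("ds=2013-01-01/hr=12")); // {ds=2013-01-01, hr=12}
    }
}
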
@@ -688,17 +766,16 @@
}
/**
- * Acquire read and write locks needed by the statement. The list of objects
- * to be locked are obtained from he inputs and outputs populated by the
- * compiler. The lock acuisition scheme is pretty simple. If all the locks
- * cannot be obtained, error out. Deadlock is avoided by making sure that
- * the locks are lexicographically sorted.
+ * Acquire read and write locks needed by the statement. The list of objects to be locked is
+ * obtained from the inputs and outputs populated by the compiler. The lock acquisition scheme is
+ * pretty simple: if all the locks cannot be obtained, error out. Deadlock is avoided by making
+ * sure that the locks are lexicographically sorted.
**/
public int acquireReadWriteLocks() {
- try {
- int sleepTime = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES) * 1000;
- int numRetries = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES);
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
+ try {
boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
if (!supportConcurrency) {
return 0;
@@ -707,8 +784,7 @@
List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>();
// Sort all the inputs, outputs.
- // If a lock needs to be acquired on any partition, a read lock
- // needs to be acquired on all
+ // If a lock needs to be acquired on any partition, a read lock needs to be acquired on all
// its parents also
for (ReadEntity input : plan.getInputs()) {
if (input.getType() == ReadEntity.Type.TABLE) {
@@ -719,16 +795,21 @@
}
for (WriteEntity output : plan.getOutputs()) {
+ List<HiveLockObj> lockObj = null;
if (output.getTyp() == WriteEntity.Type.TABLE) {
- lockObjects.addAll(getLockObjects(output.getTable(), null,
- output.isComplete() ? HiveLockMode.EXCLUSIVE : HiveLockMode.SHARED));
+ lockObj = getLockObjects(output.getTable(), null, output.isComplete() ? HiveLockMode.EXCLUSIVE
+ : HiveLockMode.SHARED);
} else if (output.getTyp() == WriteEntity.Type.PARTITION) {
- lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.EXCLUSIVE));
+ lockObj = getLockObjects(null, output.getPartition(), HiveLockMode.EXCLUSIVE);
}
- // In case of dynamic queries, it is possible to have incomplete
- // dummy partitions
+ // In case of dynamic queries, it is possible to have incomplete dummy partitions
else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) {
- lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.SHARED));
+ lockObj = getLockObjects(null, output.getPartition(), HiveLockMode.SHARED);
+ }
+
+ if (lockObj != null) {
+ lockObjects.addAll(lockObj);
+ ctx.getOutputLockObjects().put(output, lockObj);
}
}
@@ -736,13 +817,8 @@
return 0;
}
- int ret = checkLockManager();
- if (ret != 0) {
- return ret;
- }
-
HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
- .currentTimeMillis()), "IMPLICIT");
+ .currentTimeMillis()), "IMPLICIT", plan.getQueryStr());
// Lock the database also
try {
@@ -753,25 +829,7 @@
throw new SemanticException(e.getMessage());
}
- ctx.setHiveLockMgr(hiveLockMgr);
- List<HiveLock> hiveLocks = null;
-
- int tryNum = 1;
- do {
-
- // ctx.getHiveLockMgr();
- // hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false);
-
- if (hiveLocks != null) {
- break;
- }
-
- tryNum++;
- try {
- Thread.sleep(sleepTime);
- } catch (InterruptedException e) {
- }
- } while (tryNum < numRetries);
+ List<HiveLock> hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false);
if (hiveLocks == null) {
throw new SemanticException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
@@ -785,138 +843,207 @@
SQLState = ErrorMsg.findSQLState(e.getMessage());
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
return (10);
- } catch (Exception e) {
+ } catch (LockException e) {
errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
SQLState = ErrorMsg.findSQLState(e.getMessage());
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
return (10);
- }
- }
-
- /**
- * Release all the locks acquired implicitly by the statement. Note that the
- * locks acquired with 'keepAlive' set to True are not released.
- **/
- private void releaseLocks() {
- if (ctx != null && ctx.getHiveLockMgr() != null) {
- try {
- ctx.getHiveLockMgr().close();
- ctx.setHiveLocks(null);
- } catch (LockException e) {
- }
+ } finally {
+ perfLogger.PerfLogEnd(LOG, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
}
}
/**
* @param hiveLocks
- * list of hive locks to be released Release all the locks
- * specified. If some of the locks have already been released,
- * ignore them
+ * list of hive locks to be released. Release all the locks specified; if some of the
+ * locks have already been released, ignore them.
**/
private void releaseLocks(List<HiveLock> hiveLocks) {
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.RELEASE_LOCKS);
+
if (hiveLocks != null) {
ctx.getHiveLockMgr().releaseLocks(hiveLocks);
}
ctx.setHiveLocks(null);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.RELEASE_LOCKS);
}
- public CommandProcessorResponse run(String command) {
+ public CommandProcessorResponse run(String command) throws CommandNeedRetryException {
errorMessage = null;
SQLState = null;
- int ret = compile(command);
+ if (!validateConfVariables()) {
+ return new CommandProcessorResponse(12, errorMessage, SQLState);
+ }
+
+ HiveDriverRunHookContext hookContext = new HiveDriverRunHookContextImpl(conf, command);
+ // Get all the driver run hooks and pre-execute them.
+ List<HiveDriverRunHook> driverRunHooks;
+ try {
+ driverRunHooks = getHooks(HiveConf.ConfVars.HIVE_DRIVER_RUN_HOOKS, HiveDriverRunHook.class);
+ for (HiveDriverRunHook driverRunHook : driverRunHooks) {
+ driverRunHook.preDriverRun(hookContext);
+ }
+ } catch (Exception e) {
+ errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return new CommandProcessorResponse(12, errorMessage, SQLState);
+ }
+
+ // Reset the perf logger
+ PerfLogger perfLogger = PerfLogger.getPerfLogger(true);
+ perfLogger.PerfLogBegin(LOG, PerfLogger.DRIVER_RUN);
+ perfLogger.PerfLogBegin(LOG, PerfLogger.TIME_TO_SUBMIT);
+
+ int ret;
+ synchronized (compileMonitor) {
+ ret = compile(command);
+ }
if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
+ releaseLocks(ctx.getHiveLocks());
return new CommandProcessorResponse(ret, errorMessage, SQLState);
}
- // ret = acquireReadWriteLocks();
- if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
- return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ boolean requireLock = false;
+ boolean ckLock = checkLockManager();
+
+ if (ckLock) {
+ boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY);
+ if (lockOnlyMapred) {
+ Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>();
+ taskQueue.addAll(plan.getRootTasks());
+ while (taskQueue.peek() != null) {
+ Task<? extends Serializable> tsk = taskQueue.remove();
+ requireLock = requireLock || tsk.requireLock();
+ if (requireLock) {
+ break;
+ }
+ if (tsk instanceof ConditionalTask) {
+ taskQueue.addAll(((ConditionalTask) tsk).getListTasks());
+ }
+ if (tsk.getChildTasks() != null) {
+ taskQueue.addAll(tsk.getChildTasks());
+ }
+ // do not add the backup task here, because a backup task should be the same
+ // type as the original task.
+ }
+ } else {
+ requireLock = true;
+ }
+ }
+
+ if (requireLock) {
+ ret = acquireReadWriteLocks();
+ if (ret != 0) {
+ releaseLocks(ctx.getHiveLocks());
+ return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ }
}
ret = execute();
if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
+ // if requireLock is false, the release here will do nothing because there is no lock
+ releaseLocks(ctx.getHiveLocks());
return new CommandProcessorResponse(ret, errorMessage, SQLState);
}
- // releaseLocks(ctx.getHiveLocks());
+ // if requireLock is false, the release here will do nothing because there is no lock
+ releaseLocks(ctx.getHiveLocks());
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.DRIVER_RUN);
+ perfLogger.close(LOG, plan);
+
+ // Take all the driver run hooks and post-execute them.
+ try {
+ for (HiveDriverRunHook driverRunHook : driverRunHooks) {
+ driverRunHook.postDriverRun(hookContext);
+ }
+ } catch (Exception e) {
+ errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return new CommandProcessorResponse(12, errorMessage, SQLState);
+ }
+
return new CommandProcessorResponse(ret);
}
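
Aside: when HIVE_LOCK_MAPRED_ONLY is set, run() decides whether to take locks at all by a breadth-first walk over the task DAG that stops at the first task reporting requireLock(). A minimal sketch of that scan with stand-in node types:

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Queue;

public class LockScan {
    static class Node {
        final boolean requiresLock;
        final List<Node> children = new ArrayList<Node>();
        Node(boolean requiresLock) { this.requiresLock = requiresLock; }
    }

    static boolean planNeedsLock(List<Node> roots) {
        Queue<Node> queue = new ArrayDeque<Node>(roots);
        while (!queue.isEmpty()) {
            Node t = queue.remove();
            if (t.requiresLock) {
                return true; // one locking task is enough
            }
            // conditional branches would be enqueued here too; backup tasks are
            // skipped because they are the same type as the original task
            queue.addAll(t.children);
        }
        return false;
    }

    public static void main(String[] args) {
        Node root = new Node(false);
        root.children.add(new Node(true));
        System.out.println(planNeedsLock(Arrays.asList(root))); // true
    }
}
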
- private List<AbstractSemanticAnalyzerHook> getSemanticAnalyzerHooks() throws Exception {
- ArrayList<AbstractSemanticAnalyzerHook> saHooks = new ArrayList<AbstractSemanticAnalyzerHook>();
- String pestr = conf.getVar(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK);
- if (pestr == null) {
- return saHooks;
+ /**
+ * Validate configuration variables.
+ *
+ * @return true if the configuration variables are valid
+ */
+ private boolean validateConfVariables() {
+ boolean valid = true;
+ if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES))
+ && ((conf.getBoolVar(HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE))
+ || (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) || ((conf
+ .getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE))))) {
+ errorMessage = "FAILED: Hive Internal Error: " + ErrorMsg.SUPPORT_DIR_MUST_TRUE_FOR_LIST_BUCKETING.getMsg();
+ SQLState = ErrorMsg.findSQLState(errorMessage);
+ console.printError(errorMessage + "\n");
+ valid = false;
}
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return saHooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- AbstractSemanticAnalyzerHook hook = HiveUtils.getSemanticAnalyzerHook(conf, peClass);
- saHooks.add(hook);
- } catch (HiveException e) {
- console.printError("Pre Exec Hook Class not found:" + e.getMessage());
- throw e;
- }
- }
-
- return saHooks;
+ return valid;
}
- private List<Hook> getPreExecHooks() throws Exception {
- ArrayList<Hook> pehooks = new ArrayList<Hook>();
- String pestr = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS);
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return pehooks;
+ /**
+ * Returns a set of hooks specified in a configuration variable.
+ * See getHooks(HiveConf.ConfVars hookConfVar, Class<T> clazz)
+ *
+ * @param hookConfVar the configuration variable naming the hook classes
+ * @return the hooks named by hookConfVar, loaded as plain Hook instances
+ * @throws Exception
+ */
+ private List<Hook> getHooks(HiveConf.ConfVars hookConfVar) throws Exception {
+ return getHooks(hookConfVar, Hook.class);
+ }
+
+ /**
+ * Returns the hooks specified in a configuration variable. The hooks are returned in a list in
+ * the order they were specified in the configuration variable.
+ *
+ * @param hookConfVar
+ * The configuration variable specifying a comma separated list of the hook
+ * class names.
+ * @param clazz
+ * The super type of the hooks.
+ * @return A list of the hooks cast as the type specified in clazz, in the order
+ * they are listed in the value of hookConfVar
+ * @throws Exception
+ */
+ private <T extends Hook> List<T> getHooks(HiveConf.ConfVars hookConfVar, Class<T> clazz) throws Exception {
+
+ List<T> hooks = new ArrayList<T>();
+ String csHooks = conf.getVar(hookConfVar);
+ if (csHooks == null) {
+ return hooks;
}
- String[] peClasses = pestr.split(",");
+ csHooks = csHooks.trim();
+ if (csHooks.equals("")) {
+ return hooks;
+ }
- for (String peClass : peClasses) {
+ String[] hookClasses = csHooks.split(",");
+
+ for (String hookClass : hookClasses) {
try {
- pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
+ T hook = (T) Class.forName(hookClass.trim(), true, JavaUtils.getClassLoader()).newInstance();
+ hooks.add(hook);
} catch (ClassNotFoundException e) {
- console.printError("Pre Exec Hook Class not found:" + e.getMessage());
+ console.printError(hookConfVar.varname + " Class not found:" + e.getMessage());
throw e;
}
}
- return pehooks;
+ return hooks;
}
- private List<Hook> getPostExecHooks() throws Exception {
- ArrayList<Hook> pehooks = new ArrayList<Hook>();
- String pestr = conf.getVar(HiveConf.ConfVars.POSTEXECHOOKS);
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return pehooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
- } catch (ClassNotFoundException e) {
- console.printError("Post Exec Hook Class not found:" + e.getMessage());
- throw e;
- }
- }
-
- return pehooks;
- }
-
- public int execute() {
+ public int execute() throws CommandNeedRetryException {
// execute hivesterix plan
if (hivesterix) {
hivesterix = false;
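
Aside: the generic getHooks(hookConfVar, clazz) above replaces the three copy-pasted per-hook-type helpers with one reflective loader. A self-contained sketch of the same pattern; it uses the default class loader where Hive goes through JavaUtils.getClassLoader():

import java.util.ArrayList;
import java.util.List;

public class HookLoader {
    static <T> List<T> loadHooks(String csv, Class<T> clazz) throws Exception {
        List<T> hooks = new ArrayList<T>();
        if (csv == null || csv.trim().isEmpty()) {
            return hooks;
        }
        for (String name : csv.split(",")) {
            // Same mechanics as the Driver change: Class.forName + newInstance,
            // then cast to the requested hook supertype.
            Object instance = Class.forName(name.trim()).newInstance();
            hooks.add(clazz.cast(instance));
        }
        return hooks;
    }

    public static void main(String[] args) throws Exception {
        // Loads java.lang.Object twice just to show the mechanics.
        System.out.println(loadHooks("java.lang.Object, java.lang.Object", Object.class).size()); // 2
    }
}
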
@@ -925,6 +1052,9 @@
return ret;
}
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.DRIVER_EXECUTE);
+
boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME));
int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
@@ -933,6 +1063,10 @@
conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId);
conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr);
+
+ conf.set("mapreduce.workflow.id", "hive_" + queryId);
+ conf.set("mapreduce.workflow.name", queryStr);
+
maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
try {
@@ -946,14 +1080,23 @@
}
resStream = null;
- HookContext hookContext = new HookContext(plan, conf);
+ HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS());
+ hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
- for (Hook peh : getPreExecHooks()) {
+ for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) {
if (peh instanceof ExecuteWithHookContext) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
+
((ExecuteWithHookContext) peh).run(hookContext);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
} else if (peh instanceof PreExecute) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
+
((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), ShimLoader
.getHadoopShims().getUGIForConf(conf));
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
}
}
@@ -968,32 +1111,36 @@
}
String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
- // A runtime that launches runnable tasks as separate Threads
- // through
+ // A runtime that launches runnable tasks as separate Threads through
// TaskRunners
// As soon as a task isRunnable, it is put in a queue
// At any time, at most maxthreads tasks can be running
- // The main thread polls the TaskRunners to check if they have
- // finished.
+ // The main thread polls the TaskRunners to check if they have finished.
- Queue<Task<? extends Serializable>> runnable = new LinkedList<Task<? extends Serializable>>();
+ Queue<Task<? extends Serializable>> runnable = new ConcurrentLinkedQueue<Task<? extends Serializable>>();
Map<TaskResult, TaskRunner> running = new HashMap<TaskResult, TaskRunner>();
DriverContext driverCxt = new DriverContext(runnable, ctx);
+ ctx.setHDFSCleanup(true);
+
+ SessionState.get().setLastMapRedStatsList(new ArrayList<MapRedStats>());
+ SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>());
+ SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>());
// Add root Tasks to runnable
-
for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
+ // This should never happen; if it does, it's a bug with the potential to produce
+ // incorrect results.
+ assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
driverCxt.addToRunnable(tsk);
}
+ perfLogger.PerfLogEnd(LOG, PerfLogger.TIME_TO_SUBMIT);
// Loop while you either have tasks running, or tasks queued up
-
while (running.size() != 0 || runnable.peek() != null) {
// Launch upto maxthreads tasks
while (runnable.peek() != null && running.size() < maxthreads) {
Task<? extends Serializable> tsk = runnable.remove();
- console.printInfo("executing task " + tsk.getName());
launchTask(tsk, queryId, noName, running, jobname, jobs, driverCxt);
}
@@ -1005,12 +1152,24 @@
int exitVal = tskRes.getExitVal();
if (exitVal != 0) {
+ if (tsk.ifRetryCmdWhenFail()) {
+ if (!running.isEmpty()) {
+ taskCleanup(running);
+ }
+ // in case we decided to run everything in local mode, restore the
+ // the jobtracker setting to its initial value
+ ctx.restoreOriginalTracker();
+ throw new CommandNeedRetryException();
+ }
Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
if (backupTask != null) {
errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+ tsk.getClass().getName();
+ ErrorMsg em = ErrorMsg.getErrorMsg(exitVal);
+ if (em != null) {
+ errorMessage += ". " + em.getMsg();
+ }
console.printError(errorMessage);
-
errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
console.printError(errorMessage);
@@ -1021,20 +1180,31 @@
continue;
} else {
- // TODO: This error messaging is not very informative.
- // Fix that.
+ hookContext.setHookType(HookContext.HookType.ON_FAILURE_HOOK);
+ // Get all the failure execution hooks and execute them.
+ for (Hook ofh : getHooks(HiveConf.ConfVars.ONFAILUREHOOKS)) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());
+
+ ((ExecuteWithHookContext) ofh).run(hookContext);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());
+ }
+
errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+ tsk.getClass().getName();
+ ErrorMsg em = ErrorMsg.getErrorMsg(exitVal);
+ if (em != null) {
+ errorMessage += ". " + em.getMsg();
+ }
SQLState = "08S01";
console.printError(errorMessage);
- if (running.size() != 0) {
- taskCleanup();
+ if (!running.isEmpty()) {
+ taskCleanup(running);
}
- // in case we decided to run everything in local mode,
- // restore the
+ // in case we decided to run everything in local mode, restore
// the jobtracker setting to its initial value
ctx.restoreOriginalTracker();
- return 9;
+ return exitVal;
}
}
@@ -1047,9 +1217,9 @@
if (tsk.getChildTasks() != null) {
for (Task<? extends Serializable> child : tsk.getChildTasks()) {
// hivesterix: don't check launchable condition
- // if (DriverContext.isLaunchable(child)) {
+ //if(DriverContext.isLaunchable(tsk)){
driverCxt.addToRunnable(child);
- // }
+ //}
}
}
}
@@ -1059,8 +1229,7 @@
ctx.restoreOriginalTracker();
// remove incomplete outputs.
- // Some incomplete outputs may be added at the beginning, for eg:
- // for dynamic partitions.
+ // Some incomplete outputs may be added at the beginning, e.g. for dynamic partitions.
// remove them
HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
for (WriteEntity output : plan.getOutputs()) {
@@ -1073,15 +1242,24 @@
plan.getOutputs().remove(output);
}
+ hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
// Get all the post execution hooks and execute them.
- for (Hook peh : getPostExecHooks()) {
+ for (Hook peh : getHooks(HiveConf.ConfVars.POSTEXECHOOKS)) {
if (peh instanceof ExecuteWithHookContext) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
+
((ExecuteWithHookContext) peh).run(hookContext);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
} else if (peh instanceof PostExecute) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
+
((PostExecute) peh)
.run(SessionState.get(), plan.getInputs(), plan.getOutputs(),
(SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo()
: null), ShimLoader.getHadoopShims().getUGIForConf(conf));
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
}
}
@@ -1089,7 +1267,10 @@
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0));
SessionState.get().getHiveHistory().printRowCount(queryId);
}
+ } catch (CommandNeedRetryException e) {
+ throw e;
} catch (Exception e) {
+ ctx.restoreOriginalTracker();
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12));
}
@@ -1105,6 +1286,18 @@
if (noName) {
conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
}
+ perfLogger.PerfLogEnd(LOG, PerfLogger.DRIVER_EXECUTE);
+
+ if (SessionState.get().getLastMapRedStatsList() != null
+ && SessionState.get().getLastMapRedStatsList().size() > 0) {
+ long totalCpu = 0;
+ console.printInfo("MapReduce Jobs Launched: ");
+ for (int i = 0; i < SessionState.get().getLastMapRedStatsList().size(); i++) {
+ console.printInfo("Job " + i + ": " + SessionState.get().getLastMapRedStatsList().get(i));
+ totalCpu += SessionState.get().getLastMapRedStatsList().get(i).getCpuMSec();
+ }
+ console.printInfo("Total MapReduce CPU Time Spent: " + Utilities.formatMsecToStr(totalCpu));
+ }
}
plan.setDone();
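
Aside: the new block at the end of execute() sums CPU milliseconds across the MapRedStats of every launched job and prints a total. A simplified sketch of that aggregation; the real code formats the total with Utilities.formatMsecToStr rather than printing raw milliseconds:

import java.util.Arrays;
import java.util.List;

public class JobSummary {
    // Prints one line per job and returns the summed CPU time, as in execute().
    static long totalCpuMsec(List<Long> perJobCpuMsec) {
        long total = 0;
        for (int i = 0; i < perJobCpuMsec.size(); i++) {
            System.out.println("Job " + i + ": " + perJobCpuMsec.get(i) + " msec");
            total += perJobCpuMsec.get(i);
        }
        return total;
    }

    public static void main(String[] args) {
        System.out.println("Total CPU: " + totalCpuMsec(Arrays.asList(1200L, 3400L)) + " msec");
    }
}
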
@@ -1134,14 +1327,12 @@
* name of the task, if it is a map-reduce job
* @param jobs
* number of map-reduce jobs
- * @param curJobNo
- * the sequential number of the next map-reduce job
- * @return the updated number of last the map-reduce job launched
+ * @param cxt
+ * the driver context
*/
public void launchTask(Task<? extends Serializable> tsk, String queryId, boolean noName,
Map<TaskResult, TaskRunner> running, String jobname, int jobs, DriverContext cxt) {
-
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().startTask(queryId, tsk, tsk.getClass().getName());
}
@@ -1149,6 +1340,8 @@
if (noName) {
conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "(" + tsk.getId() + ")");
}
+ conf.set("mapreduce.workflow.node.name", tsk.getId());
+ Utilities.setWorkflowAdjacencies(conf, plan);
cxt.incCurJobNo(1);
console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of " + jobs);
}
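
The two added lines tag the job configuration so external workflow tools can correlate launched jobs with plan nodes. A sketch with a plain Map standing in for the Hadoop Configuration object (the taskId value is hypothetical):

    import java.util.HashMap;
    import java.util.Map;

    public class WorkflowConfSketch {
        public static void main(String[] args) {
            Map<String, String> conf = new HashMap<>(); // stand-in for Configuration
            String taskId = "Stage-1";                  // hypothetical task id
            conf.put("mapreduce.workflow.node.name", taskId);
            int curJobNo = 1, jobs = 3;
            System.out.println("Launching Job " + curJobNo + " out of " + jobs);
            System.out.println(conf);
        }
    }
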
@@ -1156,7 +1349,13 @@
TaskResult tskRes = new TaskResult();
TaskRunner tskRun = new TaskRunner(tsk, tskRes);
- // HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) &&
+ // Launch Task
+ //if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) && tsk.isMapRedTask()) {
+ // Launch it in the parallel mode, as a separate thread only for MR tasks
+ // tskRun.start();
+ //} else {
+ // tskRun.runSequential();
+ //}
// Launch Task: hivesterix tweak
if (tsk instanceof MapRedTask || tsk instanceof StatsTask) {
// Launch it in the parallel mode, as a separate thread only for MR
@@ -1169,11 +1368,27 @@
if (crs instanceof ConditionalResolverMergeFiles) {
tskRes.setRunning(false);
tskRes.setExitVal(0);
-
- List<Task<? extends Serializable>> children = condTask.getListTasks();
- for (Task<? extends Serializable> child : children)
- if (child instanceof MapRedTask)
- cxt.addToRunnable(child);
+ if (!executedConditionalTsks.contains(tsk)) {
+ List<Task<? extends Serializable>> children = condTask.getListTasks();
+ Task<? extends Serializable> selectedBranch = null;
+ for (Task<? extends Serializable> branch : children) {
+ if (branch instanceof MoveTask) {
+ selectedBranch = branch;
+ break;
+ }
+ }
+ if (selectedBranch == null) {
+ for (int i = children.size() - 1; i >= 0; i--) {
+ Task<? extends Serializable> child = children.get(i);
+ if (child instanceof MapRedTask) {
+ selectedBranch = child;
+ break;
+ }
+ }
+ }
+ executedConditionalTsks.add(tsk);
+ cxt.addToRunnable(selectedBranch);
+ }
}
} else {
tskRun.runSequential();
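
The conditional-task hunk above no longer enqueues every child: it selects a single branch, preferring a MoveTask and otherwise falling back to the last MapRedTask, and remembers which conditional tasks it has already expanded. A self-contained model of the selection logic:

    import java.util.Arrays;
    import java.util.List;

    public class BranchSelectionSketch {
        interface Task {}
        static class MoveTask implements Task {}
        static class MapRedTask implements Task {}

        // Prefer a MoveTask branch; otherwise fall back to the last MapRedTask.
        static Task select(List<Task> children) {
            for (Task t : children) {
                if (t instanceof MoveTask) { return t; }
            }
            for (int i = children.size() - 1; i >= 0; i--) {
                if (children.get(i) instanceof MapRedTask) { return children.get(i); }
            }
            return null;
        }

        public static void main(String[] args) {
            List<Task> children = Arrays.asList(new MapRedTask(), new MapRedTask());
            System.out.println(select(children).getClass().getSimpleName()); // MapRedTask
        }
    }
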
@@ -1185,12 +1400,18 @@
/**
* Cleans up remaining tasks in case of failure
*/
-
- public void taskCleanup() {
- // The currently existing Shutdown hooks will be automatically called,
- // killing the map-reduce processes.
- // The non MR processes will be killed as well.
- System.exit(9);
+ public void taskCleanup(Map<TaskResult, TaskRunner> running) {
+ for (Map.Entry<TaskResult, TaskRunner> entry : running.entrySet()) {
+ if (entry.getKey().isRunning()) {
+ Task<?> task = entry.getValue().getTask();
+ try {
+ task.shutdown();
+ } catch (Exception e) {
+ console.printError("Exception on shutting down task " + task.getId() + ": " + e);
+ }
+ }
+ }
+ running.clear();
}
/**
@@ -1214,7 +1435,7 @@
// In this loop, nothing was found
// Sleep 10 seconds and restart
try {
- Thread.sleep(sleeptime);
+ Thread.sleep(SLEEP_TIME);
} catch (InterruptedException ie) {
// Do Nothing
;
@@ -1223,7 +1444,7 @@
}
}
- public boolean getResults(ArrayList<String> res) throws IOException {
+ public boolean getResults(ArrayList<String> res) throws IOException, CommandNeedRetryException {
if (plan != null && plan.getFetchTask() != null) {
FetchTask ft = plan.getFetchTask();
ft.setMaxRows(maxRows);
@@ -1276,6 +1497,14 @@
return true;
}
+ public int getTryCount() {
+ return tryCount;
+ }
+
+ public void setTryCount(int tryCount) {
+ this.tryCount = tryCount;
+ }
+
public int close() {
try {
if (plan != null) {
@@ -1308,18 +1537,21 @@
}
public void destroy() {
- releaseLocks();
+ if (ctx != null) {
+ releaseLocks(ctx.getHiveLocks());
+ }
+
+ if (hiveLockMgr != null) {
+ try {
+ hiveLockMgr.close();
+ } catch (LockException e) {
+ LOG.warn("Exception in closing hive lock manager. "
+ + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ }
+ }
}
public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException {
return plan.getQueryPlan();
}
-
- public int getTryCount() {
- return tryCount;
- }
-
- public void setTryCount(int tryCount) {
- this.tryCount = tryCount;
- }
}
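
The new destroy() guards both the lock release and the lock-manager close, logging rather than propagating a close failure. The defensive shape, reduced to a sketch:

    public class DestroySketch {
        interface LockManager { void close() throws Exception; }

        static void destroy(LockManager lockMgr) {
            if (lockMgr != null) {
                try {
                    lockMgr.close();
                } catch (Exception e) {
                    // log and continue: destroy() should not throw on shutdown
                    System.err.println("Exception in closing lock manager: " + e);
                }
            }
        }

        public static void main(String[] args) {
            destroy(() -> System.out.println("lock manager closed"));
        }
    }
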
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
index 2d5191d..1b96259 100644
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
@@ -68,6 +68,7 @@
@Override
public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo) throws SemanticException {
+ @SuppressWarnings("deprecation")
TypeInfo[] parameters = paramInfo.getParameters();
if (parameters.length == 0) {
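
The added annotation narrows the deprecation suppression to the single declaration that calls the deprecated accessor, rather than silencing the whole method or class. The placement pattern in isolation (oldAccessor is a made-up stand-in):

    public class DeprecationSketch {
        @Deprecated
        static String[] oldAccessor() { return new String[0]; }

        public static void main(String[] args) {
            @SuppressWarnings("deprecation") // scoped to this one declaration
            String[] parameters = oldAccessor();
            System.out.println(parameters.length);
        }
    }
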
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
index 0fea4b9..e26f477 100644
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
@@ -36,7 +36,7 @@
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
-import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
/**
@@ -65,15 +65,18 @@
return result;
}
- public static final TypeInfo voidTypeInfo = getPrimitiveTypeInfo(Constants.VOID_TYPE_NAME);
- public static final TypeInfo booleanTypeInfo = getPrimitiveTypeInfo(Constants.BOOLEAN_TYPE_NAME);
- public static final TypeInfo intTypeInfo = getPrimitiveTypeInfo(Constants.INT_TYPE_NAME);
- public static final TypeInfo longTypeInfo = getPrimitiveTypeInfo(Constants.BIGINT_TYPE_NAME);
- public static final TypeInfo stringTypeInfo = getPrimitiveTypeInfo(Constants.STRING_TYPE_NAME);
- public static final TypeInfo floatTypeInfo = getPrimitiveTypeInfo(Constants.FLOAT_TYPE_NAME);
- public static final TypeInfo doubleTypeInfo = getPrimitiveTypeInfo(Constants.DOUBLE_TYPE_NAME);
- public static final TypeInfo byteTypeInfo = getPrimitiveTypeInfo(Constants.TINYINT_TYPE_NAME);
- public static final TypeInfo shortTypeInfo = getPrimitiveTypeInfo(Constants.SMALLINT_TYPE_NAME);
+ public static final TypeInfo voidTypeInfo = getPrimitiveTypeInfo(serdeConstants.VOID_TYPE_NAME);
+ public static final TypeInfo booleanTypeInfo = getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME);
+ public static final TypeInfo intTypeInfo = getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME);
+ public static final TypeInfo longTypeInfo = getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME);
+ public static final TypeInfo stringTypeInfo = getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
+ public static final TypeInfo floatTypeInfo = getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME);
+ public static final TypeInfo doubleTypeInfo = getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME);
+ public static final TypeInfo byteTypeInfo = getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME);
+ public static final TypeInfo shortTypeInfo = getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME);
+ public static final TypeInfo timestampTypeInfo = getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME);
+ public static final TypeInfo binaryTypeInfo = getPrimitiveTypeInfo(serdeConstants.BINARY_TYPE_NAME);
+ public static final TypeInfo decimalTypeInfo = getPrimitiveTypeInfo(serdeConstants.DECIMAL_TYPE_NAME);
public static final TypeInfo unknownTypeInfo = getPrimitiveTypeInfo("unknown");
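
This hunk is a mechanical rename: the deprecated Constants holder becomes serdeConstants, and three new type constants (timestamp, binary, decimal) appear; the string values behind the names are unchanged. A toy illustration of the call-site effect (the nested class is a stand-in, not the real org.apache.hadoop.hive.serde class):

    public class ConstantsRenameSketch {
        // stand-in for the renamed holder class (Constants -> serdeConstants)
        static final class serdeConstants {
            static final String INT_TYPE_NAME = "int";
            static final String BIGINT_TYPE_NAME = "bigint";
        }

        public static void main(String[] args) {
            // call sites change only the qualifier; the values stay the same
            System.out.println(serdeConstants.INT_TYPE_NAME);
            System.out.println(serdeConstants.BIGINT_TYPE_NAME);
        }
    }
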
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
deleted file mode 100644
index 23a842a..0000000
--- a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
+++ /dev/null
@@ -1,773 +0,0 @@
-<?xml version="1.0"?>
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<configuration>
-
- <!-- Hive Configuration can either be stored in this file or in the hadoop
- configuration files -->
- <!-- that are implied by Hadoop setup variables. -->
- <!-- Aside from Hadoop setup variables - this file is provided as a convenience
- so that Hive -->
- <!-- users do not have to edit hadoop configuration files (that may be managed
- as a centralized -->
- <!-- resource). -->
-
- <!-- Hive Execution Parameters -->
- <property>
- <name>mapred.reduce.tasks</name>
- <value>-1</value>
- <description>The default number of reduce tasks per job. Typically set
- to a prime close to the number of available hosts. Ignored when
- mapred.job.tracker is "local". Hadoop set this to 1 by default,
- whereas hive uses -1 as its default value.
- By setting this property to -1, Hive will automatically figure out what
- should be the number of reducers.
- </description>
- </property>
-
- <property>
- <name>hive.hyracks.connectorpolicy</name>
- <value>PIPELINING</value>
- </property>
-
- <property>
- <name>hive.hyracks.parrallelism</name>
- <value>4</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external</name>
- <value>true</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external.memory</name>
- <value>33554432</value>
- </property>
-
- <property>
- <name>hive.algebricks.sort.memory</name>
- <value>33554432</value>
- </property>
-
- <property>
- <name>hive.exec.reducers.bytes.per.reducer</name>
- <value>1000000000</value>
- <description>size per reducer.The default is 1G, i.e if the input size
- is 10G, it will use 10 reducers.</description>
- </property>
-
- <property>
- <name>hive.exec.reducers.max</name>
- <value>999</value>
- <description>max number of reducers will be used. If the one
- specified in the configuration parameter mapred.reduce.tasks is
- negative, hive will use this one as the max number of reducers when
- automatically determine number of reducers.</description>
- </property>
-
- <property>
- <name>hive.exec.scratchdir</name>
- <value>/hive-${user.name}</value>
- <description>Scratch space for Hive jobs</description>
- </property>
-
- <property>
- <name>hive.test.mode</name>
- <value>false</value>
- <description>whether hive is running in test mode. If yes, it turns on
- sampling and prefixes the output tablename</description>
- </property>
-
- <property>
- <name>hive.test.mode.prefix</name>
- <value>test_</value>
- <description>if hive is running in test mode, prefixes the output
- table by this string</description>
- </property>
-
- <!-- If the input table is not bucketed, the denominator of the tablesample
- is determinied by the parameter below -->
- <!-- For example, the following query: -->
- <!-- INSERT OVERWRITE TABLE dest -->
- <!-- SELECT col1 from src -->
- <!-- would be converted to -->
- <!-- INSERT OVERWRITE TABLE test_dest -->
- <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
- <property>
- <name>hive.test.mode.samplefreq</name>
- <value>32</value>
- <description>if hive is running in test mode and table is not
- bucketed, sampling frequency</description>
- </property>
-
- <property>
- <name>hive.test.mode.nosamplelist</name>
- <value></value>
- <description>if hive is running in test mode, dont sample the above
- comma seperated list of tables</description>
- </property>
-
- <property>
- <name>hive.metastore.local</name>
- <value>true</value>
- <description>controls whether to connect to remove metastore server or
- open a new metastore server in Hive Client JVM</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionURL</name>
- <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
- <description>JDBC connect string for a JDBC metastore</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionDriverName</name>
- <value>org.apache.derby.jdbc.EmbeddedDriver</value>
- <description>Driver class name for a JDBC metastore</description>
- </property>
-
- <property>
- <name>javax.jdo.PersistenceManagerFactoryClass</name>
- <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
- <description>class implementing the jdo persistence</description>
- </property>
-
- <property>
- <name>datanucleus.connectionPoolingType</name>
- <value>DBCP</value>
- <description>Uses a DBCP connection pool for JDBC metastore
- </description>
- </property>
-
- <property>
- <name>javax.jdo.option.DetachAllOnCommit</name>
- <value>true</value>
- <description>detaches all objects from session so that they can be
- used after transaction is committed</description>
- </property>
-
- <property>
- <name>javax.jdo.option.NonTransactionalRead</name>
- <value>true</value>
- <description>reads outside of transactions</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionUserName</name>
- <value>APP</value>
- <description>username to use against metastore database</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionPassword</name>
- <value>mine</value>
- <description>password to use against metastore database</description>
- </property>
-
- <property>
- <name>datanucleus.validateTables</name>
- <value>false</value>
- <description>validates existing schema against code. turn this on if
- you want to verify existing schema </description>
- </property>
-
- <property>
- <name>datanucleus.validateColumns</name>
- <value>false</value>
- <description>validates existing schema against code. turn this on if
- you want to verify existing schema </description>
- </property>
-
- <property>
- <name>datanucleus.validateConstraints</name>
- <value>false</value>
- <description>validates existing schema against code. turn this on if
- you want to verify existing schema </description>
- </property>
-
- <property>
- <name>datanucleus.storeManagerType</name>
- <value>rdbms</value>
- <description>metadata store type</description>
- </property>
-
- <property>
- <name>datanucleus.autoCreateSchema</name>
- <value>true</value>
- <description>creates necessary schema on a startup if one doesn't
- exist. set this to false, after creating it once</description>
- </property>
-
- <property>
- <name>datanucleus.autoStartMechanismMode</name>
- <value>checked</value>
- <description>throw exception if metadata tables are incorrect
- </description>
- </property>
-
- <property>
- <name>datanucleus.transactionIsolation</name>
- <value>read-committed</value>
- <description>Default transaction isolation level for identity
- generation. </description>
- </property>
-
- <property>
- <name>datanucleus.cache.level2</name>
- <value>false</value>
- <description>Use a level 2 cache. Turn this off if metadata is changed
- independently of hive metastore server</description>
- </property>
-
- <property>
- <name>datanucleus.cache.level2.type</name>
- <value>SOFT</value>
- <description>SOFT=soft reference based cache, WEAK=weak reference
- based cache.</description>
- </property>
-
- <property>
- <name>datanucleus.identifierFactory</name>
- <value>datanucleus</value>
- <description>Name of the identifier factory to use when generating
- table/column names etc. 'datanucleus' is used for backward
- compatibility</description>
- </property>
-
- <property>
- <name>hive.metastore.warehouse.dir</name>
- <value>/user/hivesterix</value>
- <description>location of default database for the warehouse
- </description>
- </property>
-
- <property>
- <name>hive.metastore.connect.retries</name>
- <value>5</value>
- <description>Number of retries while opening a connection to metastore
- </description>
- </property>
-
- <property>
- <name>hive.metastore.rawstore.impl</name>
- <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
- <description>Name of the class that implements
- org.apache.hadoop.hive.metastore.rawstore interface. This class is
- used to store and retrieval of raw metadata objects such as table,
- database</description>
- </property>
-
- <property>
- <name>hive.default.fileformat</name>
- <value>TextFile</value>
- <description>Default file format for CREATE TABLE statement. Options
- are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
- ... STORED AS <TEXTFILE|SEQUENCEFILE> to override</description>
- </property>
-
- <property>
- <name>hive.fileformat.check</name>
- <value>true</value>
- <description>Whether to check file format or not when loading data
- files</description>
- </property>
-
- <property>
- <name>hive.map.aggr</name>
- <value>true</value>
- <description>Whether to use map-side aggregation in Hive Group By
- queries</description>
- </property>
-
- <property>
- <name>hive.groupby.skewindata</name>
- <value>false</value>
- <description>Whether there is skew in data to optimize group by
- queries</description>
- </property>
-
- <property>
- <name>hive.groupby.mapaggr.checkinterval</name>
- <value>100000</value>
- <description>Number of rows after which size of the grouping
- keys/aggregation classes is performed</description>
- </property>
-
- <property>
- <name>hive.mapred.local.mem</name>
- <value>0</value>
- <description>For local mode, memory of the mappers/reducers
- </description>
- </property>
-
- <property>
- <name>hive.map.aggr.hash.percentmemory</name>
- <value>0.5</value>
- <description>Portion of total memory to be used by map-side grup
- aggregation hash table</description>
- </property>
-
- <property>
- <name>hive.map.aggr.hash.min.reduction</name>
- <value>0.5</value>
- <description>Hash aggregation will be turned off if the ratio between
- hash
- table size and input rows is bigger than this number. Set to 1 to make
- sure
- hash aggregation is never turned off.</description>
- </property>
-
- <property>
- <name>hive.optimize.cp</name>
- <value>true</value>
- <description>Whether to enable column pruner</description>
- </property>
-
- <property>
- <name>hive.optimize.ppd</name>
- <value>true</value>
- <description>Whether to enable predicate pushdown</description>
- </property>
-
- <property>
- <name>hive.optimize.pruner</name>
- <value>true</value>
- <description>Whether to enable the new partition pruner which depends
- on predicate pushdown. If this is disabled,
- the old partition pruner which is based on AST will be enabled.
- </description>
- </property>
-
- <property>
- <name>hive.optimize.groupby</name>
- <value>true</value>
- <description>Whether to enable the bucketed group by from bucketed
- partitions/tables.</description>
- </property>
-
- <property>
- <name>hive.join.emit.interval</name>
- <value>1000</value>
- <description>How many rows in the right-most join operand Hive should
- buffer before emitting the join result. </description>
- </property>
-
- <property>
- <name>hive.join.cache.size</name>
- <value>25000</value>
- <description>How many rows in the joining tables (except the streaming
- table) should be cached in memory. </description>
- </property>
-
- <property>
- <name>hive.mapjoin.bucket.cache.size</name>
- <value>100</value>
- <description>How many values in each keys in the map-joined table
- should be cached in memory. </description>
- </property>
-
- <property>
- <name>hive.mapjoin.maxsize</name>
- <value>100000</value>
- <description>Maximum # of rows of the small table that can be handled
- by map-side join. If the size is reached and hive.task.progress is
- set, a fatal error counter is set and the job will be killed.
- </description>
- </property>
-
- <property>
- <name>hive.mapjoin.cache.numrows</name>
- <value>25000</value>
- <description>How many rows should be cached by jdbm for map join.
- </description>
- </property>
-
- <property>
- <name>hive.optimize.skewjoin</name>
- <value>false</value>
- <description>Whether to enable skew join optimization. </description>
- </property>
-
- <property>
- <name>hive.skewjoin.key</name>
- <value>100000</value>
- <description>Determine if we get a skew key in join. If we see more
- than the specified number of rows with the same key in join operator,
- we think the key as a skew join key. </description>
- </property>
-
- <property>
- <name>hive.skewjoin.mapjoin.map.tasks</name>
- <value>10000</value>
- <description> Determine the number of map task used in the follow up
- map join job
- for a skew join. It should be used together with
- hive.skewjoin.mapjoin.min.split
- to perform a fine grained control.</description>
- </property>
-
- <property>
- <name>hive.skewjoin.mapjoin.min.split</name>
- <value>33554432</value>
- <description> Determine the number of map task at most used in the
- follow up map join job
- for a skew join by specifying the minimum split size. It should be used
- together with
- hive.skewjoin.mapjoin.map.tasks to perform a fine grained control.</description>
- </property>
-
- <property>
- <name>hive.mapred.mode</name>
- <value>nonstrict</value>
- <description>The mode in which the hive operations are being
- performed. In strict mode, some risky queries are not allowed to run
- </description>
- </property>
-
- <property>
- <name>hive.exec.script.maxerrsize</name>
- <value>100000</value>
- <description>Maximum number of bytes a script is allowed to emit to
- standard error (per map-reduce task). This prevents runaway scripts
- from filling logs partitions to capacity </description>
- </property>
-
- <property>
- <name>hive.exec.script.allow.partial.consumption</name>
- <value>false</value>
- <description> When enabled, this option allows a user script to exit
- successfully without consuming all the data from the standard input.
- </description>
- </property>
-
- <property>
- <name>hive.script.operator.id.env.var</name>
- <value>HIVE_SCRIPT_OPERATOR_ID</value>
- <description> Name of the environment variable that holds the unique
- script operator ID in the user's transform function (the custom
- mapper/reducer that the user has specified in the query)
- </description>
- </property>
-
- <property>
- <name>hive.exec.compress.output</name>
- <value>false</value>
- <description> This controls whether the final outputs of a query (to a
- local/hdfs file or a hive table) is compressed. The compression codec
- and other options are determined from hadoop config variables
- mapred.output.compress* </description>
- </property>
-
- <property>
- <name>hive.exec.compress.intermediate</name>
- <value>false</value>
- <description> This controls whether intermediate files produced by
- hive between multiple map-reduce jobs are compressed. The compression
- codec and other options are determined from hadoop config variables
- mapred.output.compress* </description>
- </property>
-
- <property>
- <name>hive.exec.parallel</name>
- <value>false</value>
- <description>Whether to execute jobs in parallel</description>
- </property>
-
- <property>
- <name>hive.exec.parallel.thread.number</name>
- <value>8</value>
- <description>How many jobs at most can be executed in parallel
- </description>
- </property>
-
- <property>
- <name>hive.hwi.war.file</name>
- <value>lib\hive-hwi-0.7.0.war</value>
- <description>This sets the path to the HWI war file, relative to
- ${HIVE_HOME}. </description>
- </property>
-
- <property>
- <name>hive.hwi.listen.host</name>
- <value>0.0.0.0</value>
- <description>This is the host address the Hive Web Interface will
- listen on</description>
- </property>
-
- <property>
- <name>hive.hwi.listen.port</name>
- <value>9999</value>
- <description>This is the port the Hive Web Interface will listen on
- </description>
- </property>
-
- <property>
- <name>hive.exec.pre.hooks</name>
- <value></value>
- <description>Pre Execute Hook for Tests</description>
- </property>
-
- <property>
- <name>hive.merge.mapfiles</name>
- <value>true</value>
- <description>Merge small files at the end of a map-only job
- </description>
- </property>
-
- <property>
- <name>hive.merge.mapredfiles</name>
- <value>false</value>
- <description>Merge small files at the end of a map-reduce job
- </description>
- </property>
-
- <property>
- <name>hive.heartbeat.interval</name>
- <value>1000</value>
- <description>Send a heartbeat after this interval - used by mapjoin
- and filter operators</description>
- </property>
-
- <property>
- <name>hive.merge.size.per.task</name>
- <value>256000000</value>
- <description>Size of merged files at the end of the job</description>
- </property>
-
- <property>
- <name>hive.merge.size.smallfiles.avgsize</name>
- <value>16000000</value>
- <description>When the average output file size of a job is less than
- this number, Hive will start an additional map-reduce job to merge
- the output files into bigger files. This is only done for map-only
- jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
- hive.merge.mapredfiles is true.</description>
- </property>
-
- <property>
- <name>hive.script.auto.progress</name>
- <value>false</value>
- <description>Whether Hive Tranform/Map/Reduce Clause should
- automatically send progress information to TaskTracker to avoid the
- task getting killed because of inactivity. Hive sends progress
- information when the script is outputting to stderr. This option
- removes the need of periodically producing stderr messages, but users
- should be cautious because this may prevent infinite loops in the
- scripts to be killed by TaskTracker. </description>
- </property>
-
- <property>
- <name>hive.script.serde</name>
- <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
- <description>The default serde for trasmitting input data to and
- reading output data from the user scripts. </description>
- </property>
-
- <property>
- <name>hive.script.recordreader</name>
- <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
- <description>The default record reader for reading data from the user
- scripts. </description>
- </property>
-
- <property>
- <name>hive.script.recordwriter</name>
- <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
- <description>The default record writer for writing data to the user
- scripts. </description>
- </property>
-
- <property>
- <name>hive.input.format</name>
- <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
- <description>The default input format, if it is not specified, the
- system assigns it. It is set to HiveInputFormat for hadoop versions
- 17, 18 and 19, whereas it is set to CombinedHiveInputFormat for
- hadoop 20. The user can always overwrite it - if there is a bug in
- CombinedHiveInputFormat, it can always be manually set to
- HiveInputFormat. </description>
- </property>
-
- <property>
- <name>hive.udtf.auto.progress</name>
- <value>false</value>
- <description>Whether Hive should automatically send progress
- information to TaskTracker when using UDTF's to prevent the task
- getting killed because of inactivity. Users should be cautious
- because this may prevent TaskTracker from killing tasks with infinte
- loops. </description>
- </property>
-
- <property>
- <name>hive.mapred.reduce.tasks.speculative.execution</name>
- <value>true</value>
- <description>Whether speculative execution for reducers should be
- turned on. </description>
- </property>
-
- <property>
- <name>hive.exec.counters.pull.interval</name>
- <value>1000</value>
- <description>The interval with which to poll the JobTracker for the
- counters the running job. The smaller it is the more load there will
- be on the jobtracker, the higher it is the less granular the caught
- will be.</description>
- </property>
-
- <property>
- <name>hive.enforce.bucketing</name>
- <value>false</value>
- <description>Whether bucketing is enforced. If true, while inserting
- into the table, bucketing is enforced. </description>
- </property>
-
- <property>
- <name>hive.enforce.sorting</name>
- <value>false</value>
- <description>Whether sorting is enforced. If true, while inserting
- into the table, sorting is enforced. </description>
- </property>
-
- <property>
- <name>hive.metastore.ds.connection.url.hook</name>
- <value></value>
- <description>Name of the hook to use for retriving the JDO connection
- URL. If empty, the value in javax.jdo.option.ConnectionURL is used
- </description>
- </property>
-
- <property>
- <name>hive.metastore.ds.retry.attempts</name>
- <value>1</value>
- <description>The number of times to retry a metastore call if there
- were a connection error</description>
- </property>
-
- <property>
- <name>hive.metastore.ds.retry.interval</name>
- <value>1000</value>
- <description>The number of miliseconds between metastore retry
- attempts</description>
- </property>
-
- <property>
- <name>hive.metastore.server.min.threads</name>
- <value>200</value>
- <description>Minimum number of worker threads in the Thrift server's
- pool.</description>
- </property>
-
- <property>
- <name>hive.metastore.server.max.threads</name>
- <value>100000</value>
- <description>Maximum number of worker threads in the Thrift server's
- pool.</description>
- </property>
-
- <property>
- <name>hive.metastore.server.tcp.keepalive</name>
- <value>true</value>
- <description>Whether to enable TCP keepalive for the metastore server.
- Keepalive will prevent accumulation of half-open connections.
- </description>
- </property>
-
- <property>
- <name>hive.optimize.reducededuplication</name>
- <value>true</value>
- <description>Remove extra map-reduce jobs if the data is already
- clustered by the same key which needs to be used again. This should
- always be set to true. Since it is a new feature, it has been made
- configurable.</description>
- </property>
-
- <property>
- <name>hive.exec.dynamic.partition</name>
- <value>false</value>
- <description>Whether or not to allow dynamic partitions in DML/DDL.
- </description>
- </property>
-
- <property>
- <name>hive.exec.dynamic.partition.mode</name>
- <value>strict</value>
- <description>In strict mode, the user must specify at least one static
- partition in case the user accidentally overwrites all partitions.
- </description>
- </property>
-
- <property>
- <name>hive.exec.max.dynamic.partitions</name>
- <value>1000</value>
- <description>Maximum number of dynamic partitions allowed to be
- created in total.</description>
- </property>
-
- <property>
- <name>hive.exec.max.dynamic.partitions.pernode</name>
- <value>100</value>
- <description>Maximum number of dynamic partitions allowed to be
- created in each mapper/reducer node.</description>
- </property>
-
- <property>
- <name>hive.default.partition.name</name>
- <value>__HIVE_DEFAULT_PARTITION__</value>
- <description>The default partition name in case the dynamic partition
- column value is null/empty string or anyother values that cannot be
- escaped. This value must not contain any special character used in
- HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the
- dynamic partition value should not contain this value to avoid
- confusions.</description>
- </property>
-
- <property>
- <name>fs.har.impl</name>
- <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
- <description>The implementation for accessing Hadoop Archives. Note
- that this won't be applicable to Hadoop vers less than 0.20
- </description>
- </property>
-
- <property>
- <name>hive.archive.enabled</name>
- <value>false</value>
- <description>Whether archiving operations are permitted</description>
- </property>
-
- <property>
- <name>hive.archive.har.parentdir.settable</name>
- <value>false</value>
- <description>In new Hadoop versions, the parent directory must be set
- while
- creating a HAR. Because this functionality is hard to detect with just
- version
- numbers, this conf var needs to be set manually.</description>
- </property>
-
- <!-- HBase Storage Handler Parameters -->
-
- <property>
- <name>hive.hbase.wal.enabled</name>
- <value>true</value>
- <description>Whether writes to HBase should be forced to the
- write-ahead log. Disabling this improves HBase write performance at
- the risk of lost writes in case of a crash.</description>
- </property>
-
-</configuration>
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
index eab38a6..6f195f5 100644
--- a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
@@ -31,7 +31,7 @@
# FATAL, ERROR, WARN, INFO, DEBUG
#
#------------------------------------------------------------------------------
-log4j.rootCategory=INFO, S
+log4j.rootCategory=FATAL, S
log4j.logger.com.dappit.Dapper.parser=ERROR
log4j.logger.org.w3c.tidy=FATAL
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-site.xml b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-site.xml
new file mode 100644
index 0000000..ccfcd74
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-site.xml
@@ -0,0 +1,5189 @@
+<?xml version="1.0"?>
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+ <configuration>
+ <!-- Hivesterix Execution Parameters -->
+ <property>
+ <name>hive.hyracks.connectorpolicy</name>
+ <value>PIPELINING</value>
+ </property>
+
+ <property>
+ <name>hive.hyracks.parrallelism</name>
+ <value>4</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external.memory</name>
+ <value>33554432</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.sort.memory</name>
+ <value>33554432</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.framesize</name>
+ <value>32768</value>
+ </property>
+
+ <!-- Hive Execution Parameters -->
+ <property>
+ <name>mapred.reduce.tasks</name>
+ <value>-1</value>
+ <description>The default number of reduce tasks per job. Typically set
+ to a prime close to the number of available hosts. Ignored when
+ mapred.job.tracker is "local". Hadoop sets this to 1 by default,
+ whereas hive uses -1 as its default value. By setting this property
+ to -1, Hive will automatically figure out what the number of reducers
+ should be.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.bytes.per.reducer</name>
+ <value>1000000000</value>
+ <description>Size per reducer. The default is 1G, i.e. if the input
+ size is 10G, it will use 10 reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.max</name>
+ <value>999</value>
+ <description>The maximum number of reducers that will be used. If the
+ one specified in the configuration parameter mapred.reduce.tasks is
+ negative, hive will use this as the max number of reducers when
+ automatically determining the number of reducers.
+ </description>
+ </property>
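
Taken together, mapred.reduce.tasks = -1, hive.exec.reducers.bytes.per.reducer and hive.exec.reducers.max describe a simple estimate: divide the input size by the per-reducer budget and cap the result. A sketch of that arithmetic (estimateReducers is illustrative, not Hive's actual implementation):

    public class ReducerEstimateSketch {
        static int estimateReducers(long inputBytes, long bytesPerReducer, int maxReducers) {
            int reducers = (int) ((inputBytes + bytesPerReducer - 1) / bytesPerReducer); // ceil
            return Math.max(1, Math.min(reducers, maxReducers));
        }

        public static void main(String[] args) {
            // 10G input with 1G per reducer -> 10 reducers, well under the cap of 999
            System.out.println(estimateReducers(10_000_000_000L, 1_000_000_000L, 999));
        }
    }
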
+
+ <property>
+ <name>hive.cli.print.header</name>
+ <value>false</value>
+ <description>Whether to print the names of the columns in query
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.print.current.db</name>
+ <value>false</value>
+ <description>Whether to include the current database in the hive
+ prompt.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.prompt</name>
+ <value>hive</value>
+ <description>Command line prompt configuration value. Other hiveconf
+ can be used in this configuration value. Variable substitution will
+ only be invoked at the hive cli startup.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.pretty.output.num.cols</name>
+ <value>-1</value>
+ <description>The number of columns to use when formatting output
+ generated by the DESCRIBE PRETTY table_name command. If the value of
+ this property is -1, then hive will use the auto-detected terminal
+ width.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.scratchdir</name>
+ <value>/tmp/hive-${user.name}</value>
+ <description>Scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.exec.local.scratchdir</name>
+ <value>/tmp/${user.name}</value>
+ <description>Local scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.test.mode</name>
+ <value>false</value>
+ <description>whether hive is running in test mode. If yes, it turns
+ on sampling and prefixes the output tablename
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.prefix</name>
+ <value>test_</value>
+ <description>if hive is running in test mode, prefixes the output
+ table by this string
+ </description>
+ </property>
+
+ <!-- If the input table is not bucketed, the denominator of the tablesample
+ is determined by the parameter below -->
+ <!-- For example, the following query: -->
+ <!-- INSERT OVERWRITE TABLE dest -->
+ <!-- SELECT col1 from src -->
+ <!-- would be converted to -->
+ <!-- INSERT OVERWRITE TABLE test_dest -->
+ <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
+ <property>
+ <name>hive.test.mode.samplefreq</name>
+ <value>32</value>
+ <description>if hive is running in test mode and table is not
+ bucketed, sampling frequency
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.nosamplelist</name>
+ <value></value>
+ <description>if hive is running in test mode, don't sample the above
+ comma-separated list of tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.uris</name>
+ <value></value>
+ <description>Thrift uri for the remote metastore. Used by metastore
+ client to connect to remote metastore.
+ </description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionURL</name>
+ <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
+ <description>JDBC connect string for a JDBC metastore</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionDriverName</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>Driver class name for a JDBC metastore</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.PersistenceManagerFactoryClass</name>
+ <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
+ <description>class implementing the jdo persistence</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.DetachAllOnCommit</name>
+ <value>true</value>
+ <description>detaches all objects from session so that they can be
+ used after transaction is committed
+ </description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.NonTransactionalRead</name>
+ <value>true</value>
+ <description>reads outside of transactions</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionUserName</name>
+ <value>APP</value>
+ <description>username to use against metastore database</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionPassword</name>
+ <value>mine</value>
+ <description>password to use against metastore database</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.Multithreaded</name>
+ <value>true</value>
+ <description>Set this to true if multiple threads access metastore
+ through JDO concurrently.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.connectionPoolingType</name>
+ <value>DBCP</value>
+ <description>Uses a DBCP connection pool for JDBC metastore
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateTables</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateColumns</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateConstraints</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.storeManagerType</name>
+ <value>rdbms</value>
+ <description>metadata store type</description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoCreateSchema</name>
+ <value>true</value>
+ <description>creates necessary schema on a startup if one doesn't
+ exist. set this to false, after creating it once
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoStartMechanismMode</name>
+ <value>checked</value>
+ <description>throw exception if metadata tables are incorrect
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.transactionIsolation</name>
+ <value>read-committed</value>
+ <description>Default transaction isolation level for identity
+ generation.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2</name>
+ <value>false</value>
+ <description>Use a level 2 cache. Turn this off if metadata is
+ changed independently of hive metastore server
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2.type</name>
+ <value>SOFT</value>
+ <description>SOFT=soft reference based cache, WEAK=weak reference
+ based cache.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.identifierFactory</name>
+ <value>datanucleus</value>
+ <description>Name of the identifier factory to use when generating
+ table/column names etc. 'datanucleus' is used for backward
+ compatibility
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.plugin.pluginRegistryBundleCheck</name>
+ <value>LOG</value>
+ <description>Defines what happens when plugin bundles are found and
+ are duplicated [EXCEPTION|LOG|NONE]
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.warehouse.dir</name>
+ <value>/user/hive/warehouse</value>
+ <description>location of default database for the warehouse
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.execute.setugi</name>
+ <value>false</value>
+ <description>In unsecure mode, setting this property to true will
+ cause the metastore to execute DFS operations using the client's
+ reported user and group permissions. Note that this property must be
+ set on both the client and server sides. Further note that it is best
+ effort. If the client sets it to true and the server sets it to false,
+ the client setting will be ignored.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.listeners</name>
+ <value></value>
+ <description>list of comma-separated listeners for metastore events.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.inherit.table.properties</name>
+ <value></value>
+ <description>list of comma-separated keys occurring in table
+ properties which will get inherited to newly created partitions. *
+ implies all the keys will get inherited.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.export.location</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, it is the location to which the metadata will be exported.
+ The default is an empty string, which results in the metadata being
+ exported to the current user's home directory on HDFS.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.move.exported.metadata.to.trash</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, this setting determines if the metadata that is exported
+ will subsequently be moved to the user's trash directory alongside
+ the dropped table data. This ensures that the metadata will be
+ cleaned up along with the dropped table data.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.name.whitelist.pattern</name>
+ <value></value>
+ <description>Partition names will be checked against this regex
+ pattern and rejected if not matched.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.end.function.listeners</name>
+ <value></value>
+ <description>list of comma separated listeners for the end of
+ metastore functions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.expiry.duration</name>
+ <value>0</value>
+ <description>Duration after which events expire from events table (in
+ seconds)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.clean.freq</name>
+ <value>0</value>
+ <description>Frequency at which timer task runs to purge expired
+ events in the metastore (in seconds).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.connect.retries</name>
+ <value>5</value>
+ <description>Number of retries while opening a connection to
+ metastore
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.failure.retries</name>
+ <value>3</value>
+ <description>Number of retries upon failure of Thrift metastore calls
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.connect.retry.delay</name>
+ <value>1</value>
+ <description>Number of seconds for the client to wait between
+ consecutive connection attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.socket.timeout</name>
+ <value>20</value>
+ <description>MetaStore Client socket timeout in seconds</description>
+ </property>
+
+ <property>
+ <name>hive.metastore.rawstore.impl</name>
+ <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+ <description>Name of the class that implements
+ org.apache.hadoop.hive.metastore.rawstore interface. This class is
+ used to store and retrieval of raw metadata objects such as table,
+ database
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.max</name>
+ <value>300</value>
+ <description>Maximum number of objects (tables/partitions) that can be
+ retrieved from the metastore in one batch. The higher the number, the
+ less the number of round trips is needed to the Hive metastore
+ server, but it may also cause higher memory requirement at the
+ client side.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.table.partition.max</name>
+ <value>1000</value>
+ <description>Maximum number of table partitions that metastore
+ internally retrieves in one batch.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.default.fileformat</name>
+ <value>TextFile</value>
+ <description>Default file format for CREATE TABLE statement. Options
+ are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
+ ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override
+ </description>
+ </property>
+
+ <property>
+ <name>hive.fileformat.check</name>
+ <value>true</value>
+ <description>Whether to check file format or not when loading data
+ files
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr</name>
+ <value>true</value>
+ <description>Whether to use map-side aggregation in Hive Group By
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.skewindata</name>
+ <value>false</value>
+ <description>Whether there is skew in data to optimize group by
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.multigroupby.common.distincts</name>
+ <value>true</value>
+ <description>Whether to optimize a multi-groupby query with the same
+ distinct. Consider a query like:
+
+ from src
+ insert overwrite table dest1 select col1, count(distinct colx) group by col1
+ insert overwrite table dest2 select col2, count(distinct colx) group by col2;
+
+ With this parameter set to true, first we spray by the distinct value
+ (colx), and then perform the 2 group bys. This makes sense if map-side
+ aggregation is turned off. However, with map-side aggregation, it
+ might be useful in some cases to treat the 2 inserts independently,
+ thereby performing the query above in 2 MR jobs instead of 3 (due to
+ spraying by the distinct key first). If this parameter is turned off,
+ we don't consider the fact that the distinct key is the same across
+ different MR jobs.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.mapaggr.checkinterval</name>
+ <value>100000</value>
+ <description>Number of rows after which size of the grouping
+ keys/aggregation classes is performed
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.local.mem</name>
+ <value>0</value>
+ <description>For local mode, memory of the mappers/reducers
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name>
+ <value>0.3</value>
+ <description>Portion of total memory to be used by map-side group
+ aggregation hash table, when this group by is followed by map join
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.force.flush.memory.threshold</name>
+ <value>0.9</value>
+ <description>The max memory to be used by map-side group aggregation
+ hash table, if the memory usage is higher than this number, force to
+ flush data
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.percentmemory</name>
+ <value>0.5</value>
+ <description>Portion of total memory to be used by map-side group
+ aggregation hash table
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.min.reduction</name>
+ <value>0.5</value>
+ <description>Hash aggregation will be turned off if the ratio between
+ hash table size and input rows is bigger than this number. Set to 1
+ to make sure hash aggregation is never turned off.
+ </description>
+ </property>
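
The hive.map.aggr.hash.min.reduction test reduces to a single comparison: keep hash aggregation only while hash-table size over input rows stays at or below the threshold. A sketch of the check (illustrative only, not Hive's operator code):

    public class HashAggrSketch {
        static boolean keepHashAggregation(long hashTableRows, long inputRows, double minReduction) {
            return (double) hashTableRows / inputRows <= minReduction;
        }

        public static void main(String[] args) {
            System.out.println(keepHashAggregation(400, 1000, 0.5)); // true: good reduction
            System.out.println(keepHashAggregation(900, 1000, 0.5)); // false: turn it off
        }
    }
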
+
+ <property>
+ <name>hive.optimize.cp</name>
+ <value>true</value>
+ <description>Whether to enable column pruner</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter</name>
+ <value>false</value>
+ <description>Whether to enable automatic use of indexes</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.groupby</name>
+ <value>false</value>
+ <description>Whether to enable optimization of group-by queries using
+ Aggregate indexes.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd</name>
+ <value>true</value>
+ <description>Whether to enable predicate pushdown</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd.storage</name>
+ <value>true</value>
+ <description>Whether to push predicates down into storage handlers.
+ Ignored when hive.optimize.ppd is false.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ppd.recognizetransivity</name>
+ <value>true</value>
+ <description>Whether to transitively replicate predicate filters over
+ equijoin conditions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.groupby</name>
+ <value>true</value>
+ <description>Whether to enable the bucketed group by from bucketed
+ partitions/tables.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin.compiletime</name>
+ <value>false</value>
+ <description>Whether to create a separate plan for skewed keys for
+ the tables in the join. This is based on the skewed keys stored in
+ the metadata. At compile time, the plan is broken into different
+ joins: one for the skewed keys, and the other for the remaining keys.
+ And then, a union is performed for the 2 joins generated above. So
+ unless the same skewed key is present in both the joined tables, the
+ join for the skewed key will be performed as a map-side join.
+
+ The main difference between this parameter and hive.optimize.skewjoin
+ is that this parameter uses the skew information stored in the
+ metastore to optimize the plan at compile time itself. If there is no
+ skew information in the metadata, this parameter will not have any
+ effect. Both hive.optimize.skewjoin.compiletime and
+ hive.optimize.skewjoin should be set to true. Ideally,
+ hive.optimize.skewjoin should be renamed as
+ hive.optimize.skewjoin.runtime, but we are not doing so for backward
+ compatibility.
+
+ If the skew information is correctly stored in the metadata,
+ hive.optimize.skewjoin.compiletime would change the query plan to
+ take care of it, and hive.optimize.skewjoin will be a no-op.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.union.remove</name>
+ <value>false</value>
+ <description>
+ Whether to remove the union and push the operators between the union
+ and the filesink above the union. This avoids an extra scan of the
+ output by the union. This is independently useful for union queries,
+ and especially useful when hive.optimize.skewjoin.compiletime is set
+ to true, since an extra union is inserted.
+
+ The merge is triggered if either of hive.merge.mapfiles or
+ hive.merge.mapredfiles is set to true. If the user has set
+ hive.merge.mapfiles to true and hive.merge.mapredfiles to false, the
+ idea was that the number of reducers is few, so the number of files
+ is anyway small. However, with this optimization, we may increase the
+ number of files by a big margin. So, we merge aggressively.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.supports.subdirectories</name>
+ <value>false</value>
+ <description>Whether the version of hadoop which is running supports
+ sub-directories for tables/partitions. Many hive optimizations can be
+ applied if the hadoop version supports sub-directories for
+ tables/partitions. It was added by MAPREDUCE-1501.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multigroupby.singlemr</name>
+ <value>true</value>
+ <description>Whether to optimize a multi group by query to generate a
+ single M/R job plan. If the multi group by query has common group by
+ keys, it will be optimized to generate a single M/R job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to perform the group by in the mapper
+ by using BucketizedHiveInputFormat. The only downside to this is that
+ it limits the number of mappers to the number of files.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted.testmode</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to perform the group by in the mapper
+ by using BucketizedHiveInputFormat. If the test mode is set, the plan
+ is not converted, but a query property is set to denote the same.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.new.job.grouping.set.cardinality</name>
+ <value>30</value>
+ <description>
+ Whether a new map-reduce job should be launched for grouping
+ sets/rollups/cubes. For a query like: select a, b, c, count(1) from T
+ group by a, b, c with rollup; 4 rows are created per input row:
+ (a, b, c), (a, b, null), (a, null, null), (null, null, null). This
+ can lead to explosion across the map-reduce boundary if the
+ cardinality of T is very high, and map-side aggregation does not do a
+ very good job.
+
+ This parameter decides if hive should add an additional map-reduce
+ job. If the grouping set cardinality (4 in the example above) is more
+ than this value, a new MR job is added under the assumption that the
+ original group by will reduce the data size.
+ </description>
+ </property>
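
The hive.new.job.grouping.set.cardinality description boils down to a threshold test on the per-row expansion factor (4 for the rollup example above). A sketch of the decision (illustrative, not Hive's planner code):

    public class GroupingSetSketch {
        // "group by a, b, c with rollup" emits 4 rows per input row:
        // (a,b,c), (a,b,null), (a,null,null), (null,null,null).
        static boolean addExtraMrJob(int groupingSetCardinality, int threshold) {
            return groupingSetCardinality > threshold;
        }

        public static void main(String[] args) {
            System.out.println(addExtraMrJob(4, 30));  // false: single job is fine
            System.out.println(addExtraMrJob(64, 30)); // true: add a map-reduce job
        }
    }
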
+
+ <property>
+ <name>hive.join.emit.interval</name>
+ <value>1000</value>
+ <description>How many rows in the right-most join operand Hive should
+ buffer before emitting the join result.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.join.cache.size</name>
+ <value>25000</value>
+ <description>How many rows in the joining tables (except the
+ streaming table) should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.bucket.cache.size</name>
+ <value>100</value>
+ <description>How many values for each key in the map-joined table
+ should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.cache.numrows</name>
+ <value>25000</value>
+ <description>How many rows should be cached by jdbm for map join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin</name>
+ <value>false</value>
+ <description>Whether to enable skew join optimization.
+ The algorithm is as follows: at runtime, detect the keys with a
+ large skew. Instead of processing those keys, store them temporarily
+ in an HDFS directory. In a follow-up map-reduce job, process those
+ skewed keys. The same key need not be skewed for all the tables, and
+ so the follow-up map-reduce job (for the skewed keys) would be much
+ faster, since it would be a map-join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.key</name>
+ <value>100000</value>
+ <description>Determines if we get a skew key in a join. If we see
+ more than the specified number of rows with the same key in the join
+ operator, we treat the key as a skew join key.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.mapjoin.map.tasks</name>
+ <value>10000</value>
+ <description>Determines the number of map tasks used in the follow-up
+ map join job for a skew join. It should be used together with
+ hive.skewjoin.mapjoin.min.split to perform fine-grained control.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.mapjoin.min.split</name>
+ <value>33554432</value>
+ <description>Determines the maximum number of map tasks used in the
+ follow-up map join job for a skew join by specifying the minimum
+ split size. It should be used together with
+ hive.skewjoin.mapjoin.map.tasks to perform fine-grained control.
+ </description>
+ </property>
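+
+ <!-- Illustrative usage (not part of the original file): the skew join
+ settings above are typically enabled together from the Hive CLI:
+ set hive.optimize.skewjoin=true;
+ set hive.skewjoin.key=100000;
+ set hive.skewjoin.mapjoin.map.tasks=10000;
+ set hive.skewjoin.mapjoin.min.split=33554432;
+ -->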
+
+ <property>
+ <name>hive.mapred.mode</name>
+ <value>nonstrict</value>
+ <description>The mode in which the hive operations are being
+ performed. In strict mode, some risky queries are not allowed to
+ run. They include:
+ Cartesian product.
+ No partition being picked up for a query.
+ Comparing bigints and strings.
+ Comparing bigints and doubles.
+ ORDER BY without LIMIT.
+ </description>
+ </property>
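+
+ <!-- Illustrative example (not part of the original file): in strict
+ mode a query such as
+ select * from src order by key;
+ is rejected because it has an ORDER BY without a LIMIT; adding
+ "limit 100" (or setting hive.mapred.mode=nonstrict) allows it. -->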
+
+ <property>
+ <name>hive.enforce.bucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for a bucketed map-side join, and it
+ cannot be performed, should the query fail or not? For example, if
+ the buckets in the tables being joined are not a multiple of each
+ other, a bucketed map-side join cannot be performed, and the query
+ will fail if hive.enforce.bucketmapjoin is set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.maxerrsize</name>
+ <value>100000</value>
+ <description>Maximum number of bytes a script is allowed to emit to
+ standard error (per map-reduce task). This prevents runaway scripts
+ from filling log partitions to capacity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.allow.partial.consumption</name>
+ <value>false</value>
+ <description> When enabled, this option allows a user script to exit
+ successfully without consuming all the data from the standard input.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.id.env.var</name>
+ <value>HIVE_SCRIPT_OPERATOR_ID</value>
+ <description> Name of the environment variable that holds the unique
+ script operator ID in the user's transform function (the custom
+ mapper/reducer that the user has specified in the query)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.truncate.env</name>
+ <value>false</value>
+ <description>Truncate each environment variable for external script
+ in scripts operator to 20KB (to fit system limits)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.compress.output</name>
+ <value>false</value>
+ <description> This controls whether the final outputs of a query (to
+ a local/hdfs file or a hive table) is compressed. The compression
+ codec and other options are determined from hadoop config variables
+ mapred.output.compress*
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.compress.intermediate</name>
+ <value>false</value>
+ <description> This controls whether intermediate files produced by
+ hive between multiple map-reduce jobs are compressed. The
+ compression codec and other options are determined from hadoop
+ config variables mapred.output.compress*
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel</name>
+ <value>false</value>
+ <description>Whether to execute jobs in parallel</description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel.thread.number</name>
+ <value>8</value>
+ <description>How many jobs at most can be executed in parallel
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.rowoffset</name>
+ <value>false</value>
+ <description>Whether to provide the row offset virtual column
+ </description>
+ </property>
+
+ <property>
+ <name>hive.task.progress</name>
+ <value>false</value>
+ <description>Whether Hive should periodically update task progress
+ counters during execution. Enabling this allows task progress to be
+ monitored more closely in the job tracker, but may impose a
+ performance penalty. This flag is automatically set to true for jobs
+ with hive.exec.dynamic.partition set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.war.file</name>
+ <value>lib/hive-hwi-@VERSION@.war</value>
+ <description>This sets the path to the HWI war file, relative to
+ ${HIVE_HOME}.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.host</name>
+ <value>0.0.0.0</value>
+ <description>This is the host address the Hive Web Interface will
+ listen on
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.port</name>
+ <value>9999</value>
+ <description>This is the port the Hive Web Interface will listen on
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.pre.hooks</name>
+ <value></value>
+ <description>Comma-separated list of pre-execution hooks to be
+ invoked for each statement. A pre-execution hook is specified as the
+ name of a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.post.hooks</name>
+ <value></value>
+ <description>Comma-separated list of post-execution hooks to be
+ invoked for each statement. A post-execution hook is specified as
+ the name of a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.failure.hooks</name>
+ <value></value>
+ <description>Comma-separated list of on-failure hooks to be invoked
+ for each statement. An on-failure hook is specified as the name of a
+ Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
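+
+ <!-- Illustrative sketch (not part of the original file; the class name
+ com.example.AuditHook is hypothetical): a hook is wired in by
+ listing its fully qualified class name, e.g.
+ <property>
+ <name>hive.exec.post.hooks</name>
+ <value>com.example.AuditHook</value>
+ </property>
+ where com.example.AuditHook implements
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext. -->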
+
+ <property>
+ <name>hive.metastore.init.hooks</name>
+ <value></value>
+ <description>A comma separated list of hooks to be invoked at the
+ beginning of HMSHandler initialization. An init hook is specified as
+ the name of a Java class which extends
+ org.apache.hadoop.hive.metastore.MetaStoreInitListener.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.publishers</name>
+ <value></value>
+ <description>Comma-separated list of statistics publishers to be
+ invoked on counters on each job. A client stats publisher is
+ specified as the name of a Java class which implements the
+ org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.counters</name>
+ <value></value>
+ <description>Subset of counters that should be of interest for
+ hive.client.stats.publishers (when one wants to limit their
+ publishing). Non-display names should be used
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapfiles</name>
+ <value>true</value>
+ <description>Merge small files at the end of a map-only job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapredfiles</name>
+ <value>false</value>
+ <description>Merge small files at the end of a map-reduce job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.heartbeat.interval</name>
+ <value>1000</value>
+ <description>Send a heartbeat after this interval - used by mapjoin
+ and filter operators
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.size.per.task</name>
+ <value>256000000</value>
+ <description>Size of merged files at the end of the job</description>
+ </property>
+
+ <property>
+ <name>hive.merge.smallfiles.avgsize</name>
+ <value>16000000</value>
+ <description>When the average output file size of a job is less than
+ this number, Hive will start an additional map-reduce job to merge
+ the output files into bigger files. This is only done for map-only
+ jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
+ hive.merge.mapredfiles is true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.smalltable.filesize</name>
+ <value>25000000</value>
+ <description>The threshold for the input file size of the small
+ tables; if the file size is smaller than this threshold, it will try
+ to convert the common join into map join
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ignore.mapjoin.hint</name>
+ <value>true</value>
+ <description>Ignore the mapjoin hint</description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.localtask.max.memory.usage</name>
+ <value>0.90</value>
+ <description>The fraction of memory the local task can use to hold
+ the key/value pairs in the in-memory hash table. If the local task's
+ memory usage exceeds this number, the local task will abort itself,
+ meaning the data of the small table is too large to be held in
+ memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
+ <value>0.55</value>
+ <description>The fraction of memory the local task can use to hold
+ the key/value pairs in the in-memory hash table when this map join
+ is followed by a group by. If the local task's memory usage exceeds
+ this number, the local task will abort itself, meaning the data of
+ the small table is too large to be held in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.check.memory.rows</name>
+ <value>100000</value>
+ <description>The number of rows processed after which the memory
+ usage is checked.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join</name>
+ <value>false</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file size.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask</name>
+ <value>true</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file size. If this
+ parameter is on, and the sum of the sizes of n-1 of the
+ tables/partitions for an n-way join is smaller than the specified
+ size, the join is directly converted to a mapjoin (there is no
+ conditional task).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask.size</name>
+ <value>10000000</value>
+ <description>If hive.auto.convert.join.noconditionaltask is off, this
+ parameter does not take effect. However, if it is on, and the sum of
+ the sizes of n-1 of the tables/partitions for an n-way join is
+ smaller than this size, the join is directly converted to a mapjoin
+ (there is no conditional task). The default is 10MB.
+ </description>
+ </property>
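+
+ <!-- Illustrative usage (not part of the original file): to convert
+ qualifying common joins to mapjoins without a conditional task:
+ set hive.auto.convert.join=true;
+ set hive.auto.convert.join.noconditionaltask=true;
+ set hive.auto.convert.join.noconditionaltask.size=10000000;
+ With these settings, a 3-way join whose two smallest
+ tables/partitions sum to under 10MB is compiled directly as a
+ mapjoin. -->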
+
+ <property>
+ <name>hive.optimize.mapjoin.mapreduce</name>
+ <value>false</value>
+ <description>If hive.auto.convert.join is off, this parameter does
+ not take effect. If it is on, and if there are map-join jobs followed
+ by a map-reduce job (e.g. a group by), each map-only job is merged
+ with the following map-reduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.auto.progress</name>
+ <value>false</value>
+ <description>Whether Hive Transform/Map/Reduce Clause should
+ automatically send progress information to TaskTracker to avoid the
+ task getting killed because of inactivity. Hive sends progress
+ information when the script is outputting to stderr. This option
+ removes the need to periodically produce stderr messages, but users
+ should be cautious because this may prevent infinite loops in the
+ scripts from being killed by TaskTracker.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.serde</name>
+ <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
+ <description>The default serde for transmitting input data to and
+ reading output data from the user scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.binary.record.max.length</name>
+ <value>1000</value>
+ <description>Read from a binary stream and treat each
+ hive.binary.record.max.length bytes as a record. The last record
+ before the end of stream can have less than
+ hive.binary.record.max.length bytes.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.recordreader</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
+ <description>The default record reader for reading data from the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.recordwriter</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
+ <description>The default record writer for writing data to the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.input.format</name>
+ <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
+ <description>The default input format. Set this to HiveInputFormat if
+ you encounter problems with CombineHiveInputFormat.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.udtf.auto.progress</name>
+ <value>false</value>
+ <description>Whether Hive should automatically send progress
+ information to TaskTracker when using UDTF's to prevent the task
+ getting killed because of inactivity. Users should be cautious
+ because this may prevent TaskTracker from killing tasks with
+ infinite loops.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.reduce.tasks.speculative.execution</name>
+ <value>true</value>
+ <description>Whether speculative execution for reducers should be
+ turned on.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.counters.pull.interval</name>
+ <value>1000</value>
+ <description>The interval with which to poll the JobTracker for the
+ counters of the running job. The smaller it is, the more load there
+ will be on the jobtracker; the higher it is, the less granular the
+ counters caught will be.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.location</name>
+ <value>/tmp/${user.name}</value>
+ <description>
+ Location of Hive run time structured log file
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.enable.plan.progress</name>
+ <value>true</value>
+ <description>
+ Whether to log the plan's progress every time a job's progress is
+ checked. These logs are written to the location specified by
+ hive.querylog.location.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.plan.progress.interval</name>
+ <value>60000</value>
+ <description>
+ The interval to wait between logging the plan's progress, in
+ milliseconds. If there is a whole number percentage change in the
+ progress of the mappers or the reducers, the progress is logged
+ regardless of this value. The actual interval will be the ceiling of
+ (this value divided by the value of hive.exec.counters.pull.interval)
+ multiplied by the value of hive.exec.counters.pull.interval; i.e. if
+ it does not divide evenly by the value of
+ hive.exec.counters.pull.interval, it will be logged less frequently
+ than specified. This only has an effect if
+ hive.querylog.enable.plan.progress is set to true.
+ </property>
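+
+ <!-- Worked example (not part of the original file): with the defaults
+ above, ceiling(60000 / 1000) * 1000 = 60000, so the plan is logged
+ every 60 seconds. If hive.exec.counters.pull.interval were 7000,
+ the effective interval would be ceiling(60000 / 7000) * 7000 =
+ 9 * 7000 = 63000 ms, i.e. less frequently than specified. -->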
+
+ <property>
+ <name>hive.enforce.bucketing</name>
+ <value>false</value>
+ <description>Whether bucketing is enforced. If true, while inserting
+ into the table, bucketing is enforced.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.sorting</name>
+ <value>false</value>
+ <description>Whether sorting is enforced. If true, while inserting
+ into the table, sorting is enforced.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.bucketingsorting</name>
+ <value>true</value>
+ <description>If hive.enforce.bucketing or hive.enforce.sorting is
+ true, don't create a reducer for enforcing bucketing/sorting for
+ queries of the form:
+ insert overwrite table T2 select * from T1;
+ where T1 and T2 are bucketed/sorted by the same keys into the same
+ number of buckets.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.sortmergebucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for a sort-merge bucketed map-side
+ join, and it cannot be performed, should the query fail or not?
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join</name>
+ <value>false</value>
+ <description>Whether the join will be automatically converted to a
+ sort-merge join, if the joined tables pass the criteria for
+ sort-merge join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join.bigtable.selection.policy
+ </name>
+ <value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ
+ </value>
+ <description>The policy to choose the big table for automatic
+ conversion to sort-merge join. By default, the table with the
+ largest partitions is assigned as the big table. All policies are:
+ . based on position of the table - the leftmost table is selected
+ org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.
+ . based on total size (all the partitions selected in the query) of
+ the table
+ org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.
+ . based on average size (all the partitions selected in the query)
+ of the table
+ org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.
+ New policies can be added in the future.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.connection.url.hook</name>
+ <value></value>
+ <description>Name of the hook to use for retrieving the JDO
+ connection URL. If empty, the value in
+ javax.jdo.option.ConnectionURL is used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry a metastore call if there
+ is a connection error.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.interval</name>
+ <value>1000</value>
+ <description>The number of milliseconds between metastore retry
+ attempts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.min.threads</name>
+ <value>200</value>
+ <description>Minimum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.max.threads</name>
+ <value>100000</value>
+ <description>Maximum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the metastore
+ server. Keepalive will prevent accumulation of half-open
+ connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.sasl.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will be secured
+ with SASL. Clients must authenticate with Kerberos.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.thrift.framed.transport.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will use
+ TFramedTransport. When false (default) a standard TTransport is
+ used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.keytab.file</name>
+ <value></value>
+ <description>The path to the Kerberos Keytab file containing the
+ metastore thrift server's service principal.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.principal</name>
+ <value>hive-metastore/_HOST@EXAMPLE.COM</value>
+ <description>The service principal for the metastore thrift server.
+ The special string _HOST will be replaced automatically with the
+ correct host name.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.class</name>
+ <value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value>
+ <description>The delegation token store implementation. Set to
+ org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced
+ cluster.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.connectString
+ </name>
+ <value>localhost:2181</value>
+ <description>The ZooKeeper token store connect string.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
+ <value>/hive/cluster/delegation</value>
+ <description>The root path for token store data.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.acl</name>
+ <value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa
+ </value>
+ <description>ACL for token store entries. List comma separated all
+ server principals for the cluster.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.cache.pinobjtypes</name>
+ <value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order
+ </value>
+ <description>List of comma separated metastore object types that
+ should be pinned in the cache
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication</name>
+ <value>true</value>
+ <description>Remove extra map-reduce jobs if the data is already
+ clustered by the same key which needs to be used again. This should
+ always be set to true. Since it is a new feature, it has been made
+ configurable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication.min.reducer</name>
+ <value>4</value>
+ <description>Reduce deduplication merges two RSs by moving
+ key/parts/reducer-num of the child RS to the parent RS. That means
+ if the reducer-num of the child RS is fixed (order by or forced
+ bucketing) and small, it can result in a very slow, single MR job.
+ The optimization will be disabled if the number of reducers is less
+ than the specified value.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition</name>
+ <value>true</value>
+ <description>Whether or not to allow dynamic partitions in DML/DDL.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition.mode</name>
+ <value>strict</value>
+ <description>In strict mode, the user must specify at least one
+ static partition in case the user accidentally overwrites all
+ partitions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions</name>
+ <value>1000</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in total.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions.pernode</name>
+ <value>100</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in each mapper/reducer node.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.created.files</name>
+ <value>100000</value>
+ <description>Maximum number of HDFS files created by all
+ mappers/reducers in a MapReduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.default.partition.name</name>
+ <value>__HIVE_DEFAULT_PARTITION__</value>
+ <description>The default partition name in case the dynamic partition
+ column value is null/empty string or any other value that cannot be
+ escaped. This value must not contain any special character used in
+ HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that
+ the dynamic partition value should not contain this value to avoid
+ confusion.
+ </description>
+ </property>
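+
+ <!-- Illustrative example (not part of the original file; table and
+ column names are hypothetical): with the strict mode above, a
+ dynamic partition insert must pin at least one static partition,
+ e.g.
+ insert overwrite table T partition (ds='2009-01-01', hr)
+ select key, value, hr from src;
+ whereas leaving ds dynamic as well requires
+ hive.exec.dynamic.partition.mode=nonstrict. -->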
+
+ <property>
+ <name>hive.stats.dbclass</name>
+ <value>jdbc:derby</value>
+ <description>The default database that stores temporary hive
+ statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.autogather</name>
+ <value>true</value>
+ <description>A flag to gather statistics automatically during the
+ INSERT OVERWRITE command.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbcdriver</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>The JDBC driver for the database that stores temporary
+ hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.dbconnectionstring</name>
+ <value>jdbc:derby:;databaseName=TempStatsStore;create=true</value>
+ <description>The default connection string for the database that
+ stores temporary hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.publisher</name>
+ <value></value>
+ <description>The Java class (implementing the StatsPublisher
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.aggregator</name>
+ <value></value>
+ <description>The Java class (implementing the StatsAggregator
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbc.timeout</name>
+ <value>30</value>
+ <description>Timeout value (number of seconds) used by JDBC
+ connection and statements.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.max</name>
+ <value>0</value>
+ <description>Maximum number of retries when stats
+ publisher/aggregator got an exception updating intermediate
+ database. Default is no tries on failures.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.wait</name>
+ <value>3000</value>
+ <description>The base waiting window (in milliseconds) before the
+ next retry. The actual wait time is calculated as baseWindow *
+ failures + baseWindow * (failures + 1) * (random number between
+ [0.0,1.0]).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.reliable</name>
+ <value>false</value>
+ <description>Whether queries will fail because stats cannot be
+ collected completely accurately. If this is set to true,
+ reading/writing from/into a partition may fail because the stats
+ could not be computed accurately.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.tablekeys</name>
+ <value>false</value>
+ <description>Whether join and group by keys on tables are derived and
+ maintained in the QueryPlan. This is useful to identify how tables
+ are accessed and to determine if they should be bucketed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.scancols</name>
+ <value>false</value>
+ <description>Whether column accesses are tracked in the QueryPlan.
+ This is useful to identify how tables are accessed and to determine
+ if there are wasted columns that can be trimmed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.ndv.error</name>
+ <value>20.0</value>
+ <description>Standard error expressed in percentage. Provides a
+ tradeoff between accuracy and compute cost. A lower value for error
+ indicates higher accuracy and a higher compute cost.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.key.prefix.max.length</name>
+ <value>200</value>
+ <description>
+ Determines whether, when the prefix of the key used for intermediate
+ stats collection exceeds a certain length, a hash of the key is used
+ instead. If the value is &lt; 0 then hashing is never used; if the
+ value is &gt;= 0 then hashing is used only when the key prefix's
+ length exceeds that value. The key prefix is defined as everything
+ preceding the task ID in the key.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.support.concurrency</name>
+ <value>false</value>
+ <description>Whether hive supports concurrency or not. A zookeeper
+ instance must be up and running for the default hive lock manager to
+ support read-write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.numretries</name>
+ <value>100</value>
+ <description>The number of times you want to try to get all the locks
+ </description>
+ </property>
+
+ <property>
+ <name>hive.unlock.numretries</name>
+ <value>10</value>
+ <description>The number of times you want to retry to do one unlock
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.sleep.between.retries</name>
+ <value>60</value>
+ <description>The sleep time (in seconds) between various retries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.quorum</name>
+ <value></value>
+ <description>The list of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.client.port</name>
+ <value>2181</value>
+ <description>The port of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.session.timeout</name>
+ <value>600000</value>
+ <description>Zookeeper client's session timeout. The client is
+ disconnected, and as a result, all locks released, if a heartbeat is
+ not sent in the timeout.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.namespace</name>
+ <value>hive_zookeeper_namespace</value>
+ <description>The parent node under which all zookeeper nodes are
+ created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.clean.extra.nodes</name>
+ <value>false</value>
+ <description>Clean extra nodes at the end of the session.
+ </description>
+ </property>
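+
+ <!-- Illustrative configuration (not part of the original file; host
+ names are placeholders): enabling the read/write lock manager
+ requires concurrency plus a ZooKeeper quorum, e.g.
+ set hive.support.concurrency=true;
+ set hive.zookeeper.quorum=zk1.example.com,zk2.example.com;
+ set hive.zookeeper.client.port=2181;
+ -->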
+
+ <property>
+ <name>fs.har.impl</name>
+ <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
+ <description>The implementation for accessing Hadoop Archives. Note
+ that this won't be applicable to Hadoop versions less than 0.20.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.archive.enabled</name>
+ <value>false</value>
+ <description>Whether archiving operations are permitted</description>
+ </property>
+
+ <property>
+ <name>hive.fetch.output.serde</name>
+ <value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value>
+ <description>The serde used by FetchTask to serialize the fetch
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.mode.local.auto</name>
+ <value>false</value>
+ <description> Let hive determine whether to run in local mode
+ automatically
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.drop.ignorenonexistent</name>
+ <value>true</value>
+ <description>
+ Do not report an error if DROP TABLE/VIEW specifies a non-existent
+ table/view.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.show.job.failure.debug.info</name>
+ <value>true</value>
+ <description>
+ If a job fails, whether to provide a link in the CLI to the task
+ with the most failures, along with debugging hints if applicable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.progress.timeout</name>
+ <value>0</value>
+ <description>
+ How long to run the autoprogressor for the script/UDTF operators
+ (in seconds). Set to 0 for forever.
+ </description>
+ </property>
+
+ <!-- HBase Storage Handler Parameters -->
+
+ <property>
+ <name>hive.hbase.wal.enabled</name>
+ <value>true</value>
+ <description>Whether writes to HBase should be forced to the
+ write-ahead log. Disabling this improves HBase write performance at
+ the risk of lost writes in case of a crash.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.table.parameters.default</name>
+ <value></value>
+ <description>Default property values for newly created tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.entity.separator</name>
+ <value>@</value>
+ <description>Separator used to construct names of tables and
+ partitions. For example, dbname@tablename@partitionname
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.createtablelike.properties.whitelist</name>
+ <value></value>
+ <description>Table Properties to copy over when executing a Create
+ Table Like.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.variable.substitute</name>
+ <value>true</value>
+ <description>This enables substitution using syntax like ${var}
+ ${system:var} and ${env:var}.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.variable.substitute.depth</name>
+ <value>40</value>
+ <description>The maximum replacements the substitution engine will
+ do.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.conf.validation</name>
+ <value>true</value>
+ <description>Enables type checking for registered hive
+ configurations.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.enabled</name>
+ <value>false</value>
+ <description>enable or disable the hive client authorization
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.user.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some users
+ whenever a table gets created. An example like
+ "userX,userY:select;userZ:create" will grant select privilege to
+ userX and userY, and grant create privilege to userZ whenever a new
+ table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.group.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some groups
+ whenever a table gets created. An example like
+ "groupX,groupY:select;groupZ:create" will grant select privilege to
+ groupX and groupY, and grant create privilege to groupZ whenever a
+ new table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.role.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some roles
+ whenever a table gets created. An example like
+ "roleX,roleY:select;roleZ:create" will grant select privilege to
+ roleX and roleY, and grant create privilege to roleZ whenever a new
+ table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.owner.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to the owner
+ whenever a table gets created. An example like "select,drop" will
+ grant select and drop privilege to the owner of the table.
+ </description>
+ </property>
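+
+ <!-- Illustrative example (not part of the original file; the group
+ name "analysts" is a placeholder): combining the grant properties
+ above, a deployment might set
+ hive.security.authorization.createtable.owner.grants=all
+ hive.security.authorization.createtable.group.grants=analysts:select
+ so that the creator gets full privileges and the analysts group can
+ read every newly created table. -->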
+
+ <property>
+ <name>hive.metastore.authorization.storage.checks</name>
+ <value>false</value>
+ <description>Should the metastore do authorization checks against the
+ underlying storage for operations like drop-partition (disallow the
+ drop-partition if the user in question doesn't have permissions to
+ delete the corresponding directory on the storage).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.error.on.empty.partition</name>
+ <value>false</value>
+ <description>Whether to throw an exception if a dynamic partition
+ insert generates empty results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.file.ignore.hdfs</name>
+ <value>false</value>
+ <description>If true, the hdfs location stored in the index file will
+ be ignored at runtime. If the data got moved or the name of the
+ cluster got changed, the index data should still be usable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.minsize</name>
+ <value>5368709120</value>
+ <description>Minimum size (in bytes) of the inputs on which a compact
+ index is automatically used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.maxsize</name>
+ <value>-1</value>
+ <description>Maximum size (in bytes) of the inputs on which a compact
+ index is automatically used.
+ A negative number is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.size</name>
+ <value>10737418240</value>
+ <description>The maximum number of bytes that a query using the
+ compact index can read. Negative value is equivalent to infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.entries</name>
+ <value>10000000</value>
+ <description>The maximum number of index entries to read during a
+ query that uses the compact index. Negative value is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.binary.search</name>
+ <value>true</value>
+ <description>Whether or not to use a binary search to find the
+ entries in an index table that match the filter, where possible
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exim.uri.scheme.whitelist</name>
+ <value>hdfs,pfile</value>
+ <description>A comma separated list of acceptable URI schemes for
+ import and export.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.mapred.only.operation</name>
+ <value>false</value>
+ <description>This param controls whether or not to only acquire locks
+ on queries that need to execute at least one mapred job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.row.max.size</name>
+ <value>100000</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ the minimum size we need to guarantee each row to have.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.limit.file</name>
+ <value>10</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ the maximum number of files we can sample.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.enable</name>
+ <value>false</value>
+ <description>Whether to enable the optimization of trying a smaller
+ subset of data for simple LIMIT first.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.fetch.max</name>
+ <value>50000</value>
+ <description>Maximum number of rows allowed for a smaller subset of
+ data for simple LIMIT, if it is a fetch query. Insert queries are
+ not restricted by this limit.
+ </description>
+ </property>
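+
+ <!-- Illustrative usage (not part of the original file): with
+ set hive.limit.optimize.enable=true;
+ a query like "select * from src limit 10" is first attempted on a
+ small sample of at most hive.limit.optimize.limit.file files,
+ subject to the row-size and fetch.max bounds above. -->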
+
+ <property>
+ <name>hive.rework.mapredwork</name>
+ <value>false</value>
+ <description>Whether to rework the mapred work or not. This was first
+ introduced by SymlinkTextInputFormat to replace symlink files with
+ real paths at compile time.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.concatenate.check.index</name>
+ <value>true</value>
+ <description>If this is set to true, hive will throw an error when
+ doing 'alter table tbl_name [partSpec] concatenate' on a
+ table/partition that has indexes on it. The reason to set this to
+ true is that it can help users avoid handling all the index drop,
+ recreation, and rebuild work. This is very helpful for tables with
+ thousands of partitions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.sample.seednumber</name>
+ <value>0</value>
+ <description>A number used for percentage sampling. By changing this
+ number, users will change the subsets of data sampled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.io.exception.handlers</name>
+ <value></value>
+ <description>A list of io exception handler class names. This is used
+ to construct a list of exception handlers to handle exceptions
+ thrown by record readers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.label</name>
+ <value>_c</value>
+ <description>String used as a prefix when auto-generating column
+ aliases. By default the prefix label will be appended with a column
+ position number to form the column alias. Auto generation happens if
+ an aggregate function is used in a select clause without an explicit
+ alias.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.includefuncname</name>
+ <value>false</value>
+ <description>Whether to include function name in the column alias
+ auto generated by hive.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.perf.logger</name>
+ <value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
+ <description>The class responsible for logging client side
+ performance metrics. Must be a subclass of
+ org.apache.hadoop.hive.ql.log.PerfLogger.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.start.cleanup.scratchdir</name>
+ <value>false</value>
+ <description>Whether to clean up the hive scratchdir while starting
+ the hive server.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.output.file.extension</name>
+ <value></value>
+ <description>String used as a file extension for output files. If not
+ set, defaults to the codec extension for text files (e.g. ".gz"), or
+ no extension otherwise.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.insert.into.multilevel.dirs</name>
+ <value>false</value>
+ <description>Whether to insert into multilevel directories like
+ "insert directory '/HIVEFT25686/chinna/' from table".
+ </description>
+ </property>
+
+ <property>
+ <name>hive.warehouse.subdir.inherit.perms</name>
+ <value>false</value>
+ <description>Set this to true if the table directories should inherit
+ the permission of the warehouse or database directory instead of
+ being created with the permissions derived from dfs umask.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.job.debug.capture.stacktraces</name>
+ <value>true</value>
+ <description>Whether or not stack traces parsed from the task logs of
+ a sampled failed task for each failed job should be stored in the
+ SessionState.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.driver.run.hooks</name>
+ <value></value>
+ <description>A comma separated list of hooks which implement
+ HiveDriverRunHook and will be run at the beginning and end of
+ Driver.run; these will be run in the order specified.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.output.format</name>
+ <value>text</value>
+ <description>
+ The data format to use for DDL output. One of "text" (for human
+ readable text) or "json" (for a json object).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.transform.escape.input</name>
+ <value>false</value>
+ <description>
+ This adds an option to escape special chars (newlines, carriage
+ returns and tabs) when they are passed to the user script. This is
+ useful if the hive tables can contain data that contains special
+ characters.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.rcfile.use.explicit.header</name>
+ <value>true</value>
+ <description>
+ If this is set, the header for RC Files will simply be RCF. If this
+ is not set, the header will be that borrowed from sequence files,
+ e.g. SEQ- followed by the input and output RC File formats.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multi.insert.move.tasks.share.dependencies</name>
+ <value>false</value>
+ <description>
+ If this is set, all move tasks for tables/partitions (not
+ directories) at the end of a multi-insert query will only begin once
+ the dependencies for all these move tasks have been met.
+ Advantages: If concurrency is enabled, the locks will only be
+ released once the query has finished, so with this config enabled,
+ the time when the table/partition is generated will be much closer
+ to when the lock on it is released.
+ Disadvantages: If concurrency is not enabled, with this disabled,
+ the tables/partitions which are produced by this query and finish
+ earlier will be available for querying much earlier. Since the locks
+ are only released once the query finishes, this does not apply if
+ concurrency is enabled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.fetch.task.conversion</name>
+ <value>minimal</value>
+ <description>
+ Some select queries can be converted to a single FETCH task,
+ minimizing latency. Currently the query should be single sourced,
+ not have any subquery, and should not have any aggregations or
+ distincts (which incur an RS), lateral views or joins.
+ 1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only
+ 2. more : SELECT, FILTER, LIMIT only (TABLESAMPLE, virtual columns)
+ </property>
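+
+ <!-- Illustrative example (not part of the original file; assumes ds is
+ a partition column of src): with the "minimal" value above,
+ select * from src where ds = '2009-01-01' limit 10;
+ (a filter on a partition column plus LIMIT) runs as a single FETCH
+ task with no map-reduce job, while "select key from src" would
+ still launch a job unless the value is raised to "more". -->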
+
+ <property>
+ <name>hive.hmshandler.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry a HMSHandler call if there
+ is a connection error.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hmshandler.retry.interval</name>
+ <value>1000</value>
+ <description>The number of milliseconds between HMSHandler retry
+ attempts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.read.socket.timeout</name>
+ <value>10</value>
+ <description>Timeout for the HiveServer to close the connection if no
+ response from the client in N seconds, defaults to 10 seconds.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the Hive server.
+ Keepalive will prevent accumulation of half-open connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.decode.partition.name</name>
+ <value>false</value>
+ <description>Whether to show the unquoted partition names in query
+ results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file.
+ If the property is not set, then logging will be initialized using
+ hive-log4j.properties found on the classpath.
+ If the property is set, the value must be a valid URI (java.net.URI,
+ e.g. "file:///tmp/my-logging.properties"), which you can then
+ extract a URL from and pass to PropertyConfigurator.configure(URL).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file for execution mode
+ (sub-command).
+ If the property is not set, then logging will be initialized using
+ hive-exec-log4j.properties found on the classpath.
+ If the property is set, the value must be a valid URI (java.net.URI,
+ e.g. "file:///tmp/my-logging.properties"), which you can then
+ extract a URL from and pass to PropertyConfigurator.configure(URL).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort</name>
+ <value>false</value>
+ <description>
+ If this is set, when writing partitions, the metadata will include
+ the bucketing/sorting properties with which the data was written, if
+ any (this will not overwrite the metadata inherited from the table
+ if the table is bucketed/sorted).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort.num.buckets.power.two</name>
+ <value>false</value>
+ <description>
+ If this is set, when setting the number of reducers for the map
+ reduce task which writes the final output files, it will choose a
+ number which is a power of two, unless the user specifies the number
+ of reducers to use using mapred.reduce.tasks. The number of reducers
+ may be set to a power of two, only to be followed by a merge task,
+ meaning preventing anything from being inferred.
+ With hive.exec.infer.bucket.sort set to true:
+ Advantages: If this is not set, the number of buckets for partitions
+ will seem arbitrary, which means that the number of mappers used for
+ optimized joins, for example, will be very low. With this set, since
+ the number of buckets used for any partition is a power of two, the
+ number of mappers used for optimized joins will be the least number
+ of buckets used by any partition being joined.
+ Disadvantages: This may mean a much larger or much smaller number of
+ reducers being used in the final map reduce job, e.g. if a job was
+ originally going to take 257 reducers, it will now take 512
+ reducers; similarly if the max number of reducers is 511, and a job
+ was going to use this many, it will now use 256 reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.orderby.position.alias</name>
+ <value>false</value>
+ <description>Whether to enable using Column Position Alias in Group
+ By or Order By
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.min.worker.threads</name>
+ <value>5</value>
+ <description>Minimum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.max.worker.threads</name>
+ <value>100</value>
+ <description>Maximum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.port</name>
+ <value>10000</value>
+ <description>Port number of HiveServer2 Thrift interface.
+ Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.bind.host</name>
+ <value>localhost</value>
+ <description>Bind host on which to run the HiveServer2 Thrift
+ interface. Can be overridden by setting
+ $HIVE_SERVER2_THRIFT_BIND_HOST.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication</name>
+ <value>NONE</value>
+ <description>
+ Client authentication types.
+ NONE: no authentication check
+ LDAP: LDAP/AD based authentication
+ KERBEROS: Kerberos/GSSAPI authentication
+ CUSTOM: Custom authentication provider
+ (Use with property hive.server2.custom.authentication.class)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.custom.authentication.class</name>
+ <value></value>
+ <description>
+ Custom authentication class. Used when property
+ 'hive.server2.authentication' is set to 'CUSTOM'. The provided class
+ must be a proper implementation of the interface
+ org.apache.hive.service.auth.PasswdAuthenticationProvider.
+ HiveServer2 will call its Authenticate(user, password) method to
+ authenticate requests. The implementation may optionally extend
+ Hadoop's org.apache.hadoop.conf.Configured class to grab Hive's
+ Configuration object.
+ </description>
+ </property>
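+
+ <!-- Minimal sketch (not part of the original file): a custom provider
+ implementing the interface named above might look like this; the
+ class name and hard-coded credential check are purely illustrative.
+
+ package com.example;
+
+ import javax.security.sasl.AuthenticationException;
+ import org.apache.hive.service.auth.PasswdAuthenticationProvider;
+
+ public class StaticPasswdAuthProvider
+ implements PasswdAuthenticationProvider {
+ // Reject everything except one demo credential pair.
+ public void Authenticate(String user, String password)
+ throws AuthenticationException {
+ if (!"demo".equals(user) || !"demo.secret".equals(password)) {
+ throw new AuthenticationException("Invalid credentials for " + user);
+ }
+ }
+ }
+
+ It is then enabled with hive.server2.authentication=CUSTOM and
+ hive.server2.custom.authentication.class=com.example.StaticPasswdAuthProvider. -->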
+
+ <property>
+ <name>hive.server2.authentication.kerberos.principal</name>
+ <value></value>
+ <description>
+ Kerberos server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.kerberos.keytab</name>
+ <value></value>
+ <description>
+ Kerberos keytab file for server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.url</name>
+ <value></value>
+ <description>
+ LDAP connection URL
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.baseDN</name>
+ <value></value>
+ <description>
+ LDAP base DN
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.enable.doAs</name>
+ <value>true</value>
+ <description>
+ Setting this property to true will have hive server2 execute hive
+ operations as the user making the calls to it.
+ </description>
+ </property>
+
+ <!-- Hive Execution Parameters -->
+ <property>
+ <name>mapred.reduce.tasks</name>
+ <value>-1</value>
+ <description>The default number of reduce tasks per job. Typically
+ set to a prime close to the number of available hosts. Ignored when
+ mapred.job.tracker is "local". Hadoop sets this to 1 by default,
+ whereas hive uses -1 as its default value. By setting this property
+ to -1, Hive will automatically figure out what the number of
+ reducers should be.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.bytes.per.reducer</name>
+ <value>1000000000</value>
+ <description>Size per reducer. The default is 1G, i.e. if the input
+ size is 10G, it will use 10 reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.max</name>
+ <value>999</value>
+ <description>The maximum number of reducers that will be used. If the
+ one specified in the configuration parameter mapred.reduce.tasks is
+ negative, hive will use this as the maximum number of reducers when
+ automatically determining the number of reducers.
+ </description>
+ </property>
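+
+ <!-- Worked example (not part of the original file): with
+ mapred.reduce.tasks=-1 and the defaults above, a job over 25G of
+ input gets ceiling(25G / 1G) = 25 reducers, capped at
+ hive.exec.reducers.max=999. -->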
+
+ <property>
+ <name>hive.cli.print.header</name>
+ <value>false</value>
+ <description>Whether to print the names of the columns in query
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.print.current.db</name>
+ <value>false</value>
+ <description>Whether to include the current database in the hive
+ prompt.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.prompt</name>
+ <value>hive</value>
+ <description>Command line prompt configuration value. Other hiveconf
+ variables can be used in this configuration value. Variable
+ substitution will only be invoked at hive cli startup.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.pretty.output.num.cols</name>
+ <value>-1</value>
+ <description>The number of columns to use when formatting output
+ generated by the DESCRIBE PRETTY table_name command. If the value of
+ this property is -1, then hive will use the auto-detected terminal
+ width.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.scratchdir</name>
+ <value>/tmp/hive-${user.name}</value>
+ <description>Scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.exec.local.scratchdir</name>
+ <value>/tmp/${user.name}</value>
+ <description>Local scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.test.mode</name>
+ <value>false</value>
+ <description>Whether hive is running in test mode. If yes, it turns
+ on sampling and prefixes the output tablename.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.prefix</name>
+ <value>test_</value>
+ <description>if hive is running in test mode, prefixes the output
+ table by this string
+ </description>
+ </property>
+
+ <!-- If the input table is not bucketed, the denominator of the tablesample
+ is determined by the parameter below -->
+ <!-- For example, the following query: -->
+ <!-- INSERT OVERWRITE TABLE dest -->
+ <!-- SELECT col1 from src -->
+ <!-- would be converted to -->
+ <!-- INSERT OVERWRITE TABLE test_dest -->
+ <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
+ <property>
+ <name>hive.test.mode.samplefreq</name>
+ <value>32</value>
+ <description>if hive is running in test mode and table is not
+ bucketed, sampling frequency
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.nosamplelist</name>
+ <value></value>
+ <description>if hive is running in test mode, don't sample the above
+ comma-separated list of tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.uris</name>
+ <value></value>
+ <description>Thrift uri for the remote metastore. Used by metastore
+ client to connect to remote metastore.
+ </description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionURL</name>
+ <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
+ <description>JDBC connect string for a JDBC metastore</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionDriverName</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>Driver class name for a JDBC metastore</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.PersistenceManagerFactoryClass</name>
+ <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
+ <description>class implementing the jdo persistence</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.DetachAllOnCommit</name>
+ <value>true</value>
+ <description>detaches all objects from session so that they can be
+ used after transaction is committed
+ </description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.NonTransactionalRead</name>
+ <value>true</value>
+ <description>reads outside of transactions</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionUserName</name>
+ <value>APP</value>
+ <description>username to use against metastore database</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionPassword</name>
+ <value>mine</value>
+ <description>password to use against metastore database</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.Multithreaded</name>
+ <value>true</value>
+ <description>Set this to true if multiple threads access metastore
+ through JDO concurrently.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.connectionPoolingType</name>
+ <value>DBCP</value>
+ <description>Uses a DBCP connection pool for JDBC metastore
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateTables</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateColumns</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateConstraints</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.storeManagerType</name>
+ <value>rdbms</value>
+ <description>metadata store type</description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoCreateSchema</name>
+ <value>true</value>
+ <description>Creates the necessary schema on startup if one doesn't
+ exist. Set this to false after creating it once.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoStartMechanismMode</name>
+ <value>checked</value>
+ <description>throw exception if metadata tables are incorrect
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.transactionIsolation</name>
+ <value>read-committed</value>
+ <description>Default transaction isolation level for identity
+ generation.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2</name>
+ <value>false</value>
+ <description>Use a level 2 cache. Turn this off if metadata is changed
+ independently of hive metastore server
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2.type</name>
+ <value>SOFT</value>
+ <description>SOFT=soft reference based cache, WEAK=weak reference
+ based cache.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.identifierFactory</name>
+ <value>datanucleus</value>
+ <description>Name of the identifier factory to use when generating
+ table/column names etc. 'datanucleus' is used for backward
+ compatibility
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.plugin.pluginRegistryBundleCheck</name>
+ <value>LOG</value>
+ <description>Defines what happens when plugin bundles are found and
+ are duplicated [EXCEPTION|LOG|NONE]
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.warehouse.dir</name>
+ <value>/user/hive/warehouse</value>
+ <description>location of default database for the warehouse
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.execute.setugi</name>
+ <value>false</value>
+ <description>In unsecure mode, setting this property to true will
+ cause the metastore to execute DFS operations using the client's
+ reported user and group permissions. Note that this property must be
+ set on both the client and server sides. Further note that it is best
+ effort: if the client sets it to true and the server sets it to false,
+ the client setting will be ignored.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.listeners</name>
+ <value></value>
+ <description>List of comma separated listeners for metastore events.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.inherit.table.properties</name>
+ <value></value>
+ <description>List of comma separated keys occurring in table
+ properties which will get inherited to newly created partitions. *
+ implies all the keys will get inherited.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.export.location</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, it is the location to which the metadata will be exported.
+ The default is an empty string, which results in the metadata being
+ exported to the current user's home directory on HDFS.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.move.exported.metadata.to.trash</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, this setting determines if the metadata that is exported
+ will subsequently be moved to the user's trash directory alongside
+ the dropped table data. This ensures that the metadata will be
+ cleaned up along with the dropped table data.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.name.whitelist.pattern</name>
+ <value></value>
+ <description>Partition names will be checked against this regex
+ pattern and rejected if not matched.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.end.function.listeners</name>
+ <value></value>
+ <description>list of comma separated listeners for the end of
+ metastore functions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.expiry.duration</name>
+ <value>0</value>
+ <description>Duration after which events expire from events table (in
+ seconds)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.clean.freq</name>
+ <value>0</value>
+ <description>Frequency at which the timer task runs to purge expired
+ events in the metastore (in seconds).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.connect.retries</name>
+ <value>5</value>
+ <description>Number of retries while opening a connection to metastore
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.failure.retries</name>
+ <value>3</value>
+ <description>Number of retries upon failure of Thrift metastore calls
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.connect.retry.delay</name>
+ <value>1</value>
+ <description>Number of seconds for the client to wait between
+ consecutive connection attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.socket.timeout</name>
+ <value>20</value>
+ <description>MetaStore Client socket timeout in seconds</description>
+ </property>
+
+ <property>
+ <name>hive.metastore.rawstore.impl</name>
+ <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+ <description>Name of the class that implements the
+ org.apache.hadoop.hive.metastore.rawstore interface. This class is
+ used to store and retrieve raw metadata objects such as tables and
+ databases.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.max</name>
+ <value>300</value>
+ <description>Maximum number of objects (tables/partitions) that can
+ be retrieved from the metastore in one batch. The higher the number,
+ the fewer round trips are needed to the Hive metastore server, but it
+ may also cause a higher memory requirement at the client side.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.table.partition.max</name>
+ <value>1000</value>
+ <description>Maximum number of table partitions that metastore
+ internally retrieves in one batch.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.default.fileformat</name>
+ <value>TextFile</value>
+ <description>Default file format for the CREATE TABLE statement.
+ Options are TextFile and SequenceFile. Users can explicitly say
+ CREATE TABLE ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override.</description>
+ </property>
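+
+ <!-- For example, regardless of the default, a per-table format can still -->
+ <!-- be chosen explicitly (illustrative table name): -->
+ <!-- CREATE TABLE t_seq (key INT, value STRING) STORED AS SEQUENCEFILE; -->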
+
+ <property>
+ <name>hive.fileformat.check</name>
+ <value>true</value>
+ <description>Whether to check file format or not when loading data
+ files
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr</name>
+ <value>true</value>
+ <description>Whether to use map-side aggregation in Hive Group By
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.skewindata</name>
+ <value>false</value>
+ <description>Whether there is skew in data to optimize group by
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.multigroupby.common.distincts</name>
+ <value>true</value>
+ <description>Whether to optimize a multi-groupby query with the same
+ distinct. Consider a query like:
+
+ from src
+ insert overwrite table dest1 select col1, count(distinct colx) group by col1
+ insert overwrite table dest2 select col2, count(distinct colx) group by col2;
+
+ With this parameter set to true, first we spray by the distinct value
+ (colx), and then perform the two group bys. This makes sense if
+ map-side aggregation is turned off. However, with map-side
+ aggregation, it might be useful in some cases to treat the two
+ inserts independently, thereby performing the query above in 2 MR
+ jobs instead of 3 (due to spraying by the distinct key first). If
+ this parameter is turned off, we don't consider the fact that the
+ distinct key is the same across different MR jobs.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.mapaggr.checkinterval</name>
+ <value>100000</value>
+ <description>Number of rows after which the size check of the
+ grouping keys/aggregation classes is performed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.local.mem</name>
+ <value>0</value>
+ <description>For local mode, memory of the mappers/reducers
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name>
+ <value>0.3</value>
+ <description>Portion of total memory to be used by the map-side group
+ aggregation hash table, when this group by is followed by a map join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.force.flush.memory.threshold</name>
+ <value>0.9</value>
+ <description>The max memory to be used by the map-side group
+ aggregation hash table; if the memory usage is higher than this
+ number, force a flush of the data.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.percentmemory</name>
+ <value>0.5</value>
+ <description>Portion of total memory to be used by the map-side group
+ aggregation hash table.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.min.reduction</name>
+ <value>0.5</value>
+ <description>Hash aggregation will be turned off if the ratio between
+ hash table size and input rows is bigger than this number. Set to 1
+ to make sure hash aggregation is never turned off.
+ </description>
+ </property>
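+
+ <!-- For example, with the default of 0.5 and -->
+ <!-- hive.groupby.mapaggr.checkinterval at 100000: if, after 100000 input -->
+ <!-- rows, the hash table holds more than 50000 entries (ratio > 0.5), -->
+ <!-- hash aggregation would be turned off for that mapper. -->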
+
+ <property>
+ <name>hive.optimize.cp</name>
+ <value>true</value>
+ <description>Whether to enable column pruner</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter</name>
+ <value>false</value>
+ <description>Whether to enable automatic use of indexes</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.groupby</name>
+ <value>false</value>
+ <description>Whether to enable optimization of group-by queries using
+ Aggregate indexes.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd</name>
+ <value>true</value>
+ <description>Whether to enable predicate pushdown</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd.storage</name>
+ <value>true</value>
+ <description>Whether to push predicates down into storage handlers.
+ Ignored when hive.optimize.ppd is false.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ppd.recognizetransivity</name>
+ <value>true</value>
+ <description>Whether to transitively replicate predicate filters over
+ equijoin conditions.
+ </description>
+ </property>
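+
+ <!-- For example, with this on, in a query like (illustrative tables): -->
+ <!-- SELECT * FROM a JOIN b ON (a.key = b.key) WHERE a.key = 10; -->
+ <!-- the filter a.key = 10 can also be applied to b.key via the equijoin. -->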
+
+ <property>
+ <name>hive.optimize.groupby</name>
+ <value>true</value>
+ <description>Whether to enable the bucketed group by from bucketed
+ partitions/tables.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin.compiletime</name>
+ <value>false</value>
+ <description>Whether to create a separate plan for skewed keys for
+ the tables in the join. This is based on the skewed keys stored in
+ the metadata. At compile time, the plan is broken into different
+ joins: one for the skewed keys, and the other for the remaining keys.
+ And then, a union is performed for the 2 joins generated above. So
+ unless the same skewed key is present in both the joined tables, the
+ join for the skewed key will be performed as a map-side join.
+
+ The main difference between this parameter and hive.optimize.skewjoin
+ is that this parameter uses the skew information stored in the
+ metastore to optimize the plan at compile time itself. If there is no
+ skew information in the metadata, this parameter will not have any
+ effect. Both hive.optimize.skewjoin.compiletime and
+ hive.optimize.skewjoin should be set to true. Ideally,
+ hive.optimize.skewjoin should be renamed as
+ hive.optimize.skewjoin.runtime, but that is not being done for
+ backward compatibility.
+
+ If the skew information is correctly stored in the metadata,
+ hive.optimize.skewjoin.compiletime would change the query plan to
+ take care of it, and hive.optimize.skewjoin will be a no-op.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.union.remove</name>
+ <value>false</value>
+ <description>
+ Whether to remove the union and push the operators between the union
+ and the filesink above the union. This avoids an extra scan of the
+ output by the union. This is independently useful for union queries,
+ and especially useful when hive.optimize.skewjoin.compiletime is set
+ to true, since an extra union is inserted.
+
+ The merge is triggered if either of hive.merge.mapfiles or
+ hive.merge.mapredfiles is set to true. If the user has set
+ hive.merge.mapfiles to true and hive.merge.mapredfiles to false, the
+ idea was that the number of reducers are few, so the number of files
+ anyway is small. However, with this optimization, we are increasing
+ the number of files possibly by a big margin. So, we merge
+ aggressively.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.supports.subdirectories</name>
+ <value>false</value>
+ <description>Whether the version of Hadoop which is running supports
+ sub-directories for tables/partitions. Many Hive optimizations can be
+ applied if the Hadoop version supports sub-directories for
+ tables/partitions. This support was added by MAPREDUCE-1501.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multigroupby.singlemr</name>
+ <value>false</value>
+ <description>Whether to optimize a multi group by query to generate a
+ single M/R job plan. If the multi group by query has common group by
+ keys, it will be optimized to generate a single M/R job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to perform the group by in the mapper
+ by using BucketizedHiveInputFormat. The only downside to this is that
+ it limits the number of mappers to the number of files.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted.testmode</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to perform the group by in the mapper
+ by using BucketizedHiveInputFormat. If the test mode is set, the plan
+ is not converted, but a query property is set to denote the same.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.new.job.grouping.set.cardinality</name>
+ <value>30</value>
+ <description>
+ Whether a new map-reduce job should be launched for grouping
+ sets/rollups/cubes. For a query like: select a, b, c, count(1) from T
+ group by a, b, c with rollup; 4 rows are created per row: (a, b, c),
+ (a, b, null), (a, null, null), (null, null, null). This can lead to
+ explosion across the map-reduce boundary if the cardinality of T is
+ very high, and map-side aggregation does not do a very good job.
+
+ This parameter decides if Hive should add an additional map-reduce
+ job. If the grouping set cardinality (4 in the example above) is more
+ than this value, a new MR job is added under the assumption that the
+ original group by will reduce the data size.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.join.emit.interval</name>
+ <value>1000</value>
+ <description>How many rows in the right-most join operand Hive should
+ buffer before emitting the join result.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.join.cache.size</name>
+ <value>25000</value>
+ <description>How many rows in the joining tables (except the streaming
+ table) should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.bucket.cache.size</name>
+ <value>100</value>
+ <description>How many values for each key in the map-joined table
+ should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.cache.numrows</name>
+ <value>25000</value>
+ <description>How many rows should be cached by jdbm for map join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin</name>
+ <value>false</value>
+ <description>Whether to enable skew join optimization.
+ The algorithm is as follows: At runtime, detect the keys with a large
+ skew. Instead of processing those keys, store them temporarily in an
+ HDFS directory. In a follow-up map-reduce job, process those skewed
+ keys. The same key need not be skewed for all the tables, and so the
+ follow-up map-reduce job (for the skewed keys) would be much faster,
+ since it would be a map-join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.key</name>
+ <value>100000</value>
+ <description>Determine if we get a skew key in a join. If we see more
+ than the specified number of rows with the same key in the join
+ operator, we consider the key a skew join key.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.mapjoin.map.tasks</name>
+ <value>10000</value>
+ <description>Determine the number of map tasks used in the follow-up
+ map join job for a skew join. It should be used together with
+ hive.skewjoin.mapjoin.min.split to perform fine-grained control.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.mapjoin.min.split</name>
+ <value>33554432</value>
+ <description>Determine the maximum number of map tasks used in the
+ follow-up map join job for a skew join by specifying the minimum
+ split size. It should be used together with
+ hive.skewjoin.mapjoin.map.tasks to perform fine-grained control.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.mode</name>
+ <value>nonstrict</value>
+ <description>The mode in which the Hive operations are being
+ performed. In strict mode, some risky queries are not allowed to run.
+ They include:
+ Cartesian Product.
+ No partition being picked up for a query.
+ Comparing bigints and strings.
+ Comparing bigints and doubles.
+ Orderby without limit.
+ </description>
+ </property>
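+
+ <!-- For example, in strict mode (illustrative table): -->
+ <!-- SELECT * FROM src ORDER BY key; -->
+ <!-- is rejected, while the same query with a limit is allowed: -->
+ <!-- SELECT * FROM src ORDER BY key LIMIT 100; -->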
+
+ <property>
+ <name>hive.enforce.bucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for a bucketed map-side join, and it
+ cannot be performed, should the query fail or not? For example, if
+ the buckets in the tables being joined are not a multiple of each
+ other, a bucketed map-side join cannot be performed, and the query
+ will fail if hive.enforce.bucketmapjoin is set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.maxerrsize</name>
+ <value>100000</value>
+ <description>Maximum number of bytes a script is allowed to emit to
+ standard error (per map-reduce task). This prevents runaway scripts
+ from filling log partitions to capacity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.allow.partial.consumption</name>
+ <value>false</value>
+ <description> When enabled, this option allows a user script to exit
+ successfully without consuming all the data from the standard input.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.id.env.var</name>
+ <value>HIVE_SCRIPT_OPERATOR_ID</value>
+ <description> Name of the environment variable that holds the unique
+ script operator ID in the user's transform function (the custom
+ mapper/reducer that the user has specified in the query)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.truncate.env</name>
+ <value>false</value>
+ <description>Truncate each environment variable for external script in
+ scripts operator to 20KB (to fit system limits)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.compress.output</name>
+ <value>false</value>
+ <description>This controls whether the final outputs of a query (to a
+ local/HDFS file or a Hive table) are compressed. The compression
+ codec and other options are determined from the Hadoop config
+ variables mapred.output.compress*
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.compress.intermediate</name>
+ <value>false</value>
+ <description> This controls whether intermediate files produced by
+ hive between multiple map-reduce jobs are compressed. The compression
+ codec and other options are determined from hadoop config variables
+ mapred.output.compress*
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel</name>
+ <value>false</value>
+ <description>Whether to execute jobs in parallel</description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel.thread.number</name>
+ <value>8</value>
+ <description>How many jobs at most can be executed in parallel
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.rowoffset</name>
+ <value>false</value>
+ <description>Whether to provide the row offset virtual column
+ </description>
+ </property>
+
+ <property>
+ <name>hive.task.progress</name>
+ <value>false</value>
+ <description>Whether Hive should periodically update task progress
+ counters during execution. Enabling this allows task progress to be
+ monitored more closely in the job tracker, but may impose a
+ performance penalty. This flag is automatically set to true for jobs
+ with hive.exec.dynamic.partition set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.war.file</name>
+ <value>lib/hive-hwi-@VERSION@.war</value>
+ <description>This sets the path to the HWI war file, relative to
+ ${HIVE_HOME}.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.host</name>
+ <value>0.0.0.0</value>
+ <description>This is the host address the Hive Web Interface will
+ listen on
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.port</name>
+ <value>9999</value>
+ <description>This is the port the Hive Web Interface will listen on
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.pre.hooks</name>
+ <value></value>
+ <description>Comma-separated list of pre-execution hooks to be invoked
+ for each statement. A pre-execution hook is specified as the name of
+ a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.post.hooks</name>
+ <value></value>
+ <description>Comma-separated list of post-execution hooks to be
+ invoked for each statement. A post-execution hook is specified as the
+ name of a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.failure.hooks</name>
+ <value></value>
+ <description>Comma-separated list of on-failure hooks to be invoked
+ for each statement. An on-failure hook is specified as the name of a
+ Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.init.hooks</name>
+ <value></value>
+ <description>A comma separated list of hooks to be invoked at the
+ beginning of HMSHandler initialization. An init hook is specified as
+ the name of a Java class which extends
+ org.apache.hadoop.hive.metastore.MetaStoreInitListener.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.publishers</name>
+ <value></value>
+ <description>Comma-separated list of statistics publishers to be
+ invoked on counters on each job. A client stats publisher is
+ specified as the name of a Java class which implements the
+ org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.counters</name>
+ <value></value>
+ <description>Subset of counters that should be of interest for
+ hive.client.stats.publishers (when one wants to limit their
+ publishing). Non-display names should be used
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapfiles</name>
+ <value>true</value>
+ <description>Merge small files at the end of a map-only job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapredfiles</name>
+ <value>false</value>
+ <description>Merge small files at the end of a map-reduce job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.heartbeat.interval</name>
+ <value>1000</value>
+ <description>Send a heartbeat after this interval - used by mapjoin
+ and filter operators
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.size.per.task</name>
+ <value>256000000</value>
+ <description>Size of merged files at the end of the job</description>
+ </property>
+
+ <property>
+ <name>hive.merge.smallfiles.avgsize</name>
+ <value>16000000</value>
+ <description>When the average output file size of a job is less than
+ this number, Hive will start an additional map-reduce job to merge
+ the output files into bigger files. This is only done for map-only
+ jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
+ hive.merge.mapredfiles is true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.smalltable.filesize</name>
+ <value>25000000</value>
+ <description>The threshold for the input file size of the small
+ tables; if the file size is smaller than this threshold, it will try
+ to convert the common join into map join
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ignore.mapjoin.hint</name>
+ <value>true</value>
+ <description>Ignore the mapjoin hint</description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.localtask.max.memory.usage</name>
+ <value>0.90</value>
+ <description>This number means how much memory the local task can use
+ to hold the key/value pairs in the in-memory hash table. If the local
+ task's memory usage is more than this number, the local task will
+ abort itself, meaning the data of the small table is too large to be
+ held in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
+ <value>0.55</value>
+ <description>This number means how much memory the local task can use
+ to hold the key/value pairs in the in-memory hash table when this map
+ join is followed by a group by. If the local task's memory usage is
+ more than this number, the local task will abort itself, meaning the
+ data of the small table is too large to be held in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.check.memory.rows</name>
+ <value>100000</value>
+ <description>The number of rows processed after which the memory
+ usage is checked.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join</name>
+ <value>false</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file size.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask</name>
+ <value>true</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file size. If this
+ parameter is on, and the sum of sizes for n-1 of the
+ tables/partitions for an n-way join is smaller than the specified
+ size, the join is directly converted to a mapjoin (there is no
+ conditional task).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask.size</name>
+ <value>10000000</value>
+ <description>If hive.auto.convert.join.noconditionaltask is off, this
+ parameter does not take effect. However, if it is on, and the sum of
+ sizes for n-1 of the tables/partitions for an n-way join is smaller
+ than this size, the join is directly converted to a mapjoin (there is
+ no conditional task). The default is 10MB.
+ </description>
+ </property>
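+
+ <!-- For example, with the 10MB default, a 3-way join of tables of sizes -->
+ <!-- 4MB, 5MB and 200MB (illustrative numbers) qualifies: the sum of the -->
+ <!-- two smallest, 9MB, is under the threshold, so the join is converted -->
+ <!-- directly to a mapjoin with the 200MB table streamed. -->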
+
+ <property>
+ <name>hive.optimize.mapjoin.mapreduce</name>
+ <value>false</value>
+ <description>If hive.auto.convert.join is off, this parameter does
+ not take effect. If it is on, and if there are map-join jobs followed
+ by a map-reduce job (e.g., a group by), each map-only job is merged
+ with the following map-reduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.auto.progress</name>
+ <value>false</value>
+ <description>Whether the Hive Transform/Map/Reduce clause should
+ automatically send progress information to the TaskTracker to avoid
+ the task getting killed because of inactivity. Hive sends progress
+ information when the script is outputting to stderr. This option
+ removes the need for periodically producing stderr messages, but
+ users should be cautious because this may prevent the TaskTracker
+ from killing scripts stuck in infinite loops.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.serde</name>
+ <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
+ <description>The default serde for transmitting input data to and
+ reading output data from the user scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.binary.record.max.length</name>
+ <value>1000</value>
+ <description>Read from a binary stream and treat each
+ hive.binary.record.max.length bytes as a record. The last record
+ before the end of stream can have fewer than
+ hive.binary.record.max.length bytes.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.recordreader</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
+ <description>The default record reader for reading data from the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.recordwriter</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
+ <description>The default record writer for writing data to the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.input.format</name>
+ <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
+ <description>The default input format. Set this to HiveInputFormat if
+ you encounter problems with CombineHiveInputFormat.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.udtf.auto.progress</name>
+ <value>false</value>
+ <description>Whether Hive should automatically send progress
+ information to the TaskTracker when using UDTFs to prevent the task
+ getting killed because of inactivity. Users should be cautious
+ because this may prevent the TaskTracker from killing tasks stuck in
+ infinite loops.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.reduce.tasks.speculative.execution</name>
+ <value>true</value>
+ <description>Whether speculative execution for reducers should be
+ turned on.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.counters.pull.interval</name>
+ <value>1000</value>
+ <description>The interval with which to poll the JobTracker for the
+ counters of the running job. The smaller it is, the more load there
+ will be on the JobTracker; the higher it is, the less granular the
+ captured counters will be.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.location</name>
+ <value>/tmp/${user.name}</value>
+ <description>
+ Location of Hive run time structured log file
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.enable.plan.progress</name>
+ <value>true</value>
+ <description>
+ Whether to log the plan's progress every time a job's progress is
+ checked. These logs are written to the location specified by
+ hive.querylog.location.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.plan.progress.interval</name>
+ <value>60000</value>
+ <description>
+ The interval to wait between logging the plan's progress in
+ milliseconds. If there is a whole-number percentage change in the
+ progress of the mappers or the reducers, the progress is logged
+ regardless of this value. The actual interval will be the ceiling of
+ (this value divided by the value of hive.exec.counters.pull.interval)
+ multiplied by the value of hive.exec.counters.pull.interval, i.e. if
+ it is not divided evenly by the value of
+ hive.exec.counters.pull.interval it will be logged less frequently
+ than specified. This only has an effect if
+ hive.querylog.enable.plan.progress is set to true.
+ </description>
+ </property>
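+
+ <!-- For example, with hive.exec.counters.pull.interval at 1000 ms, a -->
+ <!-- value of 2500 here yields an effective interval of -->
+ <!-- ceiling(2500 / 1000) * 1000 = 3000 ms, i.e. less frequent logging -->
+ <!-- than requested. -->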
+
+ <property>
+ <name>hive.enforce.bucketing</name>
+ <value>false</value>
+ <description>Whether bucketing is enforced. If true, while inserting
+ into the table, bucketing is enforced.
+ </description>
+ </property>
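+
+ <!-- For example, with bucketing enforced, inserts into a table declared -->
+ <!-- as (illustrative): -->
+ <!-- CREATE TABLE b (key INT, value STRING) CLUSTERED BY (key) INTO 32 BUCKETS; -->
+ <!-- are forced to use 32 reducers, one per bucket file. -->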
+
+ <property>
+ <name>hive.enforce.sorting</name>
+ <value>false</value>
+ <description>Whether sorting is enforced. If true, while inserting
+ into the table, sorting is enforced.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.bucketingsorting</name>
+ <value>true</value>
+ <description>If hive.enforce.bucketing or hive.enforce.sorting is
+ true, don't create a reducer for enforcing bucketing/sorting for
+ queries of the form:
+ insert overwrite table T2 select * from T1;
+ where T1 and T2 are bucketed/sorted by the same keys into the same
+ number of buckets.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.sortmergebucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for a sort-merge bucketed map-side
+ join, and it cannot be performed, should the query fail or not?
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join</name>
+ <value>false</value>
+ <description>Whether the join will be automatically converted to a
+ sort-merge join, if the joined tables pass the criteria for
+ sort-merge join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join.bigtable.selection.policy</name>
+ <value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ</value>
+ <description>The policy to choose the big table for automatic
+ conversion to sort-merge join. By default, the table with the largest
+ partitions is assigned as the big table. All policies are:
+ . based on position of the table - the leftmost table is selected:
+ org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.
+ . based on total size (all the partitions selected in the query) of
+ the table:
+ org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.
+ . based on average size (all the partitions selected in the query) of
+ the table:
+ org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.
+ New policies can be added in the future.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.connection.url.hook</name>
+ <value></value>
+ <description>Name of the hook to use for retrieving the JDO
+ connection URL. If empty, the value in javax.jdo.option.ConnectionURL
+ is used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry a metastore call if there
+ is a connection error.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.interval</name>
+ <value>1000</value>
+ <description>The number of milliseconds between metastore retry
+ attempts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.min.threads</name>
+ <value>200</value>
+ <description>Minimum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.max.threads</name>
+ <value>100000</value>
+ <description>Maximum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the metastore server.
+ Keepalive will prevent accumulation of half-open connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.sasl.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will be secured
+ with SASL. Clients must authenticate with Kerberos.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.thrift.framed.transport.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will use
+ TFramedTransport. When false (default) a standard TTransport is used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.keytab.file</name>
+ <value></value>
+ <description>The path to the Kerberos Keytab file containing the
+ metastore thrift server's service principal.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.principal</name>
+ <value>hive-metastore/_HOST@EXAMPLE.COM</value>
+ <description>The service principal for the metastore thrift server.
+ The special string _HOST will be replaced automatically with the
+ correct host name.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.class</name>
+ <value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value>
+ <description>The delegation token store implementation. Set to
+ org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced
+ cluster.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
+ <value>localhost:2181</value>
+ <description>The ZooKeeper token store connect string.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
+ <value>/hive/cluster/delegation</value>
+ <description>The root path for token store data.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.acl</name>
+ <value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa</value>
+ <description>ACL for token store entries. List all server principals
+ for the cluster, comma separated.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.cache.pinobjtypes</name>
+ <value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order</value>
+ <description>List of comma separated metastore object types that
+ should be pinned in the cache
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication</name>
+ <value>true</value>
+ <description>Remove extra map-reduce jobs if the data is already
+ clustered by the same key which needs to be used again. This should
+ always be set to true. Since it is a new feature, it has been made
+ configurable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication.min.reducer</name>
+ <value>4</value>
+ <description>Reduce deduplication merges two RSs by moving
+ key/parts/reducer-num of the child RS to the parent RS. That means if
+ the reducer-num of the child RS is fixed (order by or forced
+ bucketing) and small, it can result in a very slow single-MR job. The
+ optimization will be disabled if the number of reducers is less than
+ the specified value.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition</name>
+ <value>true</value>
+ <description>Whether or not to allow dynamic partitions in DML/DDL.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition.mode</name>
+ <value>strict</value>
+ <description>In strict mode, the user must specify at least one static
+ partition in case the user accidentally overwrites all partitions.
+ </description>
+ </property>
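+
+ <!-- For example, in strict mode at least one static partition must be -->
+ <!-- pinned (illustrative tables and columns): -->
+ <!-- INSERT OVERWRITE TABLE t PARTITION (ds='2013-01-01', hr) -->
+ <!-- SELECT key, value, hr FROM src; -->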
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions</name>
+ <value>1000</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in total.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions.pernode</name>
+ <value>100</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in each mapper/reducer node.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.created.files</name>
+ <value>100000</value>
+ <description>Maximum number of HDFS files created by all
+ mappers/reducers in a MapReduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.default.partition.name</name>
+ <value>__HIVE_DEFAULT_PARTITION__</value>
+ <description>The default partition name in case the dynamic partition
+ column value is null/empty string or any other value that cannot be
+ escaped. This value must not contain any special character used in
+ HDFS URIs (e.g., ':', '%', '/' etc). The user has to be aware that
+ the dynamic partition value should not contain this value to avoid
+ confusion.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.dbclass</name>
+ <value>jdbc:derby</value>
+ <description>The default database that stores temporary hive
+ statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.autogather</name>
+ <value>true</value>
+ <description>A flag to gather statistics automatically during the
+ INSERT OVERWRITE command.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbcdriver</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>The JDBC driver for the database that stores temporary
+ hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.dbconnectionstring</name>
+ <value>jdbc:derby:;databaseName=TempStatsStore;create=true</value>
+ <description>The default connection string for the database that
+ stores temporary hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.publisher</name>
+ <value></value>
+ <description>The Java class (implementing the StatsPublisher
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.aggregator</name>
+ <value></value>
+ <description>The Java class (implementing the StatsAggregator
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbc.timeout</name>
+ <value>30</value>
+ <description>Timeout value (number of seconds) used by JDBC connection
+ and statements.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.max</name>
+ <value>0</value>
+ <description>Maximum number of retries when the stats
+ publisher/aggregator gets an exception updating the intermediate
+ database. Default is no retries on failures.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.wait</name>
+ <value>3000</value>
+ <description>The base waiting window (in milliseconds) before the
+ next retry. The actual wait time is calculated by baseWindow *
+ failures + baseWindow * (failures + 1) * (random number in [0.0,1.0]).
+ </description>
+ </property>
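+
+ <!-- For example (using the formula above), with the default base window -->
+ <!-- of 3000 ms, the wait before the third attempt (2 failures so far) -->
+ <!-- would be 3000 * 2 + 3000 * 3 * r for a random r in [0.0,1.0], i.e. -->
+ <!-- between 6000 and 15000 ms. -->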
+
+ <property>
+ <name>hive.stats.reliable</name>
+ <value>false</value>
+ <description>Whether queries will fail because stats cannot be
+ collected completely accurately. If this is set to true,
+ reading/writing from/into a partition may fail because the stats
+ could not be computed accurately.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.tablekeys</name>
+ <value>false</value>
+ <description>Whether join and group by keys on tables are derived and
+ maintained in the QueryPlan. This is useful to identify how tables
+ are accessed and to determine if they should be bucketed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.scancols</name>
+ <value>false</value>
+ <description>Whether column accesses are tracked in the QueryPlan.
+ This is useful to identify how tables are accessed and to determine
+ if there are wasted columns that can be trimmed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.ndv.error</name>
+ <value>20.0</value>
+ <description>Standard error expressed in percentage. Provides a
+ tradeoff between accuracy and compute cost. A lower value for error
+ indicates higher accuracy and a higher compute cost.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.key.prefix.max.length</name>
+ <value>200</value>
+ <description>
+ Determines if, when the prefix of the key used for intermediate stats
+ collection exceeds a certain length, a hash of the key is used
+ instead. If the value &lt; 0 then hashing is never used; if the value
+ &gt;= 0 then hashing is used only when the key prefix's length
+ exceeds that value. The key prefix is defined as everything preceding
+ the task ID in the key.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.support.concurrency</name>
+ <value>false</value>
+ <description>Whether hive supports concurrency or not. A zookeeper
+ instance must be up and running for the default hive lock manager to
+ support read-write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.numretries</name>
+ <value>100</value>
+ <description>The number of times you want to try to get all the locks
+ </description>
+ </property>
+
+ <property>
+ <name>hive.unlock.numretries</name>
+ <value>10</value>
+ <description>The number of times you want to retry to do one unlock
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.sleep.between.retries</name>
+ <value>60</value>
+ <description>The sleep time (in seconds) between various retries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.quorum</name>
+ <value></value>
+ <description>The list of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.client.port</name>
+ <value>2181</value>
+ <description>The port of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.session.timeout</name>
+ <value>600000</value>
+ <description>ZooKeeper client's session timeout. The client is
+ disconnected, and as a result, all locks are released, if a heartbeat
+ is not sent within the timeout.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.namespace</name>
+ <value>hive_zookeeper_namespace</value>
+ <description>The parent node under which all zookeeper nodes are
+ created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.clean.extra.nodes</name>
+ <value>false</value>
+ <description>Clean extra nodes at the end of the session.
+ </description>
+ </property>
+
+ <property>
+ <name>fs.har.impl</name>
+ <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
+ <description>The implementation for accessing Hadoop Archives. Note
+ that this won't be applicable to Hadoop versions less than 0.20.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.archive.enabled</name>
+ <value>false</value>
+ <description>Whether archiving operations are permitted</description>
+ </property>
+
+ <property>
+ <name>hive.fetch.output.serde</name>
+ <value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value>
+ <description>The serde used by FetchTask to serialize the fetch
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.mode.local.auto</name>
+ <value>false</value>
+ <description> Let hive determine whether to run in local mode
+ automatically
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.drop.ignorenonexistent</name>
+ <value>true</value>
+ <description>
+ Do not report an error if DROP TABLE/VIEW specifies a non-existent
+ table/view.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.show.job.failure.debug.info</name>
+ <value>true</value>
+ <description>
+ If a job fails, whether to provide a link in the CLI to the task with
+ the most failures, along with debugging hints if applicable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.progress.timeout</name>
+ <value>0</value>
+ <description>
+ How long to run autoprogressor for the script/UDTF operators (in
+ seconds). Set to 0 for forever.
+ </description>
+ </property>
+
+ <!-- HBase Storage Handler Parameters -->
+
+ <property>
+ <name>hive.hbase.wal.enabled</name>
+ <value>true</value>
+ <description>Whether writes to HBase should be forced to the
+ write-ahead log. Disabling this improves HBase write performance at
+ the risk of lost writes in case of a crash.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.table.parameters.default</name>
+ <value></value>
+ <description>Default property values for newly created tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.entity.separator</name>
+ <value>@</value>
+ <description>Separator used to construct names of tables and
+ partitions. For example, dbname@tablename@partitionname
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.createtablelike.properties.whitelist</name>
+ <value></value>
+ <description>Table Properties to copy over when executing a Create
+ Table Like.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.variable.substitute</name>
+ <value>true</value>
+ <description>This enables substitution using syntax like ${var},
+ ${system:var} and ${env:var}.
+ </description>
+ </property>
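+
+ <!-- For example (illustrative variable name): -->
+ <!-- set mytab=src; -->
+ <!-- SELECT COUNT(*) FROM ${hiveconf:mytab}; -->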
+
+ <property>
+ <name>hive.variable.substitute.depth</name>
+ <value>40</value>
+ <description>The maximum replacements the substitution engine will do.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.conf.validation</name>
+ <value>true</value>
+ <description>Enables type checking for registered Hive configurations.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.enabled</name>
+ <value>false</value>
+ <description>enable or disable the hive client authorization
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.user.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some users
+ whenever a table gets created. An example like
+ "userX,userY:select;userZ:create" will grant select privilege to
+ userX and userY, and grant create privilege to userZ whenever a new
+ table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.group.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some groups
+ whenever a table gets created. An example like
+ "groupX,groupY:select;groupZ:create" will grant select privilege to
+ groupX and groupY, and grant create privilege to groupZ whenever a
+ new table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.role.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some roles
+ whenever a table gets created. An example like
+ "roleX,roleY:select;roleZ:create" will grant select privilege to
+ roleX and roleY, and grant create privilege to roleZ whenever a new
+ table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.owner.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to the owner
+ whenever a table gets created. An example like "select,drop" will
+ grant select and drop privilege to the owner of the table.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.authorization.storage.checks</name>
+ <value>false</value>
+ <description>Should the metastore do authorization checks against the
+ underlying storage for operations like drop-partition (disallow the
+ drop-partition if the user in question doesn't have permissions to
+ delete the corresponding directory on the storage).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.error.on.empty.partition</name>
+ <value>false</value>
+ <description>Whether to throw an exception if a dynamic partition
+ insert generates empty results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.file.ignore.hdfs</name>
+ <value>false</value>
+ <description>If true, the HDFS location stored in the index file will
+ be ignored at runtime. If the data got moved or the name of the
+ cluster got changed, the index data should still be usable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.minsize</name>
+ <value>5368709120</value>
+ <description>Minimum size (in bytes) of the inputs on which a compact
+ index is automatically used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.maxsize</name>
+ <value>-1</value>
+ <description>Maximum size (in bytes) of the inputs on which a compact
+ index is automatically used. A negative number is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.size</name>
+ <value>10737418240</value>
+ <description>The maximum number of bytes that a query using the
+ compact index can read. Negative value is equivalent to infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.entries</name>
+ <value>10000000</value>
+ <description>The maximum number of index entries to read during a
+ query that uses the compact index. Negative value is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.binary.search</name>
+ <value>true</value>
+ <description>Whether or not to use a binary search to find the entries
+ in an index table that match the filter, where possible
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exim.uri.scheme.whitelist</name>
+ <value>hdfs,pfile</value>
+ <description>A comma separated list of acceptable URI schemes for
+ import and export.
+ </description>
+ </property>
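+
+ <!-- For example, with the default whitelist the following export is -->
+ <!-- accepted (illustrative path): -->
+ <!-- EXPORT TABLE src TO 'hdfs:/tmp/src_export'; -->
+ <!-- while a target using a scheme outside the list would be rejected. -->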
+
+ <property>
+ <name>hive.lock.mapred.only.operation</name>
+ <value>false</value>
+ <description>This parameter controls whether to acquire locks only
+ for queries that need to execute at least one mapred job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.row.max.size</name>
+ <value>100000</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ the minimum size we need to guarantee each row to have.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.limit.file</name>
+ <value>10</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ the maximum number of files we can sample.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.enable</name>
+ <value>false</value>
+ <description>Whether to enable the optimization of trying a smaller
+ subset of data for simple LIMIT first.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.fetch.max</name>
+ <value>50000</value>
+ <description>Maximum number of rows allowed for a smaller subset of
+ data for simple LIMIT, if it is a fetch query. Insert queries are not
+ restricted by this limit.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.rework.mapredwork</name>
+ <value>false</value>
+ <description>Whether to rework the mapred work or not. This was first
+ introduced by SymlinkTextInputFormat to replace symlink files with
+ real paths at compile time.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.concatenate.check.index</name>
+ <value>true</value>
+ <description>If this is set to true, Hive will throw an error when
+ doing 'alter table tbl_name [partSpec] concatenate' on a
+ table/partition that has indexes on it. The reason a user would want
+ to set this to true is that it can help avoid handling all the index
+ drop, recreation, and rebuild work. This is very helpful for tables
+ with thousands of partitions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.sample.seednumber</name>
+ <value>0</value>
+ <description>A number used for percentage sampling. By changing this
+ number, the user changes the subsets of data sampled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.io.exception.handlers</name>
+ <value></value>
+ <description>A list of io exception handler class names. This is used
+ to construct a list of exception handlers to handle exceptions thrown
+ by record readers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.label</name>
+ <value>_c</value>
+ <description>String used as a prefix when auto-generating column
+ aliases. By default the prefix label is appended with a column
+ position number to form the column alias. Auto-generation happens if
+ an aggregate function is used in a select clause without an explicit
+ alias; e.g. "select count(1) from t" yields a column named _c0.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.includefuncname</name>
+ <value>false</value>
+ <description>Whether to include the function name in the column alias
+ auto-generated by hive.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.perf.logger</name>
+ <value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
+ <description>The class responsible for logging client-side performance
+ metrics. Must be a subclass of
+ org.apache.hadoop.hive.ql.log.PerfLogger.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.start.cleanup.scratchdir</name>
+ <value>false</value>
+ <description>Whether to clean up the hive scratchdir while starting the
+ hive server.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.output.file.extension</name>
+ <value></value>
+ <description>String used as a file extension for output files. If not
+ set, defaults to the codec extension for text files (e.g. ".gz"), or
+ no extension otherwise.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.insert.into.multilevel.dirs</name>
+ <value>false</value>
+ <description>Whether to insert into multilevel directories like
+ "insert directory '/HIVEFT25686/chinna/' from table".
+ </description>
+ </property>
+
+ <property>
+ <name>hive.warehouse.subdir.inherit.perms</name>
+ <value>false</value>
+ <description>Set this to true if the table directories should inherit
+ the permission of the warehouse or database directory instead of being
+ created with the permissions derived from the dfs umask.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.job.debug.capture.stacktraces</name>
+ <value>true</value>
+ <description>Whether or not stack traces parsed from the task logs of
+ a sampled failed task for each failed job should be stored in the
+ SessionState.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.driver.run.hooks</name>
+ <value></value>
+ <description>A comma separated list of hooks which implement
+ HiveDriverRunHook and will be run at the beginning and end of
+ Driver.run; these will be run in the order specified.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.output.format</name>
+ <value>text</value>
+ <description>
+ The data format to use for DDL output. One of "text" (for human
+ readable text) or "json" (for a json object).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.transform.escape.input</name>
+ <value>false</value>
+ <description>
+ This adds an option to escape special chars (newlines, carriage
+ returns and tabs) when they are passed to the user script. This is
+ useful if the hive tables can contain data that contains special
+ characters.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.rcfile.use.explicit.header</name>
+ <value>true</value>
+ <description>
+ If this is set, the header for RC Files will simply be RCF. If this
+ is not set, the header will be the one borrowed from sequence files,
+ e.g. SEQ- followed by the input and output RC File formats.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multi.insert.move.tasks.share.dependencies</name>
+ <value>false</value>
+ <description>
+ If this is set, all move tasks for tables/partitions (not
+ directories) at the end of a multi-insert query will only begin once
+ the dependencies for all these move tasks have been met.
+ Advantages: If concurrency is enabled, the locks will only be
+ released once the query has finished, so with this config enabled,
+ the time when the table/partition is generated will be much closer to
+ when the lock on it is released.
+ Disadvantages: If concurrency is not enabled, with this disabled, the
+ tables/partitions which are produced by this query and finish earlier
+ will be available for querying much earlier. Since the locks are only
+ released once the query finishes, this does not apply if concurrency
+ is enabled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.fetch.task.conversion</name>
+ <value>minimal</value>
+ <description>
+ Some select queries can be converted to a single FETCH task,
+ minimizing latency. Currently the query should be single sourced,
+ having no subquery, and should not have any aggregations or distincts
+ (which incur RS), lateral views or joins.
+ 1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only
+ 2. more : SELECT, FILTER, LIMIT only (TABLESAMPLE, virtual columns)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hmshandler.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry a HMSHandler call if there
+ was a connection error.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hmshandler.retry.interval</name>
+ <value>1000</value>
+ <description>The number of milliseconds between HMSHandler retry
+ attempts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.read.socket.timeout</name>
+ <value>10</value>
+ <description>Timeout for the HiveServer to close the connection if no
+ response from the client in N seconds, defaults to 10 seconds.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the Hive server.
+ Keepalive will prevent accumulation of half-open connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.decode.partition.name</name>
+ <value>false</value>
+ <description>Whether to show the unquoted partition names in query
+ results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file.
+ If the property is not set, then logging will be initialized using
+ hive-log4j.properties found on the classpath.
+ If the property is set, the value must be a valid URI (java.net.URI,
+ e.g. "file:///tmp/my-logging.properties"), which you can then extract
+ a URL from and pass to PropertyConfigurator.configure(URL).
+ </description>
+ </property>
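
Note: the URI-to-URL flow named in the description is a short call with log4j 1.x. A minimal sketch, assuming log4j 1.x is on the classpath and the properties file exists (the path is the hypothetical one from the example above):

    import java.net.URI;
    import org.apache.log4j.PropertyConfigurator;

    public class ConfigureHiveLogging {
        public static void main(String[] args) throws Exception {
            // Same shape as the example URI in the description above.
            URI uri = URI.create("file:///tmp/my-logging.properties");
            // Extract a URL and hand it to log4j, as the description says.
            PropertyConfigurator.configure(uri.toURL());
        }
    }
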
+
+ <property>
+ <name>hive.exec.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file for execution mode (sub
+ command).
+ If the property is not set, then logging will be initialized using
+ hive-exec-log4j.properties found on the classpath.
+ If the property is set, the value must be a valid URI (java.net.URI,
+ e.g. "file:///tmp/my-logging.properties"), which you can then extract
+ a URL from and pass to PropertyConfigurator.configure(URL).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort</name>
+ <value>false</value>
+ <description>
+ If this is set, when writing partitions, the metadata will include
+ the bucketing/sorting properties with which the data was written, if
+ any (this will not overwrite the metadata inherited from the table if
+ the table is bucketed/sorted).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort.num.buckets.power.two</name>
+ <value>false</value>
+ <description>
+ If this is set, when setting the number of reducers for the map
+ reduce task which writes the final output files, it will choose a
+ number which is a power of two, unless the user specifies the number
+ of reducers to use via mapred.reduce.tasks. The number of reducers
+ may be set to a power of two only to be followed by a merge task,
+ meaning nothing ends up being inferred.
+ With hive.exec.infer.bucket.sort set to true:
+ Advantages: If this is not set, the number of buckets for partitions
+ will seem arbitrary, which means that the number of mappers used for
+ optimized joins, for example, will be very low. With this set, since
+ the number of buckets used for any partition is a power of two, the
+ number of mappers used for optimized joins will be the least number
+ of buckets used by any partition being joined.
+ Disadvantages: This may mean a much larger or much smaller number of
+ reducers being used in the final map reduce job, e.g. if a job was
+ originally going to take 257 reducers, it will now take 512 reducers;
+ similarly, if the max number of reducers is 511 and a job was going
+ to use this many, it will now use 256 reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.orderby.position.alias</name>
+ <value>false</value>
+ <description>Whether to enable using Column Position Alias in Group By
+ or Order By
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.min.worker.threads</name>
+ <value>5</value>
+ <description>Minimum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.max.worker.threads</name>
+ <value>100</value>
+ <description>Maximum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.port</name>
+ <value>10000</value>
+ <description>Port number of HiveServer2 Thrift interface.
+ Can be overridden by setting $HIVE_SERVER2_THRIFT_PORT.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.bind.host</name>
+ <value>localhost</value>
+ <description>Bind host on which to run the HiveServer2 Thrift
+ interface.
+ Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST.
+ </description>
+ </property>
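
Note: a minimal client-side smoke test against the two defaults above (localhost:10000). This is a sketch under stated assumptions: HiveServer2 is running, hive.server2.authentication is NONE, and the hive-jdbc driver is on the classpath; the user name is arbitrary under NONE auth.

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class HiveServer2Smoke {
        public static void main(String[] args) throws Exception {
            // HiveServer2 JDBC driver and URL scheme.
            Class.forName("org.apache.hive.jdbc.HiveDriver");
            try (Connection conn = DriverManager.getConnection(
                    "jdbc:hive2://localhost:10000/default", "hive", "");
                 Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery("show tables")) {
                while (rs.next()) {
                    System.out.println(rs.getString(1));
                }
            }
        }
    }
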
+
+ <property>
+ <name>hive.server2.authentication</name>
+ <value>NONE</value>
+ <description>
+ Client authentication types.
+ NONE: no authentication check
+ LDAP: LDAP/AD based authentication
+ KERBEROS: Kerberos/GSSAPI authentication
+ CUSTOM: Custom authentication provider
+ (Use with property hive.server2.custom.authentication.class)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.custom.authentication.class</name>
+ <value></value>
+ <description>
+ Custom authentication class. Used when property
+ 'hive.server2.authentication' is set to 'CUSTOM'. The provided class
+ must be a proper implementation of the interface
+ org.apache.hive.service.auth.PasswdAuthenticationProvider.
+ HiveServer2 will call its Authenticate(user, password) method to
+ authenticate requests. The implementation may optionally extend
+ Hadoop's org.apache.hadoop.conf.Configured class to grab Hive's
+ Configuration object.
+ </description>
+ </property>
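
Note: to make the contract above concrete, a minimal sketch of a CUSTOM provider. The static credential check is a placeholder assumption; a real provider would consult an external store. The class would be registered via hive.server2.custom.authentication.class.

    import javax.security.sasl.AuthenticationException;
    import org.apache.hive.service.auth.PasswdAuthenticationProvider;

    public class StaticPasswdAuthProvider implements PasswdAuthenticationProvider {
        @Override
        public void Authenticate(String user, String password)
                throws AuthenticationException {
            // Illustrative hard-coded check only.
            if (!"hive".equals(user) || !"secret".equals(password)) {
                throw new AuthenticationException("Invalid user/password");
            }
        }
    }
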
+
+ <property>
+ <name>hive.server2.authentication.kerberos.principal</name>
+ <value></value>
+ <description>
+ Kerberos server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.kerberos.keytab</name>
+ <value></value>
+ <description>
+ Kerberos keytab file for server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.url</name>
+ <value></value>
+ <description>
+ LDAP connection URL
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.baseDN</name>
+ <value></value>
+ <description>
+ LDAP base DN
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.enable.doAs</name>
+ <value>true</value>
+ <description>
+ Setting this property to true will have HiveServer2 execute Hive
+ operations as the user making the calls to it.
+ </description>
+ </property>
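
Note: the behavior behind hive.server2.enable.doAs follows Hadoop's standard proxy-user pattern. A sketch of that general mechanism using UserGroupInformation — this is not HiveServer2's exact code path, and the user name "alice" is hypothetical:

    import java.security.PrivilegedExceptionAction;
    import org.apache.hadoop.security.UserGroupInformation;

    public class DoAsSketch {
        public static void main(String[] args) throws Exception {
            // Impersonate the calling user on top of the service's login user.
            UserGroupInformation proxy = UserGroupInformation.createProxyUser(
                    "alice", UserGroupInformation.getLoginUser());
            proxy.doAs(new PrivilegedExceptionAction<Void>() {
                @Override
                public Void run() throws Exception {
                    // HDFS/metastore access here is performed as "alice".
                    return null;
                }
            });
        }
    }
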
+
+
+</configuration>
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/copylog.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/copylog.sh
new file mode 100644
index 0000000..7767b2d
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/copylog.sh
@@ -0,0 +1,7 @@
+. conf/cluster.properties
+
+NODEID=`hostname | cut -d '.' -f 1`
+#echo $NODEID
+
+#echo "rsync ${NCLOGS_DIR}/${NODEID}.log ${1}:${2}"
+rsync "${NCLOGS_DIR}/${NODEID}.log" "${1}:${2}"
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/dumpAll.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/dumpAll.sh
new file mode 100644
index 0000000..e7d45e8
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/dumpAll.sh
@@ -0,0 +1,12 @@
+. conf/cluster.properties
+PREGELIX_PATH=`pwd`
+LOG_PATH=$PREGELIX_PATH/logs/
+rm -rf "$LOG_PATH"
+mkdir "$LOG_PATH"
+ccname=`hostname`
+
+for i in `cat conf/slaves`
+do
+ ssh $i "cd ${PREGELIX_PATH}; bin/dumptrace.sh; bin/copylog.sh ${ccname} ${LOG_PATH}"
+done
+
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/dumptrace.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/dumptrace.sh
new file mode 100644
index 0000000..9fe55f0
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/dumptrace.sh
@@ -0,0 +1,15 @@
+echo `hostname`
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=pregelixnc'|awk '{print $2}'`
+
+if [ "$PID" == "" ]; then
+ PID=`ps -ef|grep ${USER}|grep java|grep 'hyracks'|awk '{print $2}'`
+fi
+
+if [ "$PID" == "" ]; then
+ USERID=`id | sed 's/^uid=//;s/(.*$//'`
+ PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=pregelixnc'|awk '{print $2}'`
+fi
+
+echo $PID
+kill -QUIT $PID
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/hive b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive
index 38a9e33..d2ef909 100755
--- a/hivesterix/hivesterix-dist/src/main/resources/scripts/hive
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive
@@ -175,13 +175,6 @@
exit 5
fi
-if [ $hadoop_minor_ver -ne 20 -o $hadoop_patch_ver -eq 0 ]; then
- echo "Hive requires Hadoop 0.20.x (x >= 1)."
- echo "'hadoop version' returned:"
- echo `$HADOOP version`
- exit 6
-fi
-
if [ "${AUX_PARAM}" != "" ]; then
HIVE_OPTS="$HIVE_OPTS -hiveconf hive.aux.jars.path=${AUX_PARAM}"
AUX_JARS_CMD_LINE="-libjars ${AUX_PARAM}"
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
index b024269..5fdfb3a 100644
--- a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
@@ -12,77 +12,77 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.test.runtimefunction;
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
-
-public class RuntimeFunctionTestSuite extends AbstractTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
-
- public static Test suite() throws Exception {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- RuntimeFunctionTestSuite testSuite = new RuntimeFunctionTestSuite();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile()) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new RuntimeFunctionTestCase(qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
+package edu.uci.ics.hivesterix.test.runtimefunction;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
+
+public class RuntimeFunctionTestSuite extends AbstractTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+ public static Test suite() throws Exception {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ RuntimeFunctionTestSuite testSuite = new RuntimeFunctionTestSuite();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile()) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new RuntimeFunctionTestCase(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
index 3f1214a..bb07665 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
@@ -13,9 +13,6 @@
-- create the result table
create table q10_returned_item (c_custkey int, c_name string, revenue double, c_acctbal string, n_name string, c_address string, c_phone string, c_comment string);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
insert overwrite table q10_returned_item
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
index 062f7b9..ae2fa3a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
@@ -9,8 +9,6 @@
-- create the result table
create table q12_shipping(l_shipmode string, high_line_count double, low_line_count double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-- the query
insert overwrite table q12_shipping
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
index 988f400..4644d23 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q14_promotion_effect(promo_revenue double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-
-- the query
insert overwrite table q14_promotion_effect
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
index 04064ed..8fa333e 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
@@ -14,8 +14,6 @@
create table q15_top_supplier(s_suppkey int, s_name string, s_address string, s_phone string, total_revenue double);
-set mapred.min.split.size=536870912;
-
-- the query
insert overwrite table revenue
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
index 76d0475..c617c26 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
@@ -13,8 +13,6 @@
create table q18_tmp(l_orderkey int, t_sum_quantity double);
create table q18_large_volume_customer(c_name string, c_custkey int, o_orderkey int, o_orderdate string, o_totalprice double, sum_quantity double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1164000000;
-- the query
insert overwrite table q18_tmp
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
index fd330cd..a7a0a0a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
@@ -9,8 +9,6 @@
-- create the result table
create table q19_discounted_revenue(revenue double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-- the query
insert overwrite table q19_discounted_revenue
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
index a002068..af64a4f 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
@@ -7,8 +7,6 @@
-- create the target table
CREATE TABLE q1_pricing_summary_report ( L_RETURNFLAG STRING, L_LINESTATUS STRING, SUM_QTY DOUBLE, SUM_BASE_PRICE DOUBLE, SUM_DISC_PRICE DOUBLE, SUM_CHARGE DOUBLE, AVE_QTY DOUBLE, AVE_PRICE DOUBLE, AVE_DISC DOUBLE, COUNT_ORDER INT);
-set mapred.min.split.size=536870912;
-
-- the query
INSERT OVERWRITE TABLE q1_pricing_summary_report
SELECT
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
index 63297e6..3149962 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
@@ -22,7 +22,6 @@
create table q20_tmp4(ps_suppkey int);
create table q20_potential_part_promotion(s_name string, s_address string);
-set mapred.min.split.size=536870912;
-- the query
insert overwrite table q20_tmp1
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
index 0049eb3..67f6dc4 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
@@ -11,9 +11,6 @@
-- create the target table
create table q3_shipping_priority (l_orderkey int, revenue double, o_orderdate string, o_shippriority int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
Insert overwrite table q3_shipping_priority
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
index aa828e9..efbcff2 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
@@ -11,7 +11,6 @@
CREATE TABLE q4_order_priority_tmp (O_ORDERKEY INT);
CREATE TABLE q4_order_priority (O_ORDERPRIORITY STRING, ORDER_COUNT INT);
-set mapred.min.split.size=536870912;
-- the query
INSERT OVERWRITE TABLE q4_order_priority_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
index a975ce1..091f000 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
@@ -17,7 +17,6 @@
-- create the target table
create table q5_local_supplier_volume (N_NAME STRING, REVENUE DOUBLE);
-set mapred.min.split.size=536870912;
-- the query
insert overwrite table q5_local_supplier_volume
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
index 3dfb22a..444644f 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
@@ -17,8 +17,6 @@
create table q7_volume_shipping (supp_nation string, cust_nation string, l_year int, revenue double);
create table q7_volume_shipping_tmp(supp_nation string, cust_nation string, s_nationkey int, c_nationkey int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-- the query
insert overwrite table q7_volume_shipping_tmp
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
index 586779c..a9bb58b 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
@@ -17,8 +17,6 @@
-- create the result table
create table q9_product_type_profit (nation string, o_year string, sum_profit double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-- the query
insert overwrite table q9_product_type_profit
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
index 2891c56..70b3538 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
@@ -6,4 +6,7 @@
create external table orders (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/orders';
create external table customer (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/customer';
-select l_linenumber, o_orderkey, o_totalprice, o_orderdate, o_shippriority from customer c join orders o on c.c_custkey = o.o_custkey join lineitem l on o.o_orderkey = l.l_orderkey where c.c_custkey<5 and o.o_totalprice<30000;
+select l_linenumber, o_orderkey, o_totalprice, o_orderdate, o_shippriority from
+ customer c join orders o on c.c_custkey = o.o_custkey
+ join lineitem l on o.o_orderkey = l.l_orderkey
+ where c.c_custkey<5 and o.o_totalprice<30000;
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
index 05b3718..bda1113 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
@@ -1,5 +1,5 @@
write [%0->$$38, %0->$$39, %0->$$45, %0->$$40, %0->$$42, %0->$$43, %0->$$41, %0->$$44]
--- SINK_WRITE |UNPARTITIONED|
+-- SINK_WRITE |PARTITIONED|
project ([$$38, $$39, $$45, $$40, $$42, $$43, $$41, $$44])
-- STREAM_PROJECT |PARTITIONED|
limit 20
@@ -9,11 +9,11 @@
exchange
-- SORT_MERGE_EXCHANGE [$$45(DESC) ] |PARTITIONED|
limit 20
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (DESC, %0->$$45)
- -- STABLE_SORT [$$45(DESC)] |LOCAL|
+ -- STABLE_SORT [$$45(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$38 := %0->$$48; $$39 := %0->$$49; $$40 := %0->$$50; $$41 := %0->$$51; $$42 := %0->$$52; $$43 := %0->$$53; $$44 := %0->$$54]) decor ([]) {
@@ -31,7 +31,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$21, $$22, $$26, $$25, $$18, $$23, $$28] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$21, $$22, $$26, $$25, $$18, $$23, $$28] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$21, $$22, $$23, $$25, $$26, $$28, $$18, $$6, $$7])
@@ -82,7 +82,7 @@
-- HASH_PARTITION_EXCHANGE [$$30] |PARTITIONED|
project ([$$30, $$29])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$33, 1994-01-01], function-call: algebricks:ge, Args:[%0->$$33, 1993-10-01], function-call: algebricks:lt, Args:[%0->$$33, 1994-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$33, 1993-10-01], function-call: algebricks:lt, Args:[%0->$$33, 1994-01-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -99,4 +99,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan
index 70ad7ee..8195ef0 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan
@@ -19,7 +19,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$1] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$1] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$1, $$3, $$4])
@@ -85,7 +85,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$2]<-[$$1, $$2] <- default.q11_part_tmp
@@ -99,7 +99,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$3(DESC) ] |PARTITIONED|
order (DESC, %0->$$3)
- -- STABLE_SORT [$$3(DESC)] |LOCAL|
+ -- STABLE_SORT [$$3(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$2, $$3])
@@ -123,4 +123,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
index 5c240e2..d976bba 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
@@ -7,7 +7,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$26(ASC) ] |PARTITIONED|
order (ASC, %0->$$26)
- -- STABLE_SORT [$$26(ASC)] |LOCAL|
+ -- STABLE_SORT [$$26(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$26 := %0->$$34]) decor ([]) {
@@ -25,7 +25,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$24] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$24] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$6, $$24])
@@ -38,7 +38,7 @@
-- HASH_PARTITION_EXCHANGE [$$10] |PARTITIONED|
project ([$$10, $$24])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$22, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$22, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$20, %0->$$21], function-call: algebricks:lt, Args:[%0->$$21, %0->$$22], function-call: algebricks:lt, Args:[%0->$$20, %0->$$21], function-call: algebricks:ge, Args:[%0->$$22, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$22, 1995-01-01], function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$24, MAIL], function-call: algebricks:eq, Args:[%0->$$24, SHIP]], function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$24, MAIL], function-call: algebricks:eq, Args:[%0->$$24, SHIP]]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$21, %0->$$22], function-call: algebricks:lt, Args:[%0->$$20, %0->$$21], function-call: algebricks:ge, Args:[%0->$$22, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$22, 1995-01-01], function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$24, MAIL], function-call: algebricks:eq, Args:[%0->$$24, SHIP]]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -55,4 +55,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
index 19bcd24..40cedd6 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
@@ -7,74 +7,65 @@
exchange
-- SORT_MERGE_EXCHANGE [$$21(DESC), $$20(DESC) ] |PARTITIONED|
order (DESC, %0->$$21) (DESC, %0->$$20)
- -- STABLE_SORT [$$21(DESC), $$20(DESC)] |LOCAL|
+ -- STABLE_SORT [$$21(DESC), $$20(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- group by ([$$20 := %0->$$28]) decor ([]) {
- aggregate [$$21] <- [function-call: hive:count(FINAL), Args:[%0->$$27]]
+ group by ([$$20 := %0->$$26]) decor ([]) {
+ aggregate [$$21] <- [function-call: hive:count(FINAL), Args:[%0->$$25]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$28] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$26] |PARTITIONED|
exchange
- -- HASH_PARTITION_EXCHANGE [$$28] |PARTITIONED|
- group by ([$$28 := %0->$$19]) decor ([]) {
- aggregate [$$27] <- [function-call: hive:count(PARTIAL1), Args:[1]]
+ -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
+ group by ([$$26 := %0->$$19]) decor ([]) {
+ aggregate [$$25] <- [function-call: hive:count(PARTIAL1), Args:[1]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$19] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$19] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$19])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- group by ([$$18 := %0->$$26]) decor ([]) {
- aggregate [$$19] <- [function-call: hive:count(FINAL), Args:[%0->$$25]]
+ group by ([$$18 := %0->$$10]) decor ([]) {
+ aggregate [$$19] <- [function-call: hive:count(COMPLETE), Args:[%0->$$1]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$26] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$10] |PARTITIONED|
exchange
- -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
- group by ([$$26 := %0->$$10]) decor ([]) {
- aggregate [$$25] <- [function-call: hive:count(PARTIAL1), Args:[%0->$$1]]
- -- AGGREGATE |LOCAL|
- nested tuple source
- -- NESTED_TUPLE_SOURCE |LOCAL|
- }
- -- EXTERNAL_GROUP_BY[$$10] |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$10, $$1])
+ -- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- project ([$$10, $$1])
- -- STREAM_PROJECT |PARTITIONED|
+ left outer join (function-call: algebricks:eq, Args:[%0->$$2, %0->$$10])
+ -- HYBRID_HASH_JOIN [$$10][$$2] |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- left outer join (function-call: algebricks:eq, Args:[%0->$$2, %0->$$10])
- -- HYBRID_HASH_JOIN [$$10][$$2] |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$10] |PARTITIONED|
+ data-scan [$$10]<-[$$10, $$11, $$12, $$13, $$14, $$15, $$16, $$17] <- default.customer
+ -- DATASOURCE_SCAN |PARTITIONED|
exchange
- -- HASH_PARTITION_EXCHANGE [$$10] |PARTITIONED|
- data-scan [$$10]<-[$$10, $$11, $$12, $$13, $$14, $$15, $$16, $$17] <- default.customer
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
- exchange
- -- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
- project ([$$2, $$1])
- -- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$9, %special%requests%]])
- -- STREAM_SELECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
+ project ([$$2, $$1])
+ -- STREAM_PROJECT |PARTITIONED|
+ select (function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$9, %special%requests%]])
+ -- STREAM_SELECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan [$$1, $$2, $$9]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.orders
+ -- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$1, $$2, $$9]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.orders
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
index 21b90bd..e6d1c1d 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
@@ -21,7 +21,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$5, $$15, $$16])
@@ -34,7 +34,7 @@
-- HASH_PARTITION_EXCHANGE [$$11] |PARTITIONED|
project ([$$11, $$15, $$16])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$20, 1995-10-01], function-call: algebricks:ge, Args:[%0->$$20, 1995-09-01], function-call: algebricks:lt, Args:[%0->$$20, 1995-10-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$20, 1995-09-01], function-call: algebricks:lt, Args:[%0->$$20, 1995-10-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -51,4 +51,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
index a5bd27a..c61ed37 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
@@ -19,12 +19,12 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$3] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$3] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$3, $$6, $$7])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1996-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1996-04-01], function-call: algebricks:ge, Args:[%0->$$11, 1996-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1996-04-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1996-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1996-04-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -53,7 +53,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- PRE_CLUSTERED_GROUP_BY[] |LOCAL|
+ -- PRE_CLUSTERED_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$2]<-[$$1, $$2] <- default.revenue
@@ -67,7 +67,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$2(ASC) ] |PARTITIONED|
order (ASC, %0->$$2)
- -- STABLE_SORT [$$2(ASC)] |LOCAL|
+ -- STABLE_SORT [$$2(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$2, $$3, $$4, $$6, $$10])
@@ -107,4 +107,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
index 9835346..c986cb4 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
@@ -38,7 +38,7 @@
-- HYBRID_HASH_JOIN [$$2][$$11] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$6, MEDIUM POLISHED%]], function-call: algebricks:neq, Args:[%0->$$5, Brand#45], function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$6, MEDIUM POLISHED%]]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:neq, Args:[%0->$$5, Brand#45], function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$6, MEDIUM POLISHED%]]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -65,7 +65,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$13(DESC), $$10(ASC), $$11(ASC), $$12(ASC) ] |PARTITIONED|
order (DESC, %0->$$13) (ASC, %0->$$10) (ASC, %0->$$11) (ASC, %0->$$12)
- -- STABLE_SORT [$$13(DESC), $$10(ASC), $$11(ASC), $$12(ASC)] |LOCAL|
+ -- STABLE_SORT [$$13(DESC), $$10(ASC), $$11(ASC), $$12(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$10 := %0->$$5; $$11 := %0->$$6; $$12 := %0->$$7]) decor ([]) {
@@ -95,4 +95,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
index a827007..8a75f64 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
@@ -21,7 +21,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$2] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$2] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$2, $$5]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9, $$10, $$11, $$12, $$13, $$14, $$15, $$16] <- default.lineitem
@@ -53,13 +53,13 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$17])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:lt, Args:[%0->$$16, %0->$$2])
- -- STREAM_SELECT |UNPARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:eq, Args:[%0->$$13, %0->$$1])
@@ -84,7 +84,7 @@
-- HASH_PARTITION_EXCHANGE [$$3] |PARTITIONED|
project ([$$3])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$9, MED BOX], function-call: algebricks:eq, Args:[%0->$$6, Brand#23], function-call: algebricks:eq, Args:[%0->$$9, MED BOX]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$6, Brand#23], function-call: algebricks:eq, Args:[%0->$$9, MED BOX]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -101,4 +101,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
index ea47ea0..eb78f1d 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
@@ -19,7 +19,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$1] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$1] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$1, $$5]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9, $$10, $$11, $$12, $$13, $$14, $$15, $$16] <- default.lineitem
@@ -29,7 +29,7 @@
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
write [%0->$$36, %0->$$37, %0->$$38, %0->$$39, %0->$$40, %0->$$41]
--- SINK_WRITE |UNPARTITIONED|
+-- SINK_WRITE |PARTITIONED|
project ([$$36, $$37, $$38, $$39, $$40, $$41])
-- STREAM_PROJECT |PARTITIONED|
limit 100
@@ -39,11 +39,11 @@
exchange
-- SORT_MERGE_EXCHANGE [$$40(DESC), $$39(ASC) ] |PARTITIONED|
limit 100
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (DESC, %0->$$40) (ASC, %0->$$39)
- -- STABLE_SORT [$$40(DESC), $$39(ASC)] |LOCAL|
+ -- STABLE_SORT [$$40(DESC), $$39(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$36 := %0->$$44; $$37 := %0->$$45; $$38 := %0->$$46; $$39 := %0->$$47; $$40 := %0->$$48]) decor ([]) {
@@ -61,7 +61,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$20, $$19, $$27, $$31, $$30] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$20, $$19, $$27, $$31, $$30] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$19, $$20, $$27, $$30, $$31, $$7])
@@ -90,7 +90,7 @@
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:gt, Args:[%0->$$2, 300])
+ select (function-call: algebricks:gt, Args:[%0->$$2, 300.0])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -123,4 +123,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
index 1827729..9e97b7a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
@@ -17,13 +17,13 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$15, $$16])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:or, Args:[function-call: algebricks:or, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#12], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, SM CASE||SM BOX||SM PACK||SM PKG]], function-call: algebricks:ge, Args:[%0->$$14, 1]], function-call: algebricks:le, Args:[%0->$$14, 11]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 5]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#23], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, MED BAG||MED BOX||MED PKG||MED PACK]], function-call: algebricks:ge, Args:[%0->$$14, 10]], function-call: algebricks:le, Args:[%0->$$14, 20]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 10]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#34], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, LG CASE||LG BOX||LG PACK||LG PKG]], function-call: algebricks:ge, Args:[%0->$$14, 20]], function-call: algebricks:le, Args:[%0->$$14, 30]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 15]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]])
- -- STREAM_SELECT |UNPARTITIONED|
+ select (function-call: algebricks:or, Args:[function-call: algebricks:or, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#12], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, SM CASE||SM BOX||SM PACK||SM PKG]], function-call: algebricks:ge, Args:[%0->$$14, 1.0]], function-call: algebricks:le, Args:[%0->$$14, 11.0]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 5]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#23], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, MED BAG||MED BOX||MED PKG||MED PACK]], function-call: algebricks:ge, Args:[%0->$$14, 10.0]], function-call: algebricks:le, Args:[%0->$$14, 20.0]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 10]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#34], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, LG CASE||LG BOX||LG PACK||LG PKG]], function-call: algebricks:ge, Args:[%0->$$14, 20.0]], function-call: algebricks:le, Args:[%0->$$14, 30.0]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 15]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]])
+ -- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:eq, Args:[%0->$$11, %0->$$1])
@@ -43,4 +43,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
index 0e9c90f..de964ac 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
@@ -7,7 +7,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$17(ASC), $$18(ASC) ] |PARTITIONED|
order (ASC, %0->$$17) (ASC, %0->$$18)
- -- STABLE_SORT [$$17(ASC), $$18(ASC)] |LOCAL|
+ -- STABLE_SORT [$$17(ASC), $$18(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$17 := %0->$$37; $$18 := %0->$$38]) decor ([]) {
@@ -25,7 +25,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$9, $$10] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$9, $$10] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$9, $$10, $$5, $$6, $$7, $$8])
@@ -39,4 +39,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
index eddfca5..ded599c 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
@@ -5,9 +5,9 @@
distinct ([%0->$$1])
-- PRE_SORTED_DISTINCT_BY |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$1)
- -- STABLE_SORT [$$1(ASC)] |LOCAL|
+ -- STABLE_SORT [$$1(ASC)] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1])
@@ -45,12 +45,12 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$2, $$3] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$2, $$3] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$2, $$3, $$5])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -107,9 +107,9 @@
distinct ([%0->$$1])
-- PRE_SORTED_DISTINCT_BY |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$1)
- -- STABLE_SORT [$$1(ASC)] |LOCAL|
+ -- STABLE_SORT [$$1(ASC)] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1])
@@ -129,7 +129,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$3(ASC) ] |PARTITIONED|
order (ASC, %0->$$3)
- -- STABLE_SORT [$$3(ASC)] |LOCAL|
+ -- STABLE_SORT [$$3(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$3, $$4])
@@ -175,4 +175,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
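Several baselines also lose duplicated conjuncts: the q20 hunk above collapses and(ge($$11, 1994-01-01), lt($$11, 1995-01-01), ge($$11, 1994-01-01), lt($$11, 1995-01-01)) into the two-term form, and the same cleanup recurs in q2, q4, q5, q7, and u7 below. This is likely the work of a rule such as RemoveRedundantSelectRule, listed later in this merge. A hypothetical one-liner for the cleanup, standing in string equality for the structural expression comparison a real rewrite rule would use:

```java
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;

final class ConjunctDedup {
    // Collapse repeated conjuncts while keeping first-occurrence order;
    // real rewrite rules would compare expression trees, not strings.
    static List<String> dedup(List<String> conjuncts) {
        return new ArrayList<>(new LinkedHashSet<>(conjuncts));
    }
}
```

Applied to the q20 predicate, dedup returns just the ge/lt pair, matching the new baseline.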
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
index cc47cf3..a22bf53 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
@@ -23,9 +23,9 @@
}
-- PRE_CLUSTERED_GROUP_BY[$$1, $$3] |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$1) (ASC, %0->$$3)
- -- STABLE_SORT [$$1(ASC), $$3(ASC)] |LOCAL|
+ -- STABLE_SORT [$$1(ASC), $$3(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$1, $$3]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9, $$10, $$11, $$12, $$13, $$14, $$15, $$16] <- default.lineitem
@@ -59,9 +59,9 @@
}
-- PRE_CLUSTERED_GROUP_BY[$$1, $$3] |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$1) (ASC, %0->$$3)
- -- STABLE_SORT [$$1(ASC), $$3(ASC)] |LOCAL|
+ -- STABLE_SORT [$$1(ASC), $$3(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$1, $$3])
@@ -89,11 +89,11 @@
exchange
-- SORT_MERGE_EXCHANGE [$$44(DESC), $$43(ASC) ] |PARTITIONED|
limit 100
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (DESC, %0->$$44) (ASC, %0->$$43)
- -- STABLE_SORT [$$44(DESC), $$43(ASC)] |LOCAL|
+ -- STABLE_SORT [$$44(DESC), $$43(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$43 := %0->$$48]) decor ([]) {
@@ -111,7 +111,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$37] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$37] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$37])
@@ -127,7 +127,7 @@
project ([$$37, $$4, $$18])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:or, Args:[function-call: algebricks:gt, Args:[%0->$$5, 1], function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$5, 1], function-call: algebricks:neq, Args:[%0->$$18, %0->$$6]]])
- -- STREAM_SELECT |UNPARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:eq, Args:[%0->$$16, %0->$$4])
@@ -150,9 +150,9 @@
-- HYBRID_HASH_JOIN [$$18][$$36] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$18] |PARTITIONED|
- project ([$$16, $$18])
+ project ([$$18, $$16])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:gt, Args:[%0->$$28, %0->$$27], function-call: algebricks:gt, Args:[%0->$$28, %0->$$27]])
+ select (function-call: algebricks:gt, Args:[%0->$$28, %0->$$27])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -221,4 +221,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
index 591576b..c5897f7 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
@@ -33,7 +33,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
select (function-call: algebricks:gt, Args:[%0->$$1, 0.0])
@@ -53,9 +53,9 @@
distinct ([%0->$$2])
-- PRE_SORTED_DISTINCT_BY |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$2)
- -- STABLE_SORT [$$2(ASC)] |LOCAL|
+ -- STABLE_SORT [$$2(ASC)] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
data-scan [$$2]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.orders
@@ -73,7 +73,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$6(ASC) ] |PARTITIONED|
order (ASC, %0->$$6)
- -- STABLE_SORT [$$6(ASC)] |LOCAL|
+ -- STABLE_SORT [$$6(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$6 := %0->$$13]) decor ([]) {
@@ -91,7 +91,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$5] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$5] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$5, $$3])
@@ -133,4 +133,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
index 151f34d..6138f7a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
@@ -10,7 +10,7 @@
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1, $$3])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$5, %BRASS], function-call: algebricks:eq, Args:[%0->$$6, 15], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$5, %BRASS]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$6, 15], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$5, %BRASS]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -103,11 +103,11 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- PRE_CLUSTERED_GROUP_BY[$$4] |LOCAL|
+ -- PRE_CLUSTERED_GROUP_BY[$$4] |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$4)
- -- STABLE_SORT [$$4(ASC)] |LOCAL|
+ -- STABLE_SORT [$$4(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$4, $$5]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.q2_minimum_cost_supplier_tmp1
@@ -125,11 +125,11 @@
exchange
-- SORT_MERGE_EXCHANGE [$$3(DESC), $$5(ASC), $$4(ASC), $$6(ASC) ] |PARTITIONED|
limit 100
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (DESC, %0->$$3) (ASC, %0->$$5) (ASC, %0->$$4) (ASC, %0->$$6)
- -- STABLE_SORT [$$3(DESC), $$5(ASC), $$4(ASC), $$6(ASC)] |LOCAL|
+ -- STABLE_SORT [$$3(DESC), $$5(ASC), $$4(ASC), $$6(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$3, $$4, $$5, $$6, $$8, $$9, $$10, $$11])
@@ -153,4 +153,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan
index a1b8e42..31c4210 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan
@@ -7,11 +7,11 @@
exchange
-- SORT_MERGE_EXCHANGE [$$34(DESC) ] |PARTITIONED|
limit 10
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (DESC, %0->$$34)
- -- STABLE_SORT [$$34(DESC)] |LOCAL|
+ -- STABLE_SORT [$$34(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$1, $$34, $$29, $$32])
@@ -67,4 +67,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
index 435fd7c..be5a66a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
@@ -5,9 +5,9 @@
distinct ([%0->$$1])
-- PRE_SORTED_DISTINCT_BY |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$1)
- -- STABLE_SORT [$$1(ASC)] |LOCAL|
+ -- STABLE_SORT [$$1(ASC)] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1])
@@ -31,7 +31,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$11(ASC) ] |PARTITIONED|
order (ASC, %0->$$11)
- -- STABLE_SORT [$$11(ASC)] |LOCAL|
+ -- STABLE_SORT [$$11(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$11 := %0->$$16]) decor ([]) {
@@ -49,7 +49,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$7] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$7] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$7])
@@ -70,7 +70,7 @@
-- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
project ([$$2, $$7])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$6, 1993-10-01], function-call: algebricks:ge, Args:[%0->$$6, 1993-07-01], function-call: algebricks:lt, Args:[%0->$$6, 1993-10-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$6, 1993-07-01], function-call: algebricks:lt, Args:[%0->$$6, 1993-10-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -79,4 +79,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
index 177d24c..383e550 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
@@ -5,7 +5,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$49(DESC) ] |PARTITIONED|
order (DESC, %0->$$49)
- -- STABLE_SORT [$$49(DESC)] |LOCAL|
+ -- STABLE_SORT [$$49(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$48 := %0->$$52]) decor ([]) {
@@ -23,7 +23,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$42] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$42] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$42, $$23, $$24])
@@ -106,7 +106,7 @@
-- HASH_PARTITION_EXCHANGE [$$9] |PARTITIONED|
project ([$$9, $$10])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$13, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$13, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$13, 1995-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$13, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$13, 1995-01-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -123,4 +123,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
index cd9ffcd..aac9a5b 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
@@ -17,12 +17,12 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$6, $$7])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$7, 0.05], function-call: algebricks:le, Args:[%0->$$7, 0.07], function-call: algebricks:lt, Args:[%0->$$5, 24], function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$7, 0.05], function-call: algebricks:le, Args:[%0->$$7, 0.07], function-call: algebricks:lt, Args:[%0->$$5, 24]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$7, 0.05], function-call: algebricks:le, Args:[%0->$$7, 0.07], function-call: algebricks:lt, Args:[%0->$$5, 24.0]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -31,4 +31,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
index 39f8301..c1d5b26 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
@@ -5,9 +5,9 @@
union ($$6, $$10, $$17) ($$2, $$14, $$18) ($$5, $$9, $$19) ($$1, $$13, $$20)
-- UNION_ALL |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$6, $$2, $$5, $$1])
- -- STREAM_PROJECT |UNPARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (true)
@@ -31,9 +31,9 @@
select (function-call: algebricks:eq, Args:[%0->$$6, FRANCE])
-- STREAM_SELECT |PARTITIONED|
project ([$$5, $$6])
- -- STREAM_PROJECT |UNPARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
assign [$$5, $$6] <- [%0->$$9, %0->$$10]
- -- ASSIGN |UNPARTITIONED|
+ -- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
replicate
@@ -47,9 +47,9 @@
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$10, $$14, $$9, $$13])
- -- STREAM_PROJECT |UNPARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (true)
@@ -91,7 +91,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$47(ASC), $$48(ASC), $$49(ASC) ] |PARTITIONED|
order (ASC, %0->$$47) (ASC, %0->$$48) (ASC, %0->$$49)
- -- STABLE_SORT [$$47(ASC), $$48(ASC), $$49(ASC)] |LOCAL|
+ -- STABLE_SORT [$$47(ASC), $$48(ASC), $$49(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$47 := %0->$$53; $$48 := %0->$$54; $$49 := %0->$$55]) decor ([]) {
@@ -109,7 +109,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$1, $$2, $$45] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$1, $$2, $$45] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$1, $$2, $$45, $$46])
@@ -148,7 +148,7 @@
-- HYBRID_HASH_JOIN [$$20][$$36] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$20] |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:le, Args:[%0->$$30, 1996-12-31], function-call: algebricks:ge, Args:[%0->$$30, 1995-01-01], function-call: algebricks:le, Args:[%0->$$30, 1996-12-31]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$30, 1995-01-01], function-call: algebricks:le, Args:[%0->$$30, 1996-12-31]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -189,4 +189,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
index b807a24..b9916e2 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
@@ -3,7 +3,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$63(ASC) ] |PARTITIONED|
order (ASC, %0->$$63)
- -- STABLE_SORT [$$63(ASC)] |LOCAL|
+ -- STABLE_SORT [$$63(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$63, $$66])
@@ -27,7 +27,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$61] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$61] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$61, $$62, $$2])
@@ -138,7 +138,7 @@
-- HASH_PARTITION_EXCHANGE [$$38] |PARTITIONED|
project ([$$38, $$37, $$41])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$41, 1996-12-31], function-call: algebricks:ge, Args:[%0->$$41, 1995-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$41, 1995-01-01], function-call: algebricks:lt, Args:[%0->$$41, 1996-12-31]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -175,7 +175,7 @@
project ([$$1, $$2])
-- STREAM_PROJECT |PARTITIONED|
assign [$$1, $$2, $$3, $$4] <- [%0->$$54, %0->$$55, %0->$$56, %0->$$57]
- -- ASSIGN |UNPARTITIONED|
+ -- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
replicate
@@ -187,4 +187,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan
index f57f4a3..ecf4acb 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan
@@ -5,7 +5,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$53(ASC), $$54(DESC) ] |PARTITIONED|
order (ASC, %0->$$53) (DESC, %0->$$54)
- -- STABLE_SORT [$$53(ASC), $$54(DESC)] |LOCAL|
+ -- STABLE_SORT [$$53(ASC), $$54(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$53 := %0->$$58; $$54 := %0->$$59]) decor ([]) {
@@ -23,7 +23,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$48, $$51] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$48, $$51] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$48, $$51, $$52])
@@ -121,4 +121,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan
index 188aa6d..bec1353 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan
@@ -19,7 +19,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$9] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$9] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$9, $$5, $$6, $$7, $$8])
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
index 48e624e..2cbea4a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
@@ -6,7 +6,7 @@
-- UNNEST |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$1, $$2, $$3]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7] <- default.supplier
+ data-scan [$$2, $$3, $$1]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7] <- default.supplier
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
index b5ed12f..6a0b125 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
@@ -7,14 +7,14 @@
exchange
-- SORT_MERGE_EXCHANGE [$$4(ASC) ] |PARTITIONED|
limit 4
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$4)
- -- STABLE_SORT [$$4(ASC)] |LOCAL|
+ -- STABLE_SORT [$$4(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- select (function-call: algebricks:lt, Args:[%0->$$4, 10000])
+ select (function-call: algebricks:lt, Args:[%0->$$4, 10000.0])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
index ab55181..b5f1dc2 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
@@ -16,37 +16,35 @@
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$25] |PARTITIONED|
- project ([$$32, $$25, $$29, $$28])
+ project ([$$25, $$28, $$29, $$32])
-- STREAM_PROJECT |PARTITIONED|
- project ([$$25, $$17, $$28, $$29, $$32])
- -- STREAM_PROJECT |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- join (function-call: algebricks:eq, Args:[%0->$$26, %0->$$17])
- -- HYBRID_HASH_JOIN [$$26][$$17] |PARTITIONED|
- exchange
- -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$28, 30000], function-call: algebricks:lt, Args:[%0->$$28, 30000]])
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ join (function-call: algebricks:eq, Args:[%0->$$26, %0->$$17])
+ -- HYBRID_HASH_JOIN [$$26][$$17] |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
+ select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$28, 30000.0], function-call: algebricks:lt, Args:[%0->$$26, 5]])
+ -- STREAM_SELECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan [$$32, $$25, $$26, $$29, $$28]<-[$$25, $$26, $$27, $$28, $$29, $$30, $$31, $$32, $$33] <- default.orders
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$17] |PARTITIONED|
+ project ([$$17])
+ -- STREAM_PROJECT |PARTITIONED|
+ select (function-call: algebricks:lt, Args:[%0->$$17, 5])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$32, $$25, $$26, $$29, $$28]<-[$$25, $$26, $$27, $$28, $$29, $$30, $$31, $$32, $$33] <- default.orders
+ data-scan [$$17]<-[$$17, $$18, $$19, $$20, $$21, $$22, $$23, $$24] <- default.customer
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
- exchange
- -- HASH_PARTITION_EXCHANGE [$$17] |PARTITIONED|
- project ([$$17])
- -- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$17, 5], function-call: algebricks:lt, Args:[%0->$$17, 5]])
- -- STREAM_SELECT |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$17]<-[$$17, $$18, $$19, $$20, $$21, $$22, $$23, $$24] <- default.customer
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
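The u7 hunk is the largest structural change: besides the conjunct cleanup, the inner project ([$$25, $$17, $$28, $$29, $$32]) directly below the narrower project ([$$25, $$28, $$29, $$32]) is dropped, so the whole subtree re-indents. A hedged sketch of the shape of that check (the real rule is presumably RemoveRedundantProjectionRule from the collections later in this merge; this helper and its names are illustrative only):

```java
import java.util.List;

final class RedundantProjectCheck {
    // The outer project keeps a subset of what the inner project keeps, so
    // the inner one adds nothing and can be removed -- provided no operator
    // in between consumes the extra variables (here $$17 is consumed by the
    // join below, not between the two projects).
    static boolean isRedundant(List<String> outerVars, List<String> innerVars) {
        return innerVars.containsAll(outerVars);
    }
}
```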
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan
index 7370fcf..344898d 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan
@@ -3,7 +3,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$2(ASC) ] |PARTITIONED|
order (ASC, %0->$$2)
- -- STABLE_SORT [$$2(ASC)] |LOCAL|
+ -- STABLE_SORT [$$2(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
select (function-call: algebricks:lt, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFOPMultiply, Args:[%0->$$1, 2], 20])
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
index a4ee677..49cdedf 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
@@ -1,22 +1,66 @@
<?xml version="1.0"?>
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
+ <!-- Hivesterix Execution Parameters -->
+ <property>
+ <name>hive.hyracks.connectorpolicy</name>
+ <value>PIPELINING</value>
+ </property>
+
+ <property>
+ <name>hive.hyracks.parrallelism</name>
+ <value>4</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external.memory</name>
+ <value>3072</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.sort.memory</name>
+ <value>3072</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.framesize</name>
+ <value>768</value>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join</name>
+ <value>false</value>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask</name>
+ <value>false</value>
+ <description>Whether Hive enables the optimization that converts a
+ common join into a mapjoin based on the input file
+ size. If this parameter is on, and the sum of sizes for n-1 of the
+ tables/partitions of an n-way join is smaller than the
+ specified size, the join is directly converted to a mapjoin (there is no
+ conditional task).
+ </description>
+ </property>
+
+
<!-- Hive Configuration can either be stored in this file or in the hadoop
configuration files -->
<!-- that are implied by Hadoop setup variables. -->
@@ -42,52 +86,6 @@
</property>
<property>
- <name>hive.hyracks.connectorpolicy</name>
- <value>SEND_SIDE_MAT_PIPELINING</value>
- </property>
-
- <property>
- <name>hive.hyracks.host</name>
- <value>127.0.0.1</value>
- </property>
-
- <property>
- <name>hive.hyracks.port</name>
- <value>13099</value>
- </property>
-
- <property>
- <name>hive.hyracks.app</name>
- <value>hivesterix</value>
- </property>
-
-
- <property>
- <name>hive.hyracks.parrallelism</name>
- <value>2</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external</name>
- <value>true</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external.memory</name>
- <value>3072</value>
- </property>
-
- <property>
- <name>hive.algebricks.sort.memory</name>
- <value>3072</value>
- </property>
-
- <property>
- <name>hive.algebricks.framesize</name>
- <value>768</value>
- </property>
-
- <property>
<name>hive.exec.reducers.bytes.per.reducer</name>
<value>1000000000</value>
<description>size per reducer. The default is 1G, i.e. if the input size
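
On the configuration side, this merge moves the hivesterix execution knobs to the top of hive-default.xml, switches the connector policy from SEND_SIDE_MAT_PIPELINING to PIPELINING, raises hive.hyracks.parrallelism (sic -- the key really is spelled that way) from 2 to 4, drops the now-unused host/port/app entries, and disables Hive's automatic mapjoin conversion for these tests. A hedged sketch of how a driver might read these knobs; the property keys come from the hunk above, while the class name and fallback defaults are assumptions, not the project's real code:

```java
import org.apache.hadoop.conf.Configuration;

final class HivesterixSettingsSketch {
    static void dump(Configuration conf) {
        // Keys are taken from the hive-default.xml hunk above; the default
        // values here are illustrative fallbacks only.
        String policy = conf.get("hive.hyracks.connectorpolicy", "PIPELINING");
        int parallelism = conf.getInt("hive.hyracks.parrallelism", 4);
        boolean externalGroupBy = conf.getBoolean("hive.algebricks.groupby.external", true);
        int groupByMemory = conf.getInt("hive.algebricks.groupby.external.memory", 3072);
        int sortMemory = conf.getInt("hive.algebricks.sort.memory", 3072);
        int frameSize = conf.getInt("hive.algebricks.framesize", 768);
        System.out.printf("policy=%s parallelism=%d externalGby=%b gbyMem=%d sortMem=%d frame=%d%n",
                policy, parallelism, externalGroupBy, groupByMemory, sortMemory, frameSize);
    }
}
```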
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
index a7d8d9c..f886a44 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
@@ -43,7 +43,7 @@
# Note that the ConsoleHandler also has a separate level
# setting to limit messages printed to the console.
-.level= WARNING
+.level= INFO
# .level= INFO
# .level= FINE
# .level = FINEST
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
index 3f1214a..bb07665 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
@@ -13,9 +13,6 @@
-- create the result table
create table q10_returned_item (c_custkey int, c_name string, revenue double, c_acctbal string, n_name string, c_address string, c_phone string, c_comment string);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
insert overwrite table q10_returned_item
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
index 062f7b9..8546365 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q12_shipping(l_shipmode string, high_line_count double, low_line_count double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-
-- the query
insert overwrite table q12_shipping
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
index 988f400..4644d23 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q14_promotion_effect(promo_revenue double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-
-- the query
insert overwrite table q14_promotion_effect
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
index 04064ed..8fa333e 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
@@ -14,8 +14,6 @@
create table q15_top_supplier(s_suppkey int, s_name string, s_address string, s_phone string, total_revenue double);
-set mapred.min.split.size=536870912;
-
-- the query
insert overwrite table revenue
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
index ac2902c..d1eaacc 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
@@ -13,9 +13,6 @@
create table q18_tmp(l_orderkey int, t_sum_quantity double);
create table q18_large_volume_customer(c_name string, c_custkey int, o_orderkey int, o_orderdate string, o_totalprice double, sum_quantity double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1164000000;
-
-- the query
insert overwrite table q18_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
index 2002e1e..6badfcf 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q19_discounted_revenue(revenue double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-
-- the query
insert overwrite table q19_discounted_revenue
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
index a002068..af64a4f 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
@@ -7,8 +7,6 @@
-- create the target table
CREATE TABLE q1_pricing_summary_report ( L_RETURNFLAG STRING, L_LINESTATUS STRING, SUM_QTY DOUBLE, SUM_BASE_PRICE DOUBLE, SUM_DISC_PRICE DOUBLE, SUM_CHARGE DOUBLE, AVE_QTY DOUBLE, AVE_PRICE DOUBLE, AVE_DISC DOUBLE, COUNT_ORDER INT);
-set mapred.min.split.size=536870912;
-
-- the query
INSERT OVERWRITE TABLE q1_pricing_summary_report
SELECT
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
index 2bb90ea..32181bf 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
@@ -23,8 +23,6 @@
create table q20_tmp4(ps_suppkey int);
create table q20_potential_part_promotion(s_name string, s_address string);
-set mapred.min.split.size=536870912;
-
-- the query
insert overwrite table q20_tmp1
select distinct p_partkey
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
index 0049eb3..67f6dc4 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
@@ -11,9 +11,6 @@
-- create the target table
create table q3_shipping_priority (l_orderkey int, revenue double, o_orderdate string, o_shippriority int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
Insert overwrite table q3_shipping_priority
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
index aa828e9..efbcff2 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
@@ -11,7 +11,6 @@
CREATE TABLE q4_order_priority_tmp (O_ORDERKEY INT);
CREATE TABLE q4_order_priority (O_ORDERPRIORITY STRING, ORDER_COUNT INT);
-set mapred.min.split.size=536870912;
-- the query
INSERT OVERWRITE TABLE q4_order_priority_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
index 9af2dd2..838a1e8 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
@@ -17,7 +17,6 @@
-- create the target table
create table q5_local_supplier_volume (N_NAME STRING, REVENUE DOUBLE);
-set mapred.min.split.size=536870912;
-- the query
insert overwrite table q5_local_supplier_volume
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
index 2678f80..12ae8ae 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
@@ -17,9 +17,6 @@
create table q7_volume_shipping (supp_nation string, cust_nation string, l_year int, revenue double);
create table q7_volume_shipping_tmp(supp_nation string, cust_nation string, s_nationkey int, c_nationkey int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-
-- the query
insert overwrite table q7_volume_shipping_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
index 2e5b4a1..c491997 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
@@ -17,9 +17,6 @@
-- create the result table
create table q9_product_type_profit (nation string, o_year string, sum_profit double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
insert overwrite table q9_product_type_profit
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u8_order_by.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u8_order_by.hive
new file mode 100644
index 0000000..6efd2ae
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u8_order_by.hive
@@ -0,0 +1,8 @@
+drop table IF EXISTS nation;
+drop table IF EXISTS u8_non_mapred;
+
+create external table nation (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/nation';
+create table u8_order_by (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING);
+
+insert overwrite table u8_order_by
+select * FROM nation order by N_NATIONKEY;
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u8_order_by.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u8_order_by.result
new file mode 100644
index 0000000..719b246
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u8_order_by.result
@@ -0,0 +1,25 @@
+0ALGERIA0 haggle. carefully final deposits detect slyly agai
+1ARGENTINA1al foxes promise slyly according to the regular accounts. bold requests alon
+2BRAZIL1y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special
+3CANADA1eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold
+4EGYPT4y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d
+5ETHIOPIA0ven packages wake quickly. regu
+6FRANCE3refully final requests. regular, ironi
+7GERMANY3l platelets. regular accounts x-ray: unusual, regular acco
+8INDIA2ss excuses cajole slyly across the packages. deposits print aroun
+9INDONESIA2 slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull
+10IRAN4efully alongside of the slyly final dependencies.
+11IRAQ4nic deposits boost atop the quickly final requests? quickly regula
+12JAPAN2ously. final, express gifts cajole a
+13JORDAN4ic deposits are blithely about the carefully regular pa
+14KENYA0 pending excuses haggle furiously deposits. pending, express pinto beans wake fluffily past t
+15MOROCCO0rns. blithely bold courts among the closely regular packages use furiously bold platelets?
+16MOZAMBIQUE0s. ironic, unusual asymptotes wake blithely r
+17PERU1platelets. blithely pending dependencies use fluffily across the even pinto beans. carefully silent accoun
+18CHINA2c dependencies. furiously express notornis sleep slyly regular accounts. ideas sleep. depos
+19ROMANIA3ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account
+20SAUDI ARABIA4ts. silent requests haggle. closely express packages sleep across the blithely
+21VIETNAM2hely enticingly express accounts. even, final
+22RUSSIA3 requests against the platelets use never according to the quickly regular pint
+23UNITED KINGDOM3eans boost carefully special requests. accounts are. carefull
+24UNITED STATES1y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be
diff --git a/hivesterix/hivesterix-optimizer/pom.xml b/hivesterix/hivesterix-optimizer/pom.xml
index ba7c7ad..858507e 100644
--- a/hivesterix/hivesterix-optimizer/pom.xml
+++ b/hivesterix/hivesterix-optimizer/pom.xml
@@ -18,7 +18,7 @@
<parent>
<artifactId>hivesterix</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<artifactId>hivesterix-optimizer</artifactId>
@@ -44,14 +44,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-translator</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
index 959e73e..12b5986 100644
--- a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
@@ -12,115 +12,116 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.optimizer.rulecollections;
-
-import java.util.LinkedList;
-
-import edu.uci.ics.hivesterix.optimizer.rules.InsertProjectBeforeWriteRule;
-import edu.uci.ics.hivesterix.optimizer.rules.IntroduceEarlyProjectRule;
-import edu.uci.ics.hivesterix.optimizer.rules.LocalGroupByRule;
-import edu.uci.ics.hivesterix.optimizer.rules.RemoveRedundantSelectRule;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.BreakSelectIntoConjunctsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ComplexJoinInferenceRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateAssignsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateSelectsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.EliminateSubplanRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.EnforceStructuralPropertiesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractCommonOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractGbyExpressionsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.FactorRedundantGroupAndDecorVarsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InferTypesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InlineVariablesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InsertProjectBeforeUnionRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceAggregateCombinerRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceGroupByCombinerRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IsolateHyracksOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PullSelectOutOfEqJoin;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushLimitDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectIntoDataSourceScanRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectIntoJoinRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ReinferAllTypesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveRedundantProjectionRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveUnusedAssignAndAggregateRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetAlgebricksPhysicalOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetExecutionModeRule;
-
-public final class HiveRuleCollections {
-
- public final static LinkedList<IAlgebraicRewriteRule> NORMALIZATION = new LinkedList<IAlgebraicRewriteRule>();
- static {
- NORMALIZATION.add(new EliminateSubplanRule());
- NORMALIZATION.add(new BreakSelectIntoConjunctsRule());
- NORMALIZATION.add(new PushSelectIntoJoinRule());
- NORMALIZATION.add(new ExtractGbyExpressionsRule());
- NORMALIZATION.add(new RemoveRedundantSelectRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> COND_PUSHDOWN_AND_JOIN_INFERENCE = new LinkedList<IAlgebraicRewriteRule>();
- static {
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new PushSelectDownRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new InlineVariablesRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new FactorRedundantGroupAndDecorVarsRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new EliminateSubplanRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> LOAD_FIELDS = new LinkedList<IAlgebraicRewriteRule>();
- static {
- // should LoadRecordFieldsRule be applied in only one pass over the
- // plan?
- LOAD_FIELDS.add(new InlineVariablesRule());
- // LOAD_FIELDS.add(new RemoveUnusedAssignAndAggregateRule());
- LOAD_FIELDS.add(new ComplexJoinInferenceRule());
- LOAD_FIELDS.add(new InferTypesRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> OP_PUSHDOWN = new LinkedList<IAlgebraicRewriteRule>();
- static {
- OP_PUSHDOWN.add(new PushProjectDownRule());
- OP_PUSHDOWN.add(new PushSelectDownRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> DATA_EXCHANGE = new LinkedList<IAlgebraicRewriteRule>();
- static {
- DATA_EXCHANGE.add(new SetExecutionModeRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> CONSOLIDATION = new LinkedList<IAlgebraicRewriteRule>();
- static {
- CONSOLIDATION.add(new RemoveRedundantProjectionRule());
- CONSOLIDATION.add(new ConsolidateSelectsRule());
- CONSOLIDATION.add(new IntroduceEarlyProjectRule());
- CONSOLIDATION.add(new ConsolidateAssignsRule());
- CONSOLIDATION.add(new IntroduceGroupByCombinerRule());
- CONSOLIDATION.add(new IntroduceAggregateCombinerRule());
- CONSOLIDATION.add(new RemoveUnusedAssignAndAggregateRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> PHYSICAL_PLAN_REWRITES = new LinkedList<IAlgebraicRewriteRule>();
- static {
- PHYSICAL_PLAN_REWRITES.add(new PullSelectOutOfEqJoin());
- PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
- PHYSICAL_PLAN_REWRITES.add(new EnforceStructuralPropertiesRule());
- PHYSICAL_PLAN_REWRITES.add(new PushProjectDownRule());
- PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
- PHYSICAL_PLAN_REWRITES.add(new PushLimitDownRule());
- PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeWriteRule());
- PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeUnionRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> prepareJobGenRules = new LinkedList<IAlgebraicRewriteRule>();
- static {
- prepareJobGenRules.add(new ReinferAllTypesRule());
- prepareJobGenRules.add(new IsolateHyracksOperatorsRule(
- HeuristicOptimizer.hyraxOperatorsBelowWhichJobGenIsDisabled));
- prepareJobGenRules.add(new ExtractCommonOperatorsRule());
- prepareJobGenRules.add(new LocalGroupByRule());
- prepareJobGenRules.add(new PushProjectIntoDataSourceScanRule());
- prepareJobGenRules.add(new ReinferAllTypesRule());
- }
-
-}
+package edu.uci.ics.hivesterix.optimizer.rulecollections;
+
+import java.util.LinkedList;
+
+import edu.uci.ics.hivesterix.optimizer.rules.InsertProjectBeforeWriteRule;
+import edu.uci.ics.hivesterix.optimizer.rules.IntroduceEarlyProjectRule;
+import edu.uci.ics.hivesterix.optimizer.rules.LocalGroupByRule;
+import edu.uci.ics.hivesterix.optimizer.rules.RemoveRedundantSelectRule;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.BreakSelectIntoConjunctsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ComplexJoinInferenceRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateAssignsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateSelectsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.EliminateSubplanRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.EnforceStructuralPropertiesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractCommonOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractGbyExpressionsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.FactorRedundantGroupAndDecorVarsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InferTypesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InlineVariablesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InsertProjectBeforeUnionRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceAggregateCombinerRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceGroupByCombinerRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IsolateHyracksOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PullSelectOutOfEqJoin;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushLimitDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectIntoDataSourceScanRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectIntoJoinRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ReinferAllTypesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveRedundantProjectionRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveUnusedAssignAndAggregateRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetAlgebricksPhysicalOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetExecutionModeRule;
+
+public final class HiveRuleCollections {
+
+ public final static LinkedList<IAlgebraicRewriteRule> NORMALIZATION = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ NORMALIZATION.add(new EliminateSubplanRule());
+ NORMALIZATION.add(new BreakSelectIntoConjunctsRule());
+ NORMALIZATION.add(new PushSelectIntoJoinRule());
+ NORMALIZATION.add(new ExtractGbyExpressionsRule());
+ NORMALIZATION.add(new RemoveRedundantSelectRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> COND_PUSHDOWN_AND_JOIN_INFERENCE = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new PushSelectDownRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new InlineVariablesRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new FactorRedundantGroupAndDecorVarsRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new EliminateSubplanRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> LOAD_FIELDS = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ // should LoadRecordFieldsRule be applied in only one pass over the
+ // plan?
+ LOAD_FIELDS.add(new InlineVariablesRule());
+ // LOAD_FIELDS.add(new RemoveUnusedAssignAndAggregateRule());
+ LOAD_FIELDS.add(new ComplexJoinInferenceRule());
+ LOAD_FIELDS.add(new InferTypesRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> OP_PUSHDOWN = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ OP_PUSHDOWN.add(new PushProjectDownRule());
+ OP_PUSHDOWN.add(new PushSelectDownRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> DATA_EXCHANGE = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ DATA_EXCHANGE.add(new SetExecutionModeRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> CONSOLIDATION = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ CONSOLIDATION.add(new RemoveRedundantProjectionRule());
+ CONSOLIDATION.add(new ConsolidateSelectsRule());
+ CONSOLIDATION.add(new IntroduceEarlyProjectRule());
+ CONSOLIDATION.add(new ConsolidateAssignsRule());
+ CONSOLIDATION.add(new IntroduceGroupByCombinerRule());
+ CONSOLIDATION.add(new IntroduceAggregateCombinerRule());
+ CONSOLIDATION.add(new RemoveUnusedAssignAndAggregateRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> PHYSICAL_PLAN_REWRITES = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ PHYSICAL_PLAN_REWRITES.add(new PullSelectOutOfEqJoin());
+ PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
+ PHYSICAL_PLAN_REWRITES.add(new EnforceStructuralPropertiesRule());
+ PHYSICAL_PLAN_REWRITES.add(new PushProjectDownRule());
+ PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
+ PHYSICAL_PLAN_REWRITES.add(new PushLimitDownRule());
+ PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeWriteRule());
+ PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeUnionRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> prepareJobGenRules = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ prepareJobGenRules.add(new ReinferAllTypesRule());
+ prepareJobGenRules.add(new IsolateHyracksOperatorsRule(
+ HeuristicOptimizer.hyraxOperatorsBelowWhichJobGenIsDisabled));
+ prepareJobGenRules.add(new ExtractCommonOperatorsRule());
+ prepareJobGenRules.add(new LocalGroupByRule());
+ prepareJobGenRules.add(new PushProjectIntoDataSourceScanRule());
+ prepareJobGenRules.add(new ReinferAllTypesRule());
+ prepareJobGenRules.add(new SetExecutionModeRule());
+ }
+
+}
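
Note on the collections above: each static list groups the rules for one rewrite phase, and a heuristic optimizer (cf. the HeuristicOptimizer import) typically applies such phases in order, each to a fixed point. A minimal sketch of how one phase is usually driven, with simplified stand-in interfaces; this is not the Algebricks API:

```java
// Hypothetical sketch, not the Algebricks API: simplified stand-ins for
// how a phase of rewrite rules is usually driven, re-running the list
// until a full pass fires no rule (a fixed point).
import java.util.List;

class PlanNode {
    // stand-in for an operator-tree node
}

interface RewriteRule {
    // returns true iff the rule changed the plan
    boolean rewrite(PlanNode root);
}

final class PhasedOptimizer {
    static void runPhase(PlanNode root, List<RewriteRule> phase) {
        boolean changed;
        do {
            changed = false;
            for (RewriteRule rule : phase) {
                changed |= rule.rewrite(root);
            }
        } while (changed); // stop once nothing fires in a complete pass
    }
}
```

Re-running a phase to a fixed point is why a rule whose precondition is only created by an earlier rule in the same list still gets its chance without hand-ordering every interaction.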
diff --git a/hivesterix/hivesterix-runtime/pom.xml b/hivesterix/hivesterix-runtime/pom.xml
index e4f5416..6d10286 100644
--- a/hivesterix/hivesterix-runtime/pom.xml
+++ b/hivesterix/hivesterix-runtime/pom.xml
@@ -20,16 +20,14 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<dependencies>
- <dependency>
- <groupId>javax.servlet</groupId>
- <artifactId>servlet-api</artifactId>
- <version>2.5</version>
- <type>jar</type>
- <scope>compile</scope>
+ <dependency>
+ <groupId>sqlline</groupId>
+ <artifactId>sqlline</artifactId>
+ <version>1_0_2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
@@ -38,285 +36,105 @@
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>args4j</groupId>
- <artifactId>args4j</artifactId>
- <version>2.0.12</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.json</groupId>
- <artifactId>json</artifactId>
- <version>20090211</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-servlet</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>jline</groupId>
- <artifactId>jline</artifactId>
- <version>0.9.94</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-core</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-connectionpool</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-enhancer</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-rdbms</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-dbcp</groupId>
- <artifactId>commons-dbcp</artifactId>
- <version>1.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-pool</groupId>
- <artifactId>commons-pool</artifactId>
- <version>1.5.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-collections</groupId>
- <artifactId>commons-collections</artifactId>
- <version>3.2.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- <version>2.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>javax</groupId>
- <artifactId>jdo2-api</artifactId>
- <version>2.3-ec</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.facebook</groupId>
- <artifactId>libfb303</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>cli</artifactId>
- <version>1.2</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache</groupId>
- <artifactId>log4j</artifactId>
- <version>1.2.15</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>antlr-runtime</artifactId>
- <version>3.0.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-cli</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-hwi</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-shims</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-cli</groupId>
- <artifactId>commons-cli</artifactId>
- <version>1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- <version>1.1.1</version>
- <type>jar</type>
- <classifier>api</classifier>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- <version>r06</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>stringtemplate</artifactId>
- <version>3.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.derby</groupId>
- <artifactId>derby</artifactId>
- <version>10.8.1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.90.3</version>
- <type>jar</type>
- <scope>compile</scope>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-hbase-handler</artifactId>
+ <version>0.11.0</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-serde</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
@@ -384,5 +202,19 @@
<id>hyracks-public-release</id>
<url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-public-releases</url>
</repository>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>plugins-release</id>
+ <url>http://repo.springsource.org/plugins-release</url>
+ </repository>
</repositories>
</project>
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
index a5177c9..dd4fbe7 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
@@ -34,6 +34,7 @@
import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+@SuppressWarnings("deprecation")
public abstract class AbstractExpressionEvaluator implements ICopyEvaluator {
private List<ICopyEvaluator> children;
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
index d061b23..87d2221 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
@@ -36,6 +36,7 @@
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+@SuppressWarnings("deprecation")
public class AggregationFunctionEvaluator implements ICopyAggregateFunction {
/**
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
index f4b77b8..3f1cc27 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
@@ -35,6 +35,7 @@
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+@SuppressWarnings("deprecation")
public class AggregatuibFunctionSerializableEvaluator implements ICopySerializableAggregateFunction {
/**
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
index d91b806..b511d87 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
@@ -35,6 +35,7 @@
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunction;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+@SuppressWarnings("deprecation")
public class UDTFFunctionEvaluator implements ICopyUnnestingFunction, Collector {
/**
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
index 09f0cb6..d65dc24 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
@@ -12,370 +12,383 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
-import edu.uci.ics.hivesterix.runtime.evaluator.AggregationFunctionEvaluator;
-import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
-import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunctionFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class AggregationFunctionFactory implements ICopyAggregateFunctionFactory {
-
- private static final long serialVersionUID = 1L;
-
- /**
- * list of parameters' serialization
- */
- private List<String> parametersSerialization = new ArrayList<String>();
-
- /**
- * the name of the udf
- */
- private String genericUDAFName;
-
- /**
- * aggregation mode
- */
- private GenericUDAFEvaluator.Mode mode;
-
- /**
- * list of type info
- */
- private List<TypeInfo> types = new ArrayList<TypeInfo>();
-
- /**
- * distinct or not
- */
- private boolean distinct;
-
- /**
- * the schema of incoming rows
- */
- private Schema rowSchema;
-
- /**
- * list of parameters
- */
- private transient List<ExprNodeDesc> parametersOrigin;
-
- /**
- * row inspector
- */
- private transient ObjectInspector rowInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspectorPartial = null;
-
- /**
- * parameter inspectors
- */
- private transient ObjectInspector[] parameterInspectors = null;
-
- /**
- * expression desc
- */
- private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * aggregation function desc
- */
- private transient AggregationDesc aggregator;
-
- /**
- * @param aggregator
- * Algebricks function call expression
- * @param oi
- * schema
- */
- public AggregationFunctionFactory(AggregateFunctionCallExpression expression, Schema oi,
- IVariableTypeEnvironment env) throws AlgebricksException {
-
- try {
- aggregator = (AggregationDesc) ExpressionTranslator.getHiveExpression(expression, env);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- init(aggregator.getParameters(), aggregator.getGenericUDAFName(), aggregator.getMode(),
- aggregator.getDistinct(), oi);
- }
-
- /**
- * constructor of aggregation function factory
- *
- * @param inputs
- * @param name
- * @param udafMode
- * @param distinct
- * @param oi
- */
- private void init(List<ExprNodeDesc> inputs, String name, GenericUDAFEvaluator.Mode udafMode, boolean distinct,
- Schema oi) {
- parametersOrigin = inputs;
- genericUDAFName = name;
- mode = udafMode;
- this.distinct = distinct;
- rowSchema = oi;
-
- for (ExprNodeDesc input : inputs) {
- TypeInfo type = input.getTypeInfo();
- if (type instanceof StructTypeInfo) {
- types.add(TypeInfoFactory.doubleTypeInfo);
- } else
- types.add(type);
-
- String s = Utilities.serializeExpression(input);
- parametersSerialization.add(s);
- }
- }
-
- @Override
- public synchronized ICopyAggregateFunction createAggregateFunction(IDataOutputProvider provider)
- throws AlgebricksException {
- if (parametersOrigin == null) {
- Configuration config = new Configuration();
- config.setClassLoader(this.getClass().getClassLoader());
- /**
- * in case of class.forname(...) call in hive code
- */
- Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
-
- parametersOrigin = new ArrayList<ExprNodeDesc>();
- for (String serialization : parametersSerialization) {
- parametersOrigin.add(Utilities.deserializeExpression(serialization, config));
- }
- }
-
- /**
- * exprs
- */
- if (parameterExprs == null)
- parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- if (evaluators == null)
- evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- if (cachedParameters == null)
- cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- if (cachedRowObjects == null)
- cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- if (serDe == null)
- serDe = new HashMap<Long, SerDe>();
-
- /**
- * UDAF functions
- */
- if (udafsComplete == null)
- udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * UDAF functions
- */
- if (udafsPartial == null)
- udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- if (parameterInspectors == null)
- parameterInspectors = new ObjectInspector[parametersOrigin.size()];
-
- if (rowInspector == null)
- rowInspector = rowSchema.toObjectInspector();
-
- // get current thread id
- long threadId = Thread.currentThread().getId();
-
- /**
- * expressions, expressions are thread local
- */
- List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
- if (parameters == null) {
- parameters = new ArrayList<ExprNodeDesc>();
- for (ExprNodeDesc parameter : parametersOrigin)
- parameters.add(parameter.clone());
- parameterExprs.put(threadId, parameters);
- }
-
- /**
- * cached parameter objects
- */
- Object[] cachedParas = cachedParameters.get(threadId);
- if (cachedParas == null) {
- cachedParas = new Object[parameters.size()];
- cachedParameters.put(threadId, cachedParas);
- }
-
- /**
- * cached row object: one per thread
- */
- LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects.get(threadId);
- if (cachedRowObject == null) {
- cachedRowObject = LazyFactory.createLazyObject(rowInspector);
- cachedRowObjects.put(threadId, cachedRowObject);
- }
-
- /**
- * we only use lazy serde to do serialization
- */
- SerDe lazySer = serDe.get(threadId);
- if (lazySer == null) {
- lazySer = new LazySerDe();
- serDe.put(threadId, lazySer);
- }
-
- /**
- * evaluators
- */
- ExprNodeEvaluator[] evals = evaluators.get(threadId);
- if (evals == null) {
- evals = new ExprNodeEvaluator[parameters.size()];
- evaluators.put(threadId, evals);
- }
-
- GenericUDAFEvaluator udafPartial;
- GenericUDAFEvaluator udafComplete;
-
- // initialize object inspectors
- try {
- /**
- * evaluators, udf, object inpsectors are shared in one thread
- */
- for (int i = 0; i < evals.length; i++) {
- if (evals[i] == null) {
- evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
- if (parameterInspectors[i] == null) {
- parameterInspectors[i] = evals[i].initialize(rowInspector);
- } else {
- evals[i].initialize(rowInspector);
- }
- }
- }
-
- udafComplete = udafsComplete.get(threadId);
- if (udafComplete == null) {
- try {
- udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafsComplete.put(threadId, udafComplete);
- udafComplete.init(mode, parameterInspectors);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspector == null)
- outputInspector = udafComplete.init(mode, parameterInspectors);
-
- // initial partial gby udaf
- GenericUDAFEvaluator.Mode partialMode;
- // adjust mode for external groupby
- if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
- else if (mode == GenericUDAFEvaluator.Mode.FINAL)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
- else
- partialMode = mode;
- udafPartial = udafsPartial.get(threadId);
- if (udafPartial == null) {
- try {
- udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafPartial.init(partialMode, parameterInspectors);
- udafsPartial.put(threadId, udafPartial);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspectorPartial == null)
- outputInspectorPartial = udafPartial.init(partialMode, parameterInspectors);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e);
- }
-
- return new AggregationFunctionEvaluator(parameters, types, genericUDAFName, mode, distinct, rowInspector,
- provider.getDataOutput(), evals, parameterInspectors, cachedParas, lazySer, cachedRowObject,
- udafPartial, udafComplete, outputInspector, outputInspectorPartial);
- }
-
- public String toString() {
- return "aggregation function expression evaluator factory: " + this.genericUDAFName;
- }
-}
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.AggregationFunctionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunctionFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+@SuppressWarnings("deprecation")
+public class AggregationFunctionFactory implements ICopyAggregateFunctionFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * list of parameters' serialization
+ */
+ private List<String> parametersSerialization = new ArrayList<String>();
+
+ /**
+ * the name of the udf
+ */
+ private String genericUDAFName;
+
+ /**
+ * aggregation mode
+ */
+ private GenericUDAFEvaluator.Mode mode;
+
+ /**
+ * list of type info
+ */
+ private List<TypeInfo> types = new ArrayList<TypeInfo>();
+
+ /**
+ * distinct or not
+ */
+ private boolean distinct;
+
+ /**
+ * the schema of incoming rows
+ */
+ private Schema rowSchema;
+
+ /**
+ * list of parameters
+ */
+ private transient List<ExprNodeDesc> parametersOrigin;
+
+ /**
+ * row inspector
+ */
+ private transient ObjectInspector rowInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspectorPartial = null;
+
+ /**
+ * parameter inspectors
+ */
+ private transient ObjectInspector[] parameterInspectors = null;
+
+ /**
+ * expression desc
+ */
+ private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * udaf evaluators
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * udaf evaluators
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * aggregation function desc
+ */
+ private transient AggregationDesc aggregator;
+
+ /**
+     * @param expression
+     *            Algebricks function call expression
+     * @param oi
+     *            schema
+     * @param env
+     *            variable type environment
+ */
+ public AggregationFunctionFactory(AggregateFunctionCallExpression expression, Schema oi,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+
+ try {
+ aggregator = (AggregationDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ init(aggregator.getParameters(), aggregator.getGenericUDAFName(), aggregator.getMode(),
+ aggregator.getDistinct(), oi);
+ }
+
+ /**
+     * initializes the aggregation function factory
+ *
+ * @param inputs
+ * @param name
+ * @param udafMode
+ * @param distinct
+ * @param oi
+ */
+ private void init(List<ExprNodeDesc> inputs, String name, GenericUDAFEvaluator.Mode udafMode, boolean distinct,
+ Schema oi) {
+ parametersOrigin = inputs;
+ genericUDAFName = name;
+ mode = udafMode;
+ this.distinct = distinct;
+ rowSchema = oi;
+
+ for (ExprNodeDesc input : inputs) {
+ TypeInfo type = input.getTypeInfo();
+ if (type instanceof StructTypeInfo) {
+ types.add(TypeInfoFactory.doubleTypeInfo);
+ } else {
+ types.add(type);
+ }
+
+ String s = Utilities.serializeExpression(input);
+ parametersSerialization.add(s);
+ }
+ }
+
+ @Override
+ public synchronized ICopyAggregateFunction createAggregateFunction(IDataOutputProvider provider)
+ throws AlgebricksException {
+ /**
+         * list of object inspectors corresponding to the types
+ */
+ List<ObjectInspector> oiListForTypes = new ArrayList<ObjectInspector>();
+ for (TypeInfo type : types) {
+ oiListForTypes.add(LazyUtils.getLazyObjectInspectorFromTypeInfo(type, false));
+ }
+
+ if (parametersOrigin == null) {
+ Configuration config = new Configuration();
+ config.setClassLoader(this.getClass().getClassLoader());
+ /**
+             * in case of Class.forName(...) calls in Hive code
+ */
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+
+ parametersOrigin = new ArrayList<ExprNodeDesc>();
+ for (String serialization : parametersSerialization) {
+ parametersOrigin.add(Utilities.deserializeExpression(serialization, config));
+ }
+ }
+
+ /**
+ * exprs
+ */
+ if (parameterExprs == null)
+ parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ if (evaluators == null)
+ evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ if (cachedParameters == null)
+ cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ if (cachedRowObjects == null)
+ cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ if (serDe == null)
+ serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * UDAF functions
+ */
+ if (udafsComplete == null)
+ udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * UDAF functions
+ */
+ if (udafsPartial == null)
+ udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ if (parameterInspectors == null)
+ parameterInspectors = new ObjectInspector[parametersOrigin.size()];
+
+ if (rowInspector == null)
+ rowInspector = rowSchema.toObjectInspector();
+
+ // get current thread id
+ long threadId = Thread.currentThread().getId();
+
+ /**
+         * expressions; these are thread-local
+ */
+ List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
+ if (parameters == null) {
+ parameters = new ArrayList<ExprNodeDesc>();
+ for (ExprNodeDesc parameter : parametersOrigin)
+ parameters.add(parameter.clone());
+ parameterExprs.put(threadId, parameters);
+ }
+
+ /**
+ * cached parameter objects
+ */
+ Object[] cachedParas = cachedParameters.get(threadId);
+ if (cachedParas == null) {
+ cachedParas = new Object[parameters.size()];
+ cachedParameters.put(threadId, cachedParas);
+ }
+
+ /**
+ * cached row object: one per thread
+ */
+ LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects.get(threadId);
+ if (cachedRowObject == null) {
+ cachedRowObject = LazyFactory.createLazyObject(rowInspector);
+ cachedRowObjects.put(threadId, cachedRowObject);
+ }
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ SerDe lazySer = serDe.get(threadId);
+ if (lazySer == null) {
+ lazySer = new LazySerDe();
+ serDe.put(threadId, lazySer);
+ }
+
+ /**
+ * evaluators
+ */
+ ExprNodeEvaluator[] evals = evaluators.get(threadId);
+ if (evals == null) {
+ evals = new ExprNodeEvaluator[parameters.size()];
+ evaluators.put(threadId, evals);
+ }
+
+ GenericUDAFEvaluator udafPartial;
+ GenericUDAFEvaluator udafComplete;
+
+ // initialize object inspectors
+ try {
+ /**
+             * evaluators, UDFs, and object inspectors are shared within one thread
+ */
+ for (int i = 0; i < evals.length; i++) {
+ if (evals[i] == null) {
+ evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
+ if (parameterInspectors[i] == null) {
+ parameterInspectors[i] = evals[i].initialize(rowInspector);
+ } else {
+ evals[i].initialize(rowInspector);
+ }
+ }
+ }
+
+ udafComplete = udafsComplete.get(threadId);
+ if (udafComplete == null) {
+ try {
+ udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, oiListForTypes, distinct,
+ false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafsComplete.put(threadId, udafComplete);
+ udafComplete.init(mode, parameterInspectors);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspector == null)
+ outputInspector = udafComplete.init(mode, parameterInspectors);
+
+            // initialize the partial group-by UDAF
+ GenericUDAFEvaluator.Mode partialMode;
+ // adjust mode for external groupby
+ if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
+ else if (mode == GenericUDAFEvaluator.Mode.FINAL)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
+ else
+ partialMode = mode;
+ udafPartial = udafsPartial.get(threadId);
+ if (udafPartial == null) {
+ try {
+ udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, oiListForTypes, distinct,
+ false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafPartial.init(partialMode, parameterInspectors);
+ udafsPartial.put(threadId, udafPartial);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspectorPartial == null)
+ outputInspectorPartial = udafPartial.init(partialMode, parameterInspectors);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e);
+ }
+
+ return new AggregationFunctionEvaluator(parameters, types, genericUDAFName, mode, distinct, rowInspector,
+ provider.getDataOutput(), evals, parameterInspectors, cachedParas, lazySer, cachedRowObject,
+ udafPartial, udafComplete, outputInspector, outputInspectorPartial);
+ }
+
+ public String toString() {
+ return "aggregation function expression evaluator factory: " + this.genericUDAFName;
+ }
+}
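
Aside from the reformatting and the added @SuppressWarnings, the functional change in this file is the Hive 0.11 migration: FunctionRegistry.getGenericUDAFEvaluator is now resolved from a list of ObjectInspectors (the new oiListForTypes) rather than from the raw TypeInfos. A hedged sketch of the same TypeInfo-to-inspector mapping; the helper name below is illustrative, and where this sketch uses Hive's standard writable inspectors, the real code uses Hivesterix's LazyUtils.getLazyObjectInspectorFromTypeInfo:

```java
// Sketch of the TypeInfo-to-ObjectInspector mapping this hunk needs.
// The helper name toInspectors is illustrative; Hivesterix builds its
// oiListForTypes with LazyUtils.getLazyObjectInspectorFromTypeInfo,
// while this version uses Hive's standard writable inspectors.
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

final class TypeInfoToInspector {
    static List<ObjectInspector> toInspectors(List<TypeInfo> types) {
        List<ObjectInspector> ois = new ArrayList<ObjectInspector>(types.size());
        for (TypeInfo type : types) {
            // one inspector per argument type, in argument order
            ois.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(type));
        }
        return ois;
    }
}
```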
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
index 71d11c0..c1ee814 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
@@ -39,12 +39,14 @@
import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunctionFactory;
+@SuppressWarnings("deprecation")
public class AggregationFunctionSerializableFactory implements ICopySerializableAggregateFunctionFactory {
private static final long serialVersionUID = 1L;
@@ -190,10 +192,19 @@
String s = Utilities.serializeExpression(input);
parametersSerialization.add(s);
}
+
}
@Override
public synchronized ICopySerializableAggregateFunction createAggregateFunction() throws AlgebricksException {
+ /**
+     * list of object inspectors corresponding to the types
+ */
+ List<ObjectInspector> oiListForTypes = new ArrayList<ObjectInspector>();
+ for (TypeInfo type : types) {
+ oiListForTypes.add(LazyUtils.getLazyObjectInspectorFromTypeInfo(type, false));
+ }
+
if (parametersOrigin == null) {
Configuration config = new Configuration();
config.setClassLoader(this.getClass().getClassLoader());
@@ -328,7 +339,8 @@
udafComplete = udafsComplete.get(threadId);
if (udafComplete == null) {
try {
- udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, oiListForTypes, distinct,
+ false);
} catch (HiveException e) {
throw new AlgebricksException(e);
}
@@ -352,7 +364,8 @@
udafPartial = udafsPartial.get(threadId);
if (udafPartial == null) {
try {
- udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, oiListForTypes, distinct,
+ false);
} catch (HiveException e) {
throw new AlgebricksException(e);
}
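
Both aggregation factories keep per-thread state (expression evaluators, SerDes, cached row objects, UDAF evaluators) in HashMaps keyed by Thread.currentThread().getId(), serialized through the synchronized factory method. For reference, a ThreadLocal achieves the same one-instance-per-thread isolation without the lock; an illustrative sketch, not the Hivesterix code:

```java
// Illustrative alternative, not the Hivesterix code: a ThreadLocal gives
// the same one-instance-per-thread behavior as the HashMap<Long, ...>
// caches keyed by Thread.currentThread().getId(), without needing the
// synchronized factory method.
final class PerThreadCacheDemo {

    private static final ThreadLocal<StringBuilder> EVALUATOR = new ThreadLocal<StringBuilder>() {
        @Override
        protected StringBuilder initialValue() {
            // runs at most once per thread, on the first get()
            return new StringBuilder("evaluator-" + Thread.currentThread().getId());
        }
    };

    public static void main(String[] args) throws InterruptedException {
        Runnable task = new Runnable() {
            public void run() {
                // each thread sees its own lazily created instance
                System.out.println(EVALUATOR.get());
            }
        };
        Thread a = new Thread(task);
        Thread b = new Thread(task);
        a.start();
        b.start();
        a.join();
        b.join();
    }
}
```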
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParser.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParser.java
index 1bf4abe..833daf4 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParser.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParser.java
@@ -166,7 +166,7 @@
}
@Override
- public void parse(K key, V value, IFrameWriter writer) throws HyracksDataException {
+ public void parse(K key, V value, IFrameWriter writer, String fileString) throws HyracksDataException {
try {
tb.reset();
if (parser != null) {
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
index e89a4c4..4bbb21f 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
@@ -12,156 +12,176 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.runtime.operator.filewrite;
-
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.OperatorFactory;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
-import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
-import edu.uci.ics.hyracks.api.comm.IFrameWriter;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
-
-@SuppressWarnings("deprecation")
-public class HiveFileWritePushRuntime implements IPushRuntime {
-
- /**
- * frame tuple accessor to access byte buffer
- */
- private final FrameTupleAccessor accessor;
-
- /**
- * input object inspector
- */
- private final ObjectInspector inputInspector;
-
- /**
- * cachedInput
- */
- private final LazyColumnar cachedInput;
-
- /**
- * File sink operator of Hive
- */
- private final FileSinkDesc fileSink;
-
- /**
- * job configuration, which contain name node and other configuration
- * information
- */
- private JobConf conf;
-
- /**
- * input object inspector
- */
- private final Schema inputSchema;
-
- /**
- * a copy of hive schema representation
- */
- private RowSchema rowSchema;
-
- /**
- * the Hive file sink operator
- */
- private FileSinkOperator fsOp;
-
- /**
- * cached tuple object reference
- */
- private FrameTupleReference tuple = new FrameTupleReference();
-
- /**
- * @param spec
- * @param fsProvider
- */
- public HiveFileWritePushRuntime(IHyracksTaskContext context,
- RecordDescriptor inputRecordDesc, JobConf job, FileSinkDesc fs,
- RowSchema schema, Schema oi) {
- fileSink = fs;
- fileSink.setGatherStats(false);
-
- rowSchema = schema;
- conf = job;
- inputSchema = oi;
-
- accessor = new FrameTupleAccessor(context.getFrameSize(),
- inputRecordDesc);
- inputInspector = inputSchema.toObjectInspector();
- cachedInput = new LazyColumnar(
- (LazyColumnarObjectInspector) inputInspector);
- }
-
- @Override
- public void open() throws HyracksDataException {
- fsOp = (FileSinkOperator) OperatorFactory.get(fileSink, rowSchema);
- fsOp.setChildOperators(null);
- fsOp.setParentOperators(null);
- conf.setClassLoader(this.getClass().getClassLoader());
-
- ObjectInspector[] inspectors = new ObjectInspector[1];
- inspectors[0] = inputInspector;
- try {
- fsOp.initialize(conf, inspectors);
- fsOp.setExecContext(null);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- @Override
- public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
- accessor.reset(buffer);
- int n = accessor.getTupleCount();
- try {
- for (int i = 0; i < n; ++i) {
- tuple.reset(accessor, i);
- cachedInput.init(tuple);
- fsOp.process(cachedInput, 0);
- }
- } catch (HiveException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void close() throws HyracksDataException {
- try {
- Thread.currentThread().setContextClassLoader(
- this.getClass().getClassLoader());
- fsOp.closeOp(false);
- } catch (HiveException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void setFrameWriter(int index, IFrameWriter writer,
- RecordDescriptor recordDesc) {
- throw new IllegalStateException();
- }
-
- @Override
- public void setInputRecordDescriptor(int index,
- RecordDescriptor recordDescriptor) {
- }
-
- @Override
- public void fail() throws HyracksDataException {
-
- }
-
-}
+package edu.uci.ics.hivesterix.runtime.operator.filewrite;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.logging.Logger;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+
+@SuppressWarnings("deprecation")
+public class HiveFileWritePushRuntime implements IPushRuntime {
+ private final static Logger LOGGER = Logger.getLogger(HiveFileWritePushRuntime.class.getName());
+
+ /**
+ * frame tuple accessor to access byte buffer
+ */
+ private final FrameTupleAccessor accessor;
+
+ /**
+ * input object inspector
+ */
+ private final ObjectInspector inputInspector;
+
+ /**
+ * cachedInput
+ */
+ private final LazyColumnar cachedInput;
+
+ /**
+ * File sink operator of Hive
+ */
+ private final FileSinkDesc fileSink;
+
+ /**
+     * job configuration, which contains the name node and other
+     * configuration information
+ */
+ private JobConf conf;
+
+ /**
+ * input object inspector
+ */
+ private final Schema inputSchema;
+
+ /**
+ * a copy of hive schema representation
+ */
+ private RowSchema rowSchema;
+
+ /**
+ * the Hive file sink operator
+ */
+ private FileSinkOperator fsOp;
+
+ /**
+ * cached tuple object reference
+ */
+ private FrameTupleReference tuple = new FrameTupleReference();
+
+ /**
+     * @param context
+     * @param inputRecordDesc
+     * @param job
+     * @param fs
+     * @param schema
+     * @param oi
+ */
+ public HiveFileWritePushRuntime(IHyracksTaskContext context, RecordDescriptor inputRecordDesc, JobConf job,
+ FileSinkDesc fs, RowSchema schema, Schema oi) {
+ fileSink = fs;
+ fileSink.setGatherStats(false);
+
+ rowSchema = schema;
+ conf = job;
+ inputSchema = oi;
+
+ accessor = new FrameTupleAccessor(context.getFrameSize(), inputRecordDesc);
+ inputInspector = inputSchema.toObjectInspector();
+ cachedInput = new LazyColumnar((LazyColumnarObjectInspector) inputInspector);
+ }
+
+ @Override
+ public void open() throws HyracksDataException {
+ fsOp = (FileSinkOperator) OperatorFactory.get(fileSink, rowSchema);
+ fsOp.setChildOperators(null);
+ fsOp.setParentOperators(null);
+ conf.setClassLoader(this.getClass().getClassLoader());
+
+ ObjectInspector[] inspectors = new ObjectInspector[1];
+ inspectors[0] = inputInspector;
+ try {
+ fsOp.initialize(conf, inspectors);
+ fsOp.setExecContext(null);
+ createTempDir();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+ accessor.reset(buffer);
+ int n = accessor.getTupleCount();
+ try {
+ for (int i = 0; i < n; ++i) {
+ tuple.reset(accessor, i);
+ cachedInput.init(tuple);
+ fsOp.process(cachedInput, 0);
+ }
+ } catch (HiveException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ try {
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+ fsOp.closeOp(false);
+ } catch (HiveException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void setFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc) {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public void setInputRecordDescriptor(int index, RecordDescriptor recordDescriptor) {
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+
+ }
+
+ private void createTempDir() throws IOException {
+ FileSinkDesc fdesc = fsOp.getConf();
+ String tempDir = fdesc.getDirName();
+ if (tempDir != null) {
+ Path tempPath = Utilities.toTempPath(new Path(tempDir));
+ FileSystem fs = tempPath.getFileSystem(conf);
+ if (!fs.exists(tempPath)) {
+ try {
+ fs.mkdirs(tempPath);
+ ShimLoader.getHadoopShims().fileSystemDeleteOnExit(fs, tempPath);
+ } catch (IOException e) {
+                    // if the directory already exists, that is fine; just log a warning
+                    LOGGER.warning("Failed to create temp result directory " + tempPath);
+ }
+ }
+ }
+ }
+
+}
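
The new createTempDir() pre-creates the file sink's temporary output directory and registers it for cleanup through the Hadoop shims. A minimal sketch of the same idempotent pattern against the plain FileSystem API, with FileSystem.deleteOnExit in place of the shim call used above:

```java
// Minimal sketch of the idempotent temp-dir pattern, assuming the plain
// Hadoop FileSystem API (FileSystem.deleteOnExit in place of the Hive
// shim call used above).
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class TempDirs {
    static void ensureTempDir(Configuration conf, String dir) throws IOException {
        Path path = new Path(dir);
        FileSystem fs = path.getFileSystem(conf);
        if (!fs.exists(path) && !fs.mkdirs(path) && !fs.exists(path)) {
            // mkdirs can race with another task creating the same directory,
            // so only fail if the directory still does not exist afterwards
            throw new IOException("could not create " + path);
        }
        fs.deleteOnExit(path); // best-effort cleanup when the JVM exits
    }
}
```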
diff --git a/hivesterix/hivesterix-serde/pom.xml b/hivesterix/hivesterix-serde/pom.xml
index 97c9174..b53661b 100644
--- a/hivesterix/hivesterix-serde/pom.xml
+++ b/hivesterix/hivesterix-serde/pom.xml
@@ -18,7 +18,7 @@
<parent>
<artifactId>hivesterix</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<artifactId>hivesterix-serde</artifactId>
@@ -42,37 +42,37 @@
<dependencies>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>hivesterix-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.10-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>3.8.1</version>
+ <version>4.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
index b5d64e8..2bbb1d5 100644
--- a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
@@ -46,6 +46,7 @@
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -74,6 +75,7 @@
* deserialized until required. Binary means a field is serialized in binary
* compact format.
*/
+@SuppressWarnings("deprecation")
public class LazySerDe implements SerDe {
public static final Log LOG = LogFactory.getLog(LazySerDe.class.getName());
@@ -140,7 +142,6 @@
/**
* Deserialize a table record to a Lazy struct.
*/
- @SuppressWarnings("deprecation")
@Override
public Object deserialize(Writable field) throws SerDeException {
if (byteArrayRef == null) {
@@ -471,4 +472,9 @@
}
}
}
+
+ @Override
+ public SerDeStats getSerDeStats() {
+ return null;
+ }
}
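
Context for the hunk above: the SerDe contract in newer Hive (post-0.7, required by the 0.11 upgrade here) includes getSerDeStats(), and LazySerDe satisfies it by returning null because it gathers no statistics. A serde that did track output size would return a populated SerDeStats instead; a sketch under that assumption, where the counter is hypothetical:

```java
// Sketch under the assumption of a serde that actually tracks output
// size; the rawDataSize counter is hypothetical. LazySerDe returns null
// above because it gathers no statistics.
import org.apache.hadoop.hive.serde2.SerDeStats;

class StatsTrackingSerDeExample {
    private long rawDataSize; // hypothetical: bytes accumulated during serialize()

    public SerDeStats getSerDeStats() {
        SerDeStats stats = new SerDeStats();
        stats.setRawDataSize(rawDataSize);
        return stats;
    }
}
```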
diff --git a/hivesterix/hivesterix-translator/pom.xml b/hivesterix/hivesterix-translator/pom.xml
index 8a24d5e..d8c205f 100644
--- a/hivesterix/hivesterix-translator/pom.xml
+++ b/hivesterix/hivesterix-translator/pom.xml
@@ -21,7 +21,7 @@
<parent>
<artifactId>hivesterix</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -42,30 +42,23 @@
<dependencies>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-runtime</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
index f32d85b..76cc51d 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
@@ -12,810 +12,820 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.logical.plan;
-
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.LimitOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.UDF;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
-import edu.uci.ics.hivesterix.logical.expression.HiveAlgebricksBuiltInFunctionMap;
-import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
-import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
-import edu.uci.ics.hivesterix.logical.plan.visitor.ExtractVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.FilterVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.GroupByVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.JoinVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.LateralViewJoinVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.LimitVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.MapJoinVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.ProjectVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.SortVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.TableScanWriteVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.UnionVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Visitor;
-import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ReplicateOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
-import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor;
-import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.PlanPrettyPrinter;
-
-@SuppressWarnings("rawtypes")
-public class HiveAlgebricksTranslator implements Translator {
-
- private int currentVariable = 0;
-
- private List<Mutable<ILogicalOperator>> logicalOp = new ArrayList<Mutable<ILogicalOperator>>();
-
- private boolean continueTraverse = true;
-
- private IMetadataProvider<PartitionDesc, Object> metaData;
-
- /**
- * map variable name to the logical variable
- */
- private HashMap<String, LogicalVariable> nameToLogicalVariableMap = new HashMap<String, LogicalVariable>();
-
- /**
- * map field name to LogicalVariable
- */
- private HashMap<String, LogicalVariable> fieldToLogicalVariableMap = new HashMap<String, LogicalVariable>();
-
- /**
- * map logical variable to name
- */
- private HashMap<LogicalVariable, String> logicalVariableToFieldMap = new HashMap<LogicalVariable, String>();
-
- /**
- * asterix root operators
- */
- private List<Mutable<ILogicalOperator>> rootOperators = new ArrayList<Mutable<ILogicalOperator>>();
-
- /**
- * a list of visitors
- */
- private List<Visitor> visitors = new ArrayList<Visitor>();
-
- /**
- * output writer to print things out
- */
- private static PrintWriter outputWriter = new PrintWriter(new OutputStreamWriter(System.out));
-
- /**
- * map a logical variable to type info
- */
- private HashMap<LogicalVariable, TypeInfo> variableToType = new HashMap<LogicalVariable, TypeInfo>();
-
- @Override
- public LogicalVariable getVariable(String fieldName, TypeInfo type) {
- LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
- if (var == null) {
- currentVariable++;
- var = new LogicalVariable(currentVariable);
- fieldToLogicalVariableMap.put(fieldName, var);
- nameToLogicalVariableMap.put(var.toString(), var);
- variableToType.put(var, type);
- logicalVariableToFieldMap.put(var, fieldName);
- }
- return var;
- }
-
- @Override
- public LogicalVariable getNewVariable(String fieldName, TypeInfo type) {
- currentVariable++;
- LogicalVariable var = new LogicalVariable(currentVariable);
- fieldToLogicalVariableMap.put(fieldName, var);
- nameToLogicalVariableMap.put(var.toString(), var);
- variableToType.put(var, type);
- logicalVariableToFieldMap.put(var, fieldName);
- return var;
- }
-
- @Override
- public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar) {
- String name = this.logicalVariableToFieldMap.get(oldVar);
- if (name != null) {
- fieldToLogicalVariableMap.put(name, newVar);
- nameToLogicalVariableMap.put(newVar.toString(), newVar);
- nameToLogicalVariableMap.put(oldVar.toString(), newVar);
- logicalVariableToFieldMap.put(newVar, name);
- }
- }
-
- @Override
- public IMetadataProvider<PartitionDesc, Object> getMetadataProvider() {
- return metaData;
- }
-
- /**
- * only get an variable, without rewriting it
- *
- * @param fieldName
- * @return
- */
- private LogicalVariable getVariableOnly(String fieldName) {
- return fieldToLogicalVariableMap.get(fieldName);
- }
-
- private void updateVariable(String fieldName, LogicalVariable variable) {
- LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
- if (var == null) {
- fieldToLogicalVariableMap.put(fieldName, variable);
- nameToLogicalVariableMap.put(fieldName, variable);
- } else if (!var.equals(variable)) {
- fieldToLogicalVariableMap.put(fieldName, variable);
- nameToLogicalVariableMap.put(fieldName, variable);
- }
- }
-
- /**
- * get a list of logical variables from the schema
- *
- * @param schema
- * @return
- */
- @Override
- public List<LogicalVariable> getVariablesFromSchema(Schema schema) {
- List<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- List<String> names = schema.getNames();
-
- for (String name : names)
- variables.add(nameToLogicalVariableMap.get(name));
- return variables;
- }
-
- /**
- * get variable to typeinfo map
- *
- * @return
- */
- public HashMap<LogicalVariable, TypeInfo> getVariableContext() {
- return this.variableToType;
- }
-
- /**
- * get the number of variables s
- *
- * @return
- */
- public int getVariableCounter() {
- return currentVariable + 1;
- }
-
- /**
- * translate from hive operator tree to asterix operator tree
- *
- * @param hive
- * roots
- * @return Algebricks roots
- */
- public void translate(List<Operator> hiveRoot, ILogicalOperator parentOperator,
- HashMap<String, PartitionDesc> aliasToPathMap) throws AlgebricksException {
- /**
- * register visitors
- */
- visitors.add(new FilterVisitor());
- visitors.add(new GroupByVisitor());
- visitors.add(new JoinVisitor());
- visitors.add(new LateralViewJoinVisitor());
- visitors.add(new UnionVisitor());
- visitors.add(new LimitVisitor());
- visitors.add(new MapJoinVisitor());
- visitors.add(new ProjectVisitor());
- visitors.add(new SortVisitor());
- visitors.add(new ExtractVisitor());
- visitors.add(new TableScanWriteVisitor(aliasToPathMap));
-
- List<Mutable<ILogicalOperator>> refList = translate(hiveRoot, new MutableObject<ILogicalOperator>(
- parentOperator));
- insertReplicateOperator(refList);
- if (refList != null)
- rootOperators.addAll(refList);
- }
-
- /**
- * translate operator DAG
- *
- * @param hiveRoot
- * @param AlgebricksParentOperator
- * @return
- */
- private List<Mutable<ILogicalOperator>> translate(List<Operator> hiveRoot,
- Mutable<ILogicalOperator> AlgebricksParentOperator) throws AlgebricksException {
-
- for (Operator hiveOperator : hiveRoot) {
- continueTraverse = true;
- Mutable<ILogicalOperator> currentOperatorRef = null;
- if (hiveOperator.getType() == OperatorType.FILTER) {
- FilterOperator fop = (FilterOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.REDUCESINK) {
- ReduceSinkOperator fop = (ReduceSinkOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.JOIN) {
- JoinOperator fop = (JoinOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null) {
- continueTraverse = true;
- break;
- } else
- continueTraverse = false;
- }
- if (currentOperatorRef == null)
- return null;
- } else if (hiveOperator.getType() == OperatorType.LATERALVIEWJOIN) {
- LateralViewJoinOperator fop = (LateralViewJoinOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- if (currentOperatorRef == null)
- return null;
- } else if (hiveOperator.getType() == OperatorType.MAPJOIN) {
- MapJoinOperator fop = (MapJoinOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null) {
- continueTraverse = true;
- break;
- } else
- continueTraverse = false;
- }
- if (currentOperatorRef == null)
- return null;
- } else if (hiveOperator.getType() == OperatorType.SELECT) {
- SelectOperator fop = (SelectOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.EXTRACT) {
- ExtractOperator fop = (ExtractOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.GROUPBY) {
- GroupByOperator fop = (GroupByOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.TABLESCAN) {
- TableScanOperator fop = (TableScanOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.FILESINK) {
- FileSinkOperator fop = (FileSinkOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.LIMIT) {
- LimitOperator lop = (LimitOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.UDTF) {
- UDTFOperator lop = (UDTFOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.UNION) {
- UnionOperator lop = (UnionOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null) {
- continueTraverse = true;
- break;
- } else
- continueTraverse = false;
- }
- } else
- ;
- if (hiveOperator.getChildOperators() != null && hiveOperator.getChildOperators().size() > 0
- && continueTraverse) {
- @SuppressWarnings("unchecked")
- List<Operator> children = hiveOperator.getChildOperators();
- if (currentOperatorRef == null)
- currentOperatorRef = AlgebricksParentOperator;
- translate(children, currentOperatorRef);
- }
- if (hiveOperator.getChildOperators() == null || hiveOperator.getChildOperators().size() == 0)
- logicalOp.add(currentOperatorRef);
- }
- return logicalOp;
- }
-
- /**
- * used in select, group by to get no-column-expression columns
- *
- * @param cols
- * @return
- */
- public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,
- ArrayList<LogicalVariable> variables) {
-
- ArrayList<Mutable<ILogicalExpression>> expressions = new ArrayList<Mutable<ILogicalExpression>>();
-
- /**
- * variables to be appended in the assign operator
- */
- ArrayList<LogicalVariable> appendedVariables = new ArrayList<LogicalVariable>();
-
- // one variable can only be assigned once
- for (ExprNodeDesc hiveExpr : cols) {
- rewriteExpression(hiveExpr);
-
- if (hiveExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc desc2 = (ExprNodeColumnDesc) hiveExpr;
- String fieldName = desc2.getTabAlias() + "." + desc2.getColumn();
-
- // System.out.println("project expr: " + fieldName);
-
- if (fieldName.indexOf("$$") < 0) {
- LogicalVariable var = getVariable(fieldName, hiveExpr.getTypeInfo());
- desc2.setColumn(var.toString());
- desc2.setTabAlias("");
- variables.add(var);
- } else {
- LogicalVariable var = nameToLogicalVariableMap.get(desc2.getColumn());
- String name = this.logicalVariableToFieldMap.get(var);
- var = this.getVariableOnly(name);
- variables.add(var);
- }
- } else {
- Mutable<ILogicalExpression> asterixExpr = translateScalarFucntion(hiveExpr);
- expressions.add(asterixExpr);
- LogicalVariable var = getVariable(hiveExpr.getExprString() + asterixExpr.hashCode(),
- hiveExpr.getTypeInfo());
- variables.add(var);
- appendedVariables.add(var);
- }
- }
-
- /**
- * create an assign operator to deal with appending
- */
- ILogicalOperator assignOp = null;
- if (appendedVariables.size() > 0) {
- assignOp = new AssignOperator(appendedVariables, expressions);
- assignOp.getInputs().add(parent);
- }
- return assignOp;
- }
-
- private ILogicalPlan plan;
-
- public ILogicalPlan genLogicalPlan() {
- plan = new ALogicalPlanImpl(rootOperators);
- return plan;
- }
-
- public void printOperators() throws AlgebricksException {
- LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
- StringBuilder buffer = new StringBuilder();
- PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
- outputWriter.println(buffer);
- outputWriter.println("rewritten variables: ");
- outputWriter.flush();
- printVariables();
-
- }
-
- public static void setOutputPrinter(PrintWriter writer) {
- outputWriter = writer;
- }
-
- private void printVariables() {
- Set<Entry<String, LogicalVariable>> entries = fieldToLogicalVariableMap.entrySet();
-
- for (Entry<String, LogicalVariable> entry : entries) {
- outputWriter.println(entry.getKey() + " -> " + entry.getValue());
- }
- outputWriter.flush();
- }
-
- /**
- * generate the object inspector for the output of an operator
- *
- * @param operator
- * The Hive operator
- * @return an ObjectInspector object
- */
- public Schema generateInputSchema(Operator operator) {
- List<String> variableNames = new ArrayList<String>();
- List<TypeInfo> typeList = new ArrayList<TypeInfo>();
- List<ColumnInfo> columns = operator.getSchema().getSignature();
-
- for (ColumnInfo col : columns) {
- // typeList.add();
- TypeInfo type = col.getType();
- typeList.add(type);
-
- String fieldName = col.getInternalName();
- variableNames.add(fieldName);
- }
-
- return new Schema(variableNames, typeList);
- }
-
- /**
- * rewrite the names of output columns for feature expression evaluators to
- * use
- *
- * @param operator
- */
- public void rewriteOperatorOutputSchema(Operator operator) {
- List<ColumnInfo> columns = operator.getSchema().getSignature();
-
- for (ColumnInfo column : columns) {
- String columnName = column.getTabAlias() + "." + column.getInternalName();
- if (columnName.indexOf("$$") < 0) {
- LogicalVariable var = getVariable(columnName, column.getType());
- column.setInternalName(var.toString());
- }
- }
- }
-
- @Override
- public void rewriteOperatorOutputSchema(List<LogicalVariable> variables, Operator operator) {
-
- //printOperatorSchema(operator);
- List<ColumnInfo> columns = operator.getSchema().getSignature();
- if (variables.size() != columns.size()) {
- throw new IllegalStateException("output cardinality error " + operator.getName() + " variable size: "
- + variables.size() + " expected " + columns.size());
- }
-
- for (int i = 0; i < variables.size(); i++) {
- LogicalVariable var = variables.get(i);
- ColumnInfo column = columns.get(i);
- String fieldName = column.getTabAlias() + "." + column.getInternalName();
- if (fieldName.indexOf("$$") < 0) {
- updateVariable(fieldName, var);
- column.setInternalName(var.toString());
- }
- }
- //printOperatorSchema(operator);
- }
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpression(ExprNodeDesc expr) {
- if (expr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
- String fieldName = desc.getTabAlias() + "." + desc.getColumn();
- if (fieldName.indexOf("$$") < 0) {
- LogicalVariable var = getVariableOnly(fieldName);
- if (var == null) {
- fieldName = "." + desc.getColumn();
- var = getVariableOnly(fieldName);
- if (var == null) {
- fieldName = "null." + desc.getColumn();
- var = getVariableOnly(fieldName);
- if (var == null) {
- throw new IllegalStateException(fieldName + " is wrong!!! ");
- }
- }
- }
- String name = this.logicalVariableToFieldMap.get(var);
- var = getVariableOnly(name);
- desc.setColumn(var.toString());
- }
- } else {
- if (expr.getChildren() != null && expr.getChildren().size() > 0) {
- List<ExprNodeDesc> children = expr.getChildren();
- for (ExprNodeDesc desc : children)
- rewriteExpression(desc);
- }
- }
- }
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpressionPartial(ExprNodeDesc expr) {
- if (expr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
- String fieldName = desc.getTabAlias() + "." + desc.getColumn();
- if (fieldName.indexOf("$$") < 0) {
- LogicalVariable var = getVariableOnly(fieldName);
- desc.setColumn(var.toString());
- }
- } else {
- if (expr.getChildren() != null && expr.getChildren().size() > 0) {
- List<ExprNodeDesc> children = expr.getChildren();
- for (ExprNodeDesc desc : children)
- rewriteExpressionPartial(desc);
- }
- }
- }
-
- // private void printOperatorSchema(Operator operator) {
- // // System.out.println(operator.getName());
- // // List<ColumnInfo> columns = operator.getSchema().getSignature();
- // // for (ColumnInfo column : columns) {
- // // System.out.print(column.getTabAlias() + "." +
- // // column.getInternalName() + " ");
- // // }
- // // System.out.println();
- // }
-
- /**
- * translate scalar function expression
- *
- * @param hiveExpr
- * @return
- */
- public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc hiveExpr) {
- ILogicalExpression AlgebricksExpr;
-
- if (hiveExpr instanceof ExprNodeGenericFuncDesc) {
- List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
- List<ExprNodeDesc> children = hiveExpr.getChildren();
-
- for (ExprNodeDesc child : children)
- arguments.add(translateScalarFucntion(child));
-
- ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) hiveExpr;
- GenericUDF genericUdf = funcExpr.getGenericUDF();
- UDF udf = null;
- if (genericUdf instanceof GenericUDFBridge) {
- GenericUDFBridge bridge = (GenericUDFBridge) genericUdf;
- try {
- udf = bridge.getUdfClass().newInstance();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- /**
- * set up the hive function
- */
- Object hiveFunction = genericUdf;
- if (udf != null)
- hiveFunction = udf;
-
- FunctionIdentifier funcId = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getAlgebricksFunctionId(hiveFunction
- .getClass());
- if (funcId == null) {
- funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, hiveFunction.getClass().getName());
- }
-
- Object functionInfo = null;
- if (genericUdf instanceof GenericUDFBridge) {
- functionInfo = funcExpr;
- }
-
- /**
- * generate the function call expression
- */
- ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
- funcId, functionInfo), arguments);
- AlgebricksExpr = AlgebricksFuncExpr;
-
- } else if (hiveExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc column = (ExprNodeColumnDesc) hiveExpr;
- LogicalVariable var = this.getVariable(column.getColumn());
- AlgebricksExpr = new VariableReferenceExpression(var);
-
- } else if (hiveExpr instanceof ExprNodeFieldDesc) {
- FunctionIdentifier funcId;
- funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.FIELDACCESS);
-
- ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
- funcId, hiveExpr));
- AlgebricksExpr = AlgebricksFuncExpr;
- } else if (hiveExpr instanceof ExprNodeConstantDesc) {
- ExprNodeConstantDesc hiveConst = (ExprNodeConstantDesc) hiveExpr;
- Object value = hiveConst.getValue();
- AlgebricksExpr = new ConstantExpression(new HivesterixConstantValue(value));
- } else if (hiveExpr instanceof ExprNodeNullDesc) {
- FunctionIdentifier funcId;
- funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.NULL);
-
- ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
- funcId, hiveExpr));
-
- AlgebricksExpr = AlgebricksFuncExpr;
- } else {
- throw new IllegalStateException("unknown hive expression");
- }
- return new MutableObject<ILogicalExpression>(AlgebricksExpr);
- }
-
- /**
- * translate aggregation function expression
- *
- * @param aggregateDesc
- * @return
- */
- public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc) {
-
- String UDAFName = aggregateDesc.getGenericUDAFName();
-
- List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
- List<ExprNodeDesc> children = aggregateDesc.getParameters();
-
- for (ExprNodeDesc child : children)
- arguments.add(translateScalarFucntion(child));
-
- FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDAFName + "("
- + aggregateDesc.getMode() + ")");
- HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, aggregateDesc);
- AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(funcInfo, false,
- arguments);
- return new MutableObject<ILogicalExpression>(aggregationExpression);
- }
-
- /**
- * translate aggregation function expression
- *
- * @param aggregator
- * @return
- */
- public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument) {
-
- String UDTFName = udtfDesc.getUDTFName();
-
- FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDTFName);
- UnnestingFunctionCallExpression unnestingExpression = new UnnestingFunctionCallExpression(new HiveFunctionInfo(
- funcId, udtfDesc));
- unnestingExpression.getArguments().add(argument);
- return new MutableObject<ILogicalExpression>(unnestingExpression);
- }
-
- /**
- * get typeinfo
- */
- @Override
- public TypeInfo getType(LogicalVariable var) {
- return variableToType.get(var);
- }
-
- /**
- * get variable from variable name
- */
- @Override
- public LogicalVariable getVariable(String name) {
- return nameToLogicalVariableMap.get(name);
- }
-
- @Override
- public LogicalVariable getVariableFromFieldName(String fieldName) {
- return this.getVariableOnly(fieldName);
- }
-
- /**
- * set the metadata provider
- */
- @Override
- public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata) {
- this.metaData = metadata;
- }
-
- /**
- * insert ReplicateOperator when necessary
- */
- private void insertReplicateOperator(List<Mutable<ILogicalOperator>> roots) {
- Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> childToParentsMap = new HashMap<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>>();
- buildChildToParentsMapping(roots, childToParentsMap);
- for (Entry<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> entry : childToParentsMap.entrySet()) {
- List<Mutable<ILogicalOperator>> pList = entry.getValue();
- if (pList.size() > 1) {
- ILogicalOperator rop = new ReplicateOperator(pList.size());
- Mutable<ILogicalOperator> ropRef = new MutableObject<ILogicalOperator>(rop);
- Mutable<ILogicalOperator> childRef = entry.getKey();
- rop.getInputs().add(childRef);
- for (Mutable<ILogicalOperator> parentRef : pList) {
- ILogicalOperator parentOp = parentRef.getValue();
- int index = parentOp.getInputs().indexOf(childRef);
- parentOp.getInputs().set(index, ropRef);
- }
- }
- }
- }
-
- /**
- * build the mapping from child to Parents
- *
- * @param roots
- * @param childToParentsMap
- */
- private void buildChildToParentsMapping(List<Mutable<ILogicalOperator>> roots,
- Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> map) {
- for (Mutable<ILogicalOperator> opRef : roots) {
- List<Mutable<ILogicalOperator>> childRefs = opRef.getValue().getInputs();
- for (Mutable<ILogicalOperator> childRef : childRefs) {
- List<Mutable<ILogicalOperator>> parentList = map.get(childRef);
- if (parentList == null) {
- parentList = new ArrayList<Mutable<ILogicalOperator>>();
- map.put(childRef, parentList);
- }
- if (!parentList.contains(opRef))
- parentList.add(opRef);
- }
- buildChildToParentsMapping(childRefs, map);
- }
- }
-}
+package edu.uci.ics.hivesterix.logical.plan;
+
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
+import edu.uci.ics.hivesterix.logical.expression.HiveAlgebricksBuiltInFunctionMap;
+import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
+import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
+import edu.uci.ics.hivesterix.logical.plan.visitor.ExtractVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.FilterVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.GroupByVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.JoinVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.LateralViewJoinVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.LimitVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.MapJoinVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.ProjectVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.SortVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.TableScanWriteVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.UnionVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Visitor;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ReplicateOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
+import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor;
+import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.PlanPrettyPrinter;
+
+@SuppressWarnings("rawtypes")
+public class HiveAlgebricksTranslator implements Translator {
+
+ private int currentVariable = 0;
+
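+ /** references to the leaf operators produced by translation */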
+ private List<Mutable<ILogicalOperator>> logicalOp = new ArrayList<Mutable<ILogicalOperator>>();
+
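+ /** false while a multi-input operator (join/union) is still waiting for its remaining parents */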
+ private boolean continueTraverse = true;
+
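+ /** the metadata provider used to resolve table scans and file sinks */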
+ private IMetadataProvider<PartitionDesc, Object> metaData;
+
+ /**
+ * map variable name to the logical variable
+ */
+ private HashMap<String, LogicalVariable> nameToLogicalVariableMap = new HashMap<String, LogicalVariable>();
+
+ /**
+ * map field name to LogicalVariable
+ */
+ private HashMap<String, LogicalVariable> fieldToLogicalVariableMap = new HashMap<String, LogicalVariable>();
+
+ /**
+ * map logical variable to name
+ */
+ private HashMap<LogicalVariable, String> logicalVariableToFieldMap = new HashMap<LogicalVariable, String>();
+
+ /**
+ * asterix root operators
+ */
+ private List<Mutable<ILogicalOperator>> rootOperators = new ArrayList<Mutable<ILogicalOperator>>();
+
+ /**
+ * a list of visitors
+ */
+ private List<Visitor> visitors = new ArrayList<Visitor>();
+
+ /**
+ * output writer to print things out
+ */
+ private static PrintWriter outputWriter = new PrintWriter(new OutputStreamWriter(System.out));
+
+ /**
+ * map a logical variable to type info
+ */
+ private HashMap<LogicalVariable, TypeInfo> variableToType = new HashMap<LogicalVariable, TypeInfo>();
+
+ @Override
+ public LogicalVariable getVariable(String fieldName, TypeInfo type) {
+ LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
+ if (var == null) {
+ currentVariable++;
+ var = new LogicalVariable(currentVariable);
+ fieldToLogicalVariableMap.put(fieldName, var);
+ nameToLogicalVariableMap.put(var.toString(), var);
+ variableToType.put(var, type);
+ logicalVariableToFieldMap.put(var, fieldName);
+ }
+ return var;
+ }
+
+ @Override
+ public LogicalVariable getNewVariable(String fieldName, TypeInfo type) {
+ currentVariable++;
+ LogicalVariable var = new LogicalVariable(currentVariable);
+ fieldToLogicalVariableMap.put(fieldName, var);
+ nameToLogicalVariableMap.put(var.toString(), var);
+ variableToType.put(var, type);
+ logicalVariableToFieldMap.put(var, fieldName);
+ return var;
+ }
+
+ @Override
+ public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar) {
+ String name = this.logicalVariableToFieldMap.get(oldVar);
+ if (name != null) {
+ fieldToLogicalVariableMap.put(name, newVar);
+ nameToLogicalVariableMap.put(newVar.toString(), newVar);
+ nameToLogicalVariableMap.put(oldVar.toString(), newVar);
+ logicalVariableToFieldMap.put(newVar, name);
+ }
+ }
+
+ @Override
+ public IMetadataProvider<PartitionDesc, Object> getMetadataProvider() {
+ return metaData;
+ }
+
+ /**
+ * only get a variable, without rewriting it
+ *
+ * @param fieldName
+ * @return the logical variable mapped to the field name, or null if absent
+ */
+ private LogicalVariable getVariableOnly(String fieldName) {
+ return fieldToLogicalVariableMap.get(fieldName);
+ }
+
+ public void updateVariable(String fieldName, LogicalVariable variable) {
+ LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
+ if (var == null) {
+ fieldToLogicalVariableMap.put(fieldName, variable);
+ nameToLogicalVariableMap.put(fieldName, variable);
+ } else if (!var.equals(variable)) {
+ fieldToLogicalVariableMap.put(fieldName, variable);
+ nameToLogicalVariableMap.put(fieldName, variable);
+ }
+ }
+
+ /**
+ * get a list of logical variables from the schema
+ *
+ * @param schema
+ * @return the logical variables corresponding to the schema's field names
+ */
+ @Override
+ public List<LogicalVariable> getVariablesFromSchema(Schema schema) {
+ List<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ List<String> names = schema.getNames();
+
+ for (String name : names)
+ variables.add(nameToLogicalVariableMap.get(name));
+ return variables;
+ }
+
+ /**
+ * get variable to typeinfo map
+ *
+ * @return the map from logical variables to Hive type info
+ */
+ public HashMap<LogicalVariable, TypeInfo> getVariableContext() {
+ return this.variableToType;
+ }
+
+ /**
+ * get the number of variables
+ *
+ * @return the variable counter, one past the last assigned variable id
+ */
+ public int getVariableCounter() {
+ return currentVariable + 1;
+ }
+
+ /**
+ * translate the Hive operator tree into the Algebricks operator tree;
+ * the resulting Algebricks roots are accumulated in rootOperators
+ *
+ * @param hiveRoot
+ *            the roots of the Hive operator DAG
+ */
+ public void translate(List<Operator> hiveRoot, ILogicalOperator parentOperator,
+ HashMap<String, PartitionDesc> aliasToPathMap) throws AlgebricksException {
+ /**
+ * register visitors
+ */
+ visitors.add(new FilterVisitor());
+ visitors.add(new GroupByVisitor());
+ visitors.add(new JoinVisitor());
+ visitors.add(new LateralViewJoinVisitor());
+ visitors.add(new UnionVisitor());
+ visitors.add(new LimitVisitor());
+ visitors.add(new MapJoinVisitor());
+ visitors.add(new ProjectVisitor());
+ visitors.add(new SortVisitor());
+ visitors.add(new ExtractVisitor());
+ visitors.add(new TableScanWriteVisitor(aliasToPathMap));
+
+ List<Mutable<ILogicalOperator>> refList = translate(hiveRoot, new MutableObject<ILogicalOperator>(
+ parentOperator));
+ insertReplicateOperator(refList);
+ if (refList != null)
+ rootOperators.addAll(refList);
+ }
+
+ /**
+ * translate operator DAG
+ *
+ * @param hiveRoot
+ * @param AlgebricksParentOperator
+ * @return references to the translated leaf operators, or null if a multi-input operator is still awaiting its other parents
+ */
+ private List<Mutable<ILogicalOperator>> translate(List<Operator> hiveRoot,
+ Mutable<ILogicalOperator> AlgebricksParentOperator) throws AlgebricksException {
+
+ for (Operator hiveOperator : hiveRoot) {
+ continueTraverse = true;
+ Mutable<ILogicalOperator> currentOperatorRef = null;
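+ // dispatch the Hive operator to the first registered visitor that can translate it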
+ if (hiveOperator.getType() == OperatorType.FILTER) {
+ FilterOperator fop = (FilterOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.REDUCESINK) {
+ ReduceSinkOperator fop = (ReduceSinkOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.JOIN) {
+ JoinOperator fop = (JoinOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null) {
+ continueTraverse = true;
+ break;
+ } else
+ continueTraverse = false;
+ }
+ if (currentOperatorRef == null)
+ return null;
+ } else if (hiveOperator.getType() == OperatorType.LATERALVIEWJOIN) {
+ LateralViewJoinOperator fop = (LateralViewJoinOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ if (currentOperatorRef == null)
+ return null;
+ } else if (hiveOperator.getType() == OperatorType.MAPJOIN) {
+ MapJoinOperator fop = (MapJoinOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null) {
+ continueTraverse = true;
+ break;
+ } else
+ continueTraverse = false;
+ }
+ if (currentOperatorRef == null)
+ return null;
+ } else if (hiveOperator.getType() == OperatorType.SELECT) {
+ SelectOperator fop = (SelectOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.EXTRACT) {
+ ExtractOperator fop = (ExtractOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.GROUPBY) {
+ GroupByOperator fop = (GroupByOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.TABLESCAN) {
+ TableScanOperator fop = (TableScanOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.FILESINK) {
+ FileSinkOperator fop = (FileSinkOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.LIMIT) {
+ LimitOperator lop = (LimitOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.UDTF) {
+ UDTFOperator lop = (UDTFOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.UNION) {
+ UnionOperator lop = (UnionOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null) {
+ continueTraverse = true;
+ break;
+ } else
+ continueTraverse = false;
+ }
+ }
+ // unhandled operator types fall through: their children are translated against the current parent
+ if (hiveOperator.getChildOperators() != null && hiveOperator.getChildOperators().size() > 0
+ && continueTraverse) {
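+ // recursively translate the children, with this operator's translation as their parent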
+ @SuppressWarnings("unchecked")
+ List<Operator> children = hiveOperator.getChildOperators();
+ if (currentOperatorRef == null)
+ currentOperatorRef = AlgebricksParentOperator;
+ translate(children, currentOperatorRef);
+ }
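+ // a Hive operator without children maps to a root of the Algebricks plan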
+ if (hiveOperator.getChildOperators() == null || hiveOperator.getChildOperators().size() == 0)
+ logicalOp.add(currentOperatorRef);
+ }
+ return logicalOp;
+ }
+
+ /**
+ * used by select and group-by to assign variables to non-column expressions
+ *
+ * @param cols
+ * @return an assign operator for the appended expressions, or null if none are needed
+ */
+ public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,
+ ArrayList<LogicalVariable> variables) {
+
+ ArrayList<Mutable<ILogicalExpression>> expressions = new ArrayList<Mutable<ILogicalExpression>>();
+
+ /**
+ * variables to be appended in the assign operator
+ */
+ ArrayList<LogicalVariable> appendedVariables = new ArrayList<LogicalVariable>();
+
+ // one variable can only be assigned once
+ for (ExprNodeDesc hiveExpr : cols) {
+ rewriteExpression(hiveExpr);
+
+ if (hiveExpr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc desc2 = (ExprNodeColumnDesc) hiveExpr;
+ String fieldName = desc2.getTabAlias() + "." + desc2.getColumn();
+
+ // System.out.println("project expr: " + fieldName);
+
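+ // names already containing "$$" refer to logical variables that were previously rewritten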
+ if (fieldName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariable(fieldName, hiveExpr.getTypeInfo());
+ desc2.setColumn(var.toString());
+ desc2.setTabAlias("");
+ variables.add(var);
+ } else {
+ LogicalVariable var = nameToLogicalVariableMap.get(desc2.getColumn());
+ String name = this.logicalVariableToFieldMap.get(var);
+ var = this.getVariableOnly(name);
+ variables.add(var);
+ }
+ } else {
+ Mutable<ILogicalExpression> asterixExpr = translateScalarFucntion(hiveExpr);
+ expressions.add(asterixExpr);
+ LogicalVariable var = getVariable(hiveExpr.getExprString() + asterixExpr.hashCode(),
+ hiveExpr.getTypeInfo());
+ variables.add(var);
+ appendedVariables.add(var);
+ }
+ }
+
+ /**
+ * create an assign operator to deal with appending
+ */
+ ILogicalOperator assignOp = null;
+ if (appendedVariables.size() > 0) {
+ assignOp = new AssignOperator(appendedVariables, expressions);
+ assignOp.getInputs().add(parent);
+ }
+ return assignOp;
+ }
+
+ private ILogicalPlan plan;
+
+ public ILogicalPlan genLogicalPlan() {
+ plan = new ALogicalPlanImpl(rootOperators);
+ return plan;
+ }
+
+ public void printOperators() throws AlgebricksException {
+ LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
+ StringBuilder buffer = new StringBuilder();
+ PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
+ outputWriter.println(buffer);
+ outputWriter.println("rewritten variables: ");
+ outputWriter.flush();
+ printVariables();
+
+ }
+
+ public static void setOutputPrinter(PrintWriter writer) {
+ outputWriter = writer;
+ }
+
+ private void printVariables() {
+ Set<Entry<String, LogicalVariable>> entries = fieldToLogicalVariableMap.entrySet();
+
+ for (Entry<String, LogicalVariable> entry : entries) {
+ outputWriter.println(entry.getKey() + " -> " + entry.getValue());
+ }
+ outputWriter.flush();
+ }
+
+ /**
+ * generate the object inspector for the output of an operator
+ *
+ * @param operator
+ * The Hive operator
+ * @return an ObjectInspector object
+ */
+ public Schema generateInputSchema(Operator operator) {
+ List<String> variableNames = new ArrayList<String>();
+ List<TypeInfo> typeList = new ArrayList<TypeInfo>();
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+
+ for (ColumnInfo col : columns) {
+ // typeList.add();
+ TypeInfo type = col.getType();
+ typeList.add(type);
+
+ String fieldName = col.getInternalName();
+ variableNames.add(fieldName);
+ }
+
+ return new Schema(variableNames, typeList);
+ }
+
+ /**
+ * rewrite the names of output columns for future expression evaluators to
+ * use
+ *
+ * @param operator
+ */
+ public void rewriteOperatorOutputSchema(Operator operator) {
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+ for (ColumnInfo column : columns) {
+ String columnName = column.getTabAlias() + "." + column.getInternalName();
+ if (columnName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariable(columnName, column.getType());
+ column.setInternalName(var.toString());
+ }
+ }
+ }
+
+ @Override
+ public void rewriteOperatorOutputSchema(List<LogicalVariable> variables, Operator operator) {
+ // printOperatorSchema(operator);
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+ // if (variables.size() != columns.size()) {
+ // throw new IllegalStateException("output cardinality error " +
+ // operator.getName() + " variable size: "
+ // + variables.size() + " expected " + columns.size());
+ // }
+ for (int i = 0; i < variables.size(); i++) {
+ LogicalVariable var = variables.get(i);
+ ColumnInfo column = columns.get(i);
+ String fieldName = column.getTabAlias() + "." + column.getInternalName();
+ if (fieldName.indexOf("$$") < 0) {
+ updateVariable(fieldName, var);
+ column.setInternalName(var.toString());
+ }
+ }
+
+ // printOperatorSchema(operator);
+ }
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpression(ExprNodeDesc expr) {
+ if (expr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
+ String fieldName = desc.getTabAlias() + "." + desc.getColumn();
+ if (fieldName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariableOnly(fieldName);
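+ // fall back to the unqualified and "null"-qualified forms of the field name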
+ if (var == null) {
+ fieldName = "." + desc.getColumn();
+ var = getVariableOnly(fieldName);
+ if (var == null) {
+ fieldName = "null." + desc.getColumn();
+ var = getVariableOnly(fieldName);
+ if (var == null) {
+ throw new IllegalStateException(fieldName + " is wrong!!! ");
+ }
+ }
+ }
+ String name = this.logicalVariableToFieldMap.get(var);
+ var = getVariableOnly(name);
+ desc.setColumn(var.toString());
+ }
+ } else {
+ if (expr.getChildren() != null && expr.getChildren().size() > 0) {
+ List<ExprNodeDesc> children = expr.getChildren();
+ for (ExprNodeDesc desc : children)
+ rewriteExpression(desc);
+ }
+ }
+ }
+
+ /**
+ * rewrite an expression and substitute variables, skipping the fallback lookups used by rewriteExpression
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpressionPartial(ExprNodeDesc expr) {
+ if (expr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
+ String fieldName = desc.getTabAlias() + "." + desc.getColumn();
+ if (fieldName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariableOnly(fieldName);
+ desc.setColumn(var.toString());
+ }
+ } else {
+ if (expr.getChildren() != null && expr.getChildren().size() > 0) {
+ List<ExprNodeDesc> children = expr.getChildren();
+ for (ExprNodeDesc desc : children)
+ rewriteExpressionPartial(desc);
+ }
+ }
+ }
+
+ // private void printOperatorSchema(Operator operator) {
+ // // System.out.println(operator.getName());
+ // // List<ColumnInfo> columns = operator.getSchema().getSignature();
+ // // for (ColumnInfo column : columns) {
+ // // System.out.print(column.getTabAlias() + "." +
+ // // column.getInternalName() + " ");
+ // // }
+ // // System.out.println();
+ // }
+
+ /**
+ * translate scalar function expression
+ *
+ * @param hiveExpr
+ * @return the translated Algebricks expression wrapped in a Mutable reference
+ */
+ public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc hiveExpr) {
+ ILogicalExpression AlgebricksExpr;
+
+ if (hiveExpr instanceof ExprNodeGenericFuncDesc) {
+ List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
+ List<ExprNodeDesc> children = hiveExpr.getChildren();
+
+ for (ExprNodeDesc child : children)
+ arguments.add(translateScalarFucntion(child));
+
+ ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) hiveExpr;
+ GenericUDF genericUdf = funcExpr.getGenericUDF();
+ UDF udf = null;
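+ // a GenericUDFBridge wraps a legacy UDF; instantiate the bridged UDF class to resolve it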
+ if (genericUdf instanceof GenericUDFBridge) {
+ GenericUDFBridge bridge = (GenericUDFBridge) genericUdf;
+ try {
+ udf = bridge.getUdfClass().newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * set up the hive function
+ */
+ Object hiveFunction = genericUdf;
+ if (udf != null)
+ hiveFunction = udf;
+
+ FunctionIdentifier funcId = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getAlgebricksFunctionId(hiveFunction
+ .getClass());
+ if (funcId == null) {
+ funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, hiveFunction.getClass().getName());
+ }
+
+ Object functionInfo = null;
+ if (genericUdf instanceof GenericUDFBridge) {
+ functionInfo = funcExpr;
+ }
+
+ /**
+ * generate the function call expression
+ */
+ ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
+ funcId, functionInfo), arguments);
+ AlgebricksExpr = AlgebricksFuncExpr;
+
+ } else if (hiveExpr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc column = (ExprNodeColumnDesc) hiveExpr;
+ LogicalVariable var = this.getVariable(column.getColumn());
+ AlgebricksExpr = new VariableReferenceExpression(var);
+
+ } else if (hiveExpr instanceof ExprNodeFieldDesc) {
+ FunctionIdentifier funcId;
+ funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.FIELDACCESS);
+
+ ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
+ funcId, hiveExpr));
+ AlgebricksExpr = AlgebricksFuncExpr;
+ } else if (hiveExpr instanceof ExprNodeConstantDesc) {
+ ExprNodeConstantDesc hiveConst = (ExprNodeConstantDesc) hiveExpr;
+ Object value = hiveConst.getValue();
+ AlgebricksExpr = new ConstantExpression(new HivesterixConstantValue(value));
+ } else if (hiveExpr instanceof ExprNodeNullDesc) {
+ FunctionIdentifier funcId;
+ funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.NULL);
+
+ ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
+ funcId, hiveExpr));
+
+ AlgebricksExpr = AlgebricksFuncExpr;
+ } else {
+ throw new IllegalStateException("unknown hive expression");
+ }
+ return new MutableObject<ILogicalExpression>(AlgebricksExpr);
+ }
+
+ /**
+ * translate aggregation function expression
+ *
+ * @param aggregateDesc
+ * @return the aggregate function call expression
+ */
+ public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc) {
+
+ String UDAFName = aggregateDesc.getGenericUDAFName();
+
+ List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
+ List<ExprNodeDesc> children = aggregateDesc.getParameters();
+
+ for (ExprNodeDesc child : children)
+ arguments.add(translateScalarFucntion(child));
+
+ FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDAFName + "("
+ + aggregateDesc.getMode() + ")");
+ HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, aggregateDesc);
+ AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(funcInfo, false,
+ arguments);
+ return new MutableObject<ILogicalExpression>(aggregationExpression);
+ }
+
+ /**
+ * translate an unnest (UDTF) function expression
+ *
+ * @param udtfDesc
+ * @return the unnesting function call expression
+ */
+ public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument) {
+
+ String UDTFName = udtfDesc.getUDTFName();
+
+ FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDTFName);
+ UnnestingFunctionCallExpression unnestingExpression = new UnnestingFunctionCallExpression(new HiveFunctionInfo(
+ funcId, udtfDesc));
+ unnestingExpression.getArguments().add(argument);
+ return new MutableObject<ILogicalExpression>(unnestingExpression);
+ }
+
+ /**
+ * get typeinfo
+ */
+ @Override
+ public TypeInfo getType(LogicalVariable var) {
+ return variableToType.get(var);
+ }
+
+ /**
+ * get variable from variable name
+ */
+ @Override
+ public LogicalVariable getVariable(String name) {
+ return nameToLogicalVariableMap.get(name);
+ }
+
+ @Override
+ public LogicalVariable getVariableFromFieldName(String fieldName) {
+ return this.getVariableOnly(fieldName);
+ }
+
+ /**
+ * set the metadata provider
+ */
+ @Override
+ public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata) {
+ this.metaData = metadata;
+ }
+
+ /**
+ * insert ReplicateOperator when necessary
+ */
+ private void insertReplicateOperator(List<Mutable<ILogicalOperator>> roots) {
+ Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> childToParentsMap = new HashMap<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>>();
+ buildChildToParentsMapping(roots, childToParentsMap);
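+ // a child referenced by multiple parents must be fed through a ReplicateOperator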
+ for (Entry<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> entry : childToParentsMap.entrySet()) {
+ List<Mutable<ILogicalOperator>> pList = entry.getValue();
+ if (pList.size() > 1) {
+ ILogicalOperator rop = new ReplicateOperator(pList.size());
+ Mutable<ILogicalOperator> ropRef = new MutableObject<ILogicalOperator>(rop);
+ Mutable<ILogicalOperator> childRef = entry.getKey();
+ rop.getInputs().add(childRef);
+ for (Mutable<ILogicalOperator> parentRef : pList) {
+ ILogicalOperator parentOp = parentRef.getValue();
+ int index = parentOp.getInputs().indexOf(childRef);
+ parentOp.getInputs().set(index, ropRef);
+ }
+ }
+ }
+ }
+
+ /**
+ * build the mapping from each child operator to its parents
+ *
+ * @param roots
+ * @param childToParentsMap
+ */
+ private void buildChildToParentsMapping(List<Mutable<ILogicalOperator>> roots,
+ Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> map) {
+ for (Mutable<ILogicalOperator> opRef : roots) {
+ List<Mutable<ILogicalOperator>> childRefs = opRef.getValue().getInputs();
+ for (Mutable<ILogicalOperator> childRef : childRefs) {
+ List<Mutable<ILogicalOperator>> parentList = map.get(childRef);
+ if (parentList == null) {
+ parentList = new ArrayList<Mutable<ILogicalOperator>>();
+ map.put(childRef, parentList);
+ }
+ if (!parentList.contains(opRef))
+ parentList.add(opRef);
+ }
+ buildChildToParentsMapping(childRefs, map);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
index f4161a4..aa1837c 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
@@ -12,113 +12,145 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-
-/**
- * The lateral view join operator is used for FROM src LATERAL VIEW udtf()...
- * This operator was implemented with the following operator DAG in mind.
- * For a query such as
- * SELECT pageid, adid.* FROM example_table LATERAL VIEW explode(adid_list) AS
- * adid
- * The top of the operator DAG will look similar to
- * [Table Scan] | [Lateral View Forward] / \ [Select](*) [Select](adid_list) | |
- * | [UDTF] (explode) \ / [Lateral View Join] | | [Select] (pageid, adid.*) |
- * ....
- * Rows from the table scan operator are first to a lateral view forward
- * operator that just forwards the row and marks the start of a LV. The select
- * operator on the left picks all the columns while the select operator on the
- * right picks only the columns needed by the UDTF.
- * The output of select in the left branch and output of the UDTF in the right
- * branch are then sent to the lateral view join (LVJ). In most cases, the UDTF
- * will generate > 1 row for every row received from the TS, while the left
- * select operator will generate only one. For each row output from the TS, the
- * LVJ outputs all possible rows that can be created by joining the row from the
- * left select and one of the rows output from the UDTF.
- * Additional lateral views can be supported by adding a similar DAG after the
- * previous LVJ operator.
- */
-
-@SuppressWarnings("rawtypes")
-public class LateralViewJoinVisitor extends DefaultVisitor {
-
- private UDTFDesc udtf;
-
- private List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
-
- @Override
- public Mutable<ILogicalOperator> visit(LateralViewJoinOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException {
-
- parents.add(AlgebricksParentOperatorRef);
- if (operator.getParentOperators().size() > parents.size()) {
- return null;
- }
-
- Operator parent0 = operator.getParentOperators().get(0);
- ILogicalOperator parentOperator;
- ILogicalExpression unnestArg;
- if (parent0 instanceof UDTFOperator) {
- List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parents.get(1).getValue(), unnestVars);
- unnestArg = new VariableReferenceExpression(unnestVars.get(0));
- parentOperator = parents.get(1).getValue();
- } else {
- List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parents.get(0).getValue(), unnestVars);
- unnestArg = new VariableReferenceExpression(unnestVars.get(0));
- parentOperator = parents.get(0).getValue();
- }
-
- LogicalVariable var = t.getVariable(udtf.toString(), TypeInfoFactory.unknownTypeInfo);
-
- Mutable<ILogicalExpression> unnestExpr = t.translateUnnestFunction(udtf, new MutableObject<ILogicalExpression>(
- unnestArg));
- ILogicalOperator currentOperator = new UnnestOperator(var, unnestExpr);
-
- List<LogicalVariable> outputVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parentOperator, outputVars);
- outputVars.add(var);
- currentOperator.getInputs().add(new MutableObject<ILogicalOperator>(parentOperator));
-
- parents.clear();
- udtf = null;
- t.rewriteOperatorOutputSchema(outputVars, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(UDTFOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
- udtf = (UDTFDesc) operator.getConf();
-
- // populate the schema from upstream operator
- operator.setSchema(operator.getParentOperators().get(0).getSchema());
- List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
- t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
- return null;
- }
-
-}
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+
+/**
+ * The lateral view join operator is used for FROM src LATERAL VIEW udtf()...
+ * This operator was implemented with the following operator DAG in mind.
+ * For a query such as
+ * SELECT pageid, adid.* FROM example_table LATERAL VIEW explode(adid_list) AS
+ * adid
+ * The top of the operator DAG will look similar to
+ *   [Table Scan]
+ *        |
+ *   [Lateral View Forward]
+ *       /    \
+ *  [Select](*)    [Select](adid_list)
+ *      |              |
+ *      |          [UDTF] (explode)
+ *       \            /
+ *    [Lateral View Join]
+ *           |
+ *           |
+ *   [Select] (pageid, adid.*)
+ *           |
+ *          ....
+ * Rows from the table scan operator are first sent to a lateral view forward
+ * operator that just forwards the row and marks the start of a LV. The select
+ * operator on the left picks all the columns while the select operator on the
+ * right picks only the columns needed by the UDTF.
+ * The output of select in the left branch and output of the UDTF in the right
+ * branch are then sent to the lateral view join (LVJ). In most cases, the UDTF
+ * will generate > 1 row for every row received from the TS, while the left
+ * select operator will generate only one. For each row output from the TS, the
+ * LVJ outputs all possible rows that can be created by joining the row from the
+ * left select and one of the rows output from the UDTF.
+ * Additional lateral views can be supported by adding a similar DAG after the
+ * previous LVJ operator.
+ */
+
+@SuppressWarnings("rawtypes")
+public class LateralViewJoinVisitor extends DefaultVisitor {
+
+ private UDTFDesc udtf;
+
+ private List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LateralViewJoinOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException {
+
+ parents.add(AlgebricksParentOperatorRef);
+ if (operator.getParentOperators().size() > parents.size()) {
+ return null;
+ }
+
+ ILogicalOperator parentOperator = null;
+ ILogicalExpression unnestArg = null;
+ List<LogicalVariable> projectVariables = new ArrayList<LogicalVariable>();
+ for (Mutable<ILogicalOperator> parentLOpRef : parents) {
+ VariableUtilities.getLiveVariables(parentLOpRef.getValue(), projectVariables);
+ }
+ for (Operator parentOp : operator.getParentOperators()) {
+ if (parentOp instanceof UDTFOperator) {
+ int index = operator.getParentOperators().indexOf(parentOp);
+ List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(parents.get(index).getValue(), unnestVars);
+ unnestArg = new VariableReferenceExpression(unnestVars.get(0));
+ parentOperator = parents.get(index).getValue();
+ }
+ }
+
+ LogicalVariable var = t.getVariable(udtf.toString(), TypeInfoFactory.unknownTypeInfo);
+ Mutable<ILogicalExpression> unnestExpr = t.translateUnnestFunction(udtf, new MutableObject<ILogicalExpression>(
+ unnestArg));
+ ILogicalOperator currentOperator = new UnnestOperator(var, unnestExpr);
+
+ List<LogicalVariable> outputVars = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(parents.get(0).getValue(), outputVars);
+ outputVars.add(var);
+ ILogicalOperator inputProjectOperator = new ProjectOperator(projectVariables);
+ currentOperator.getInputs().add(new MutableObject<ILogicalOperator>(inputProjectOperator));
+ inputProjectOperator.getInputs().addAll(parentOperator.getInputs());
+
+ parents.clear();
+ udtf = null;
+ List<ColumnInfo> inputSchema = operator.getSchema().getSignature();
+ rewriteOperatorDesc(outputVars, operator.getConf(), inputSchema, t);
+ //t.rewriteOperatorOutputSchema(outputVars, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(UDTFOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
+ udtf = (UDTFDesc) operator.getConf();
+
+ // populate the schema from upstream operator
+ operator.setSchema(operator.getParentOperators().get(0).getSchema());
+ List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
+ t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
+ return null;
+ }
+
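+ /**
+ * re-bind each output column, qualified as tabAlias.fieldName, to the
+ * logical variable generated for it (skipping names already bound to $$ variables)
+ */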
+ private void rewriteOperatorDesc(List<LogicalVariable> variables, LateralViewJoinDesc desc,
+ List<ColumnInfo> schema, Translator t) {
+ List<String> outputFieldNames = desc.getOutputInternalColNames();
+ for (int i = 0; i < variables.size(); i++) {
+ LogicalVariable var = variables.get(i);
+ String fieldName = outputFieldNames.get(i);
+ String tabAlias = findTabAlias(fieldName, schema);
+ fieldName = tabAlias + "." + fieldName;
+ if (fieldName.indexOf("$$") < 0) {
+ //outputFieldNames.set(i, var.toString());
+ t.updateVariable(fieldName, var);
+ }
+ }
+ }
+
+ private String findTabAlias(String fieldName, List<ColumnInfo> schema) {
+ for (int i = 0; i < schema.size(); i++) {
+ ColumnInfo column = schema.get(i);
+ if (column.getInternalName().equals(fieldName)) {
+ return column.getTabAlias();
+ }
+ }
+ return "null";
+ }
+
+}
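
Reviewer note: rewriteOperatorDesc/findTabAlias above qualify each Hive output column with its table alias before re-binding it to the translator's variable map. A hypothetical, self-contained mini-version of that bookkeeping, with plain strings standing in for ColumnInfo and LogicalVariable:

    import java.util.Arrays;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class BindSketch {
        public static void main(String[] args) {
            // internal column name -> table alias (stand-in for List<ColumnInfo>)
            Map<String, String> schema = new LinkedHashMap<String, String>();
            schema.put("_col0", "src");
            schema.put("_col1", "adtable");
            List<String> outputNames = Arrays.asList("_col0", "_col1");
            List<String> variables = Arrays.asList("$$1", "$$2"); // stand-in LogicalVariables
            Map<String, String> binding = new LinkedHashMap<String, String>();
            for (int i = 0; i < variables.size(); i++) {
                String fieldName = outputNames.get(i);
                // same "null" fallback as findTabAlias when no column matches
                String alias = schema.containsKey(fieldName) ? schema.get(fieldName) : "null";
                binding.put(alias + "." + fieldName, variables.get(i));
            }
            System.out.println(binding); // {src._col0=$$1, adtable._col1=$$2}
        }
    }
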
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
index 186b291..3ed9786 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
@@ -14,36 +14,36 @@
*/
package edu.uci.ics.hivesterix.logical.plan.visitor;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
@SuppressWarnings("rawtypes")
public class MapJoinVisitor extends DefaultVisitor {
@@ -56,7 +56,7 @@
@Override
public Mutable<ILogicalOperator> visit(MapJoinOperator operator,
Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- List<Operator<? extends Serializable>> joinSrc = operator.getParentOperators();
+ List<Operator<? extends OperatorDesc>> joinSrc = operator.getParentOperators();
List<Mutable<ILogicalOperator>> parents = opMap.get(operator);
if (parents == null) {
parents = new ArrayList<Mutable<ILogicalOperator>>();
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
index 74cebaa..25abdec 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
@@ -69,9 +69,9 @@
@Override
public Mutable<ILogicalOperator> visit(TableScanOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
TableScanDesc desc = (TableScanDesc) operator.getConf();
- if (desc == null) {
+ if (desc == null || desc.getAlias() == null) {
List<LogicalVariable> schema = new ArrayList<LogicalVariable>();
VariableUtilities.getLiveVariables(AlgebricksParentOperator.getValue(), schema);
t.rewriteOperatorOutputSchema(schema, operator);
@@ -124,7 +124,6 @@
@Override
public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) {
-
if (hiveOperator.getChildOperators() != null && hiveOperator.getChildOperators().size() > 0)
return null;
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
index 1cb5121..c710f3f 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
@@ -14,21 +14,21 @@
*/
package edu.uci.ics.hivesterix.logical.plan.visitor;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
public class UnionVisitor extends DefaultVisitor {
@@ -46,8 +46,8 @@
List<LogicalVariable> leftVars = new ArrayList<LogicalVariable>();
List<LogicalVariable> rightVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getUsedVariables(parents.get(0).getValue(), leftVars);
- VariableUtilities.getUsedVariables(parents.get(1).getValue(), rightVars);
+ VariableUtilities.getLiveVariables(parents.get(0).getValue(), leftVars);
+ VariableUtilities.getLiveVariables(parents.get(1).getValue(), rightVars);
List<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> triples = new ArrayList<Triple<LogicalVariable, LogicalVariable, LogicalVariable>>();
List<LogicalVariable> unionVars = new ArrayList<LogicalVariable>();
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
index 32b0f66..5b6ac50 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
@@ -12,173 +12,181 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.logical.plan.visitor.base;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-
-@SuppressWarnings("rawtypes")
-public interface Translator {
-
- /**
- * generate input schema
- *
- * @param operator
- * @return
- */
- public Schema generateInputSchema(Operator operator);
-
- /**
- * rewrite the names of output columns for feture expression evaluators to
- * use
- *
- * @param operator
- */
- public void rewriteOperatorOutputSchema(List<LogicalVariable> vars, Operator operator);
-
- /**
- * rewrite the names of output columns for feture expression evaluators to
- * use
- *
- * @param operator
- */
- public void rewriteOperatorOutputSchema(Operator operator);
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpression(ExprNodeDesc expr);
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpressionPartial(ExprNodeDesc expr);
-
- /**
- * get an assign operator as a child of parent
- *
- * @param parent
- * @param cols
- * @param variables
- * @return
- */
- public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,
- ArrayList<LogicalVariable> variables);
-
- /**
- * get type for a logical variable
- *
- * @param var
- * @return type info
- */
- public TypeInfo getType(LogicalVariable var);
-
- /**
- * translate an expression from hive to Algebricks
- *
- * @param desc
- * @return
- */
- public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc desc);
-
- /**
- * translate an aggregation from hive to Algebricks
- *
- * @param aggregateDesc
- * @return
- */
- public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc);
-
- /**
- * translate unnesting (UDTF) function expression
- *
- * @param aggregator
- * @return
- */
- public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument);
-
- /**
- * get variable from a schema
- *
- * @param schema
- * @return
- */
- public List<LogicalVariable> getVariablesFromSchema(Schema schema);
-
- /**
- * get variable from name
- *
- * @param name
- * @return
- */
- public LogicalVariable getVariable(String name);
-
- /**
- * get variable from field name
- *
- * @param name
- * @return
- */
- public LogicalVariable getVariableFromFieldName(String name);
-
- /**
- * get variable from name, type
- *
- * @param fieldName
- * @param type
- * @return
- */
- public LogicalVariable getVariable(String fieldName, TypeInfo type);
-
- /**
- * get new variable from name, type
- *
- * @param fieldName
- * @param type
- * @return
- */
- public LogicalVariable getNewVariable(String fieldName, TypeInfo type);
-
- /**
- * set the metadata provider
- *
- * @param metadata
- */
- public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata);
-
- /**
- * get the metadata provider
- *
- * @param metadata
- */
- public IMetadataProvider<PartitionDesc, Object> getMetadataProvider();
-
- /**
- * replace the variable
- *
- * @param oldVar
- * @param newVar
- */
- public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar);
-
-}
+package edu.uci.ics.hivesterix.logical.plan.visitor.base;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+@SuppressWarnings("rawtypes")
+public interface Translator {
+
+ /**
+ * generate input schema
+ *
+ * @param operator
+ * @return
+ */
+ public Schema generateInputSchema(Operator operator);
+
+ /**
+ * rewrite the names of output columns for future expression evaluators to
+ * use
+ *
+ * @param operator
+ */
+ public void rewriteOperatorOutputSchema(List<LogicalVariable> vars, Operator operator);
+
+ /**
+ * rewrite the names of output columns for future expression evaluators to
+ * use
+ *
+ * @param operator
+ */
+ public void rewriteOperatorOutputSchema(Operator operator);
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpression(ExprNodeDesc expr);
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpressionPartial(ExprNodeDesc expr);
+
+ /**
+ * get an assign operator as a child of parent
+ *
+ * @param parent
+ * @param cols
+ * @param variables
+ * @return
+ */
+ public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,
+ ArrayList<LogicalVariable> variables);
+
+ /**
+ * get type for a logical variable
+ *
+ * @param var
+ * @return type info
+ */
+ public TypeInfo getType(LogicalVariable var);
+
+ /**
+ * translate an expression from hive to Algebricks
+ *
+ * @param desc
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc desc);
+
+ /**
+ * translate an aggregation from hive to Algebricks
+ *
+ * @param aggregateDesc
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc);
+
+ /**
+ * translate unnesting (UDTF) function expression
+ *
+ * @param udtfDesc
+ * @param argument
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument);
+
+ /**
+ * get variable from a schema
+ *
+ * @param schema
+ * @return
+ */
+ public List<LogicalVariable> getVariablesFromSchema(Schema schema);
+
+ /**
+ * get variable from name
+ *
+ * @param name
+ * @return
+ */
+ public LogicalVariable getVariable(String name);
+
+ /**
+ * get variable from field name
+ *
+ * @param name
+ * @return
+ */
+ public LogicalVariable getVariableFromFieldName(String name);
+
+ /**
+ * get variable from name, type
+ *
+ * @param fieldName
+ * @param type
+ * @return
+ */
+ public LogicalVariable getVariable(String fieldName, TypeInfo type);
+
+ /**
+ * get new variable from name, type
+ *
+ * @param fieldName
+ * @param type
+ * @return
+ */
+ public LogicalVariable getNewVariable(String fieldName, TypeInfo type);
+
+ /**
+ * update a name-variable binding
+ *
+ * @param fieldName
+ * @param variable
+ */
+ public void updateVariable(String fieldName, LogicalVariable variable);
+
+ /**
+ * set the metadata provider
+ *
+ * @param metadata
+ */
+ public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata);
+
+ /**
+ * get the metadata provider
+ *
+ * @return the metadata provider
+ */
+ public IMetadataProvider<PartitionDesc, Object> getMetadataProvider();
+
+ /**
+ * replace the variable
+ *
+ * @param oldVar
+ * @param newVar
+ */
+ public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar);
+
+}
diff --git a/hivesterix/pom.xml b/hivesterix/pom.xml
index 2bee50c..ba43c0d 100644
--- a/hivesterix/pom.xml
+++ b/hivesterix/pom.xml
@@ -17,7 +17,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<packaging>pom</packaging>
<name>hivesterix</name>
diff --git a/hyracks/hyracks-api/pom.xml b/hyracks/hyracks-api/pom.xml
index 0d75de4..6a56532 100644
--- a/hyracks/hyracks-api/pom.xml
+++ b/hyracks/hyracks-api/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -61,7 +61,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-ipc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/IConnectorDescriptor.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/IConnectorDescriptor.java
index 56200e4..4638118 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/IConnectorDescriptor.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/dataflow/IConnectorDescriptor.java
@@ -111,6 +111,11 @@
BitSet sourceBitmap);
/**
+ * Indicate whether the connector is an all-producers-to-all-consumers connector
+ */
+ public boolean allProducersToAllConsumers();
+
+ /**
* Gets the display name.
*/
public String getDisplayName();
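
Reviewer note: this new capability query lets the planner special-case all-to-all connectors (see the ActivityClusterPlanner change below). A hedged sketch of how implementations might answer it; the interface shape mirrors the patch, but the two sample classes are illustrative, not from this repository:

    interface ConnectorCapabilitySketch {
        boolean allProducersToAllConsumers();
    }

    // Broadcast: every producer partition feeds every consumer partition,
    // so the scheduler can skip the per-producer target-bitmap computation.
    class BroadcastConnectorSketch implements ConnectorCapabilitySketch {
        public boolean allProducersToAllConsumers() {
            return true;
        }
    }

    // One-to-one: producer i feeds only consumer i, so the generic
    // bitmap-based path is still required.
    class OneToOneConnectorSketch implements ConnectorCapabilitySketch {
        public boolean allProducersToAllConsumers() {
            return false;
        }
    }
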
diff --git a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/exceptions/HyracksDataException.java b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/exceptions/HyracksDataException.java
index 6390abf..aab59c8 100644
--- a/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/exceptions/HyracksDataException.java
+++ b/hyracks/hyracks-api/src/main/java/edu/uci/ics/hyracks/api/exceptions/HyracksDataException.java
@@ -17,6 +17,8 @@
public class HyracksDataException extends HyracksException {
private static final long serialVersionUID = 1L;
+ private String nodeId;
+
public HyracksDataException() {
}
@@ -24,11 +26,19 @@
super(message);
}
+ public HyracksDataException(Throwable cause) {
+ super(cause);
+ }
+
public HyracksDataException(String message, Throwable cause) {
super(message, cause);
}
- public HyracksDataException(Throwable cause) {
- super(cause);
+ public void setNodeId(String nodeId) {
+ this.nodeId = nodeId;
+ }
+
+ public String getNodeId() {
+ return nodeId;
}
}
\ No newline at end of file
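
Reviewer note: the new nodeId field lets a data exception be tagged with the node controller that raised it, which the CC-side reporting can then surface. A hedged usage sketch; the work() method and the node name "nc1" are invented for illustration:

    import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;

    public class NodeTagSketch {
        static void work() throws Exception {
            throw new Exception("disk full");
        }

        public static void main(String[] args) {
            try {
                work();
            } catch (Exception e) {
                HyracksDataException hde = new HyracksDataException(e);
                hde.setNodeId("nc1"); // which NC raised it, for CC-side reporting
                System.out.println(hde.getNodeId() + ": " + hde.getCause().getMessage());
            }
        }
    }
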
diff --git a/hyracks/hyracks-client/pom.xml b/hyracks/hyracks-client/pom.xml
index 099c178..99ee616 100644
--- a/hyracks/hyracks-client/pom.xml
+++ b/hyracks/hyracks-client/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -39,22 +39,22 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-net</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-comm</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
diff --git a/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetReader.java b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetReader.java
index 03432a8..51e4950 100644
--- a/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetReader.java
+++ b/hyracks/hyracks-client/src/main/java/edu/uci/ics/hyracks/client/dataset/HyracksDatasetReader.java
@@ -104,8 +104,8 @@
getSocketAddress(knownRecords[lastReadPartition]), jobId, resultSetId, lastReadPartition,
NUM_READ_BUFFERS);
lastMonitor = getMonitor(lastReadPartition);
- resultChannel.open(datasetClientCtx);
resultChannel.registerMonitor(lastMonitor);
+ resultChannel.open(datasetClientCtx);
} catch (Exception e) {
throw new HyracksDataException(e);
}
@@ -142,8 +142,8 @@
getSocketAddress(knownRecords[lastReadPartition]), jobId, resultSetId,
lastReadPartition, NUM_READ_BUFFERS);
lastMonitor = getMonitor(lastReadPartition);
- resultChannel.open(datasetClientCtx);
resultChannel.registerMonitor(lastMonitor);
+ resultChannel.open(datasetClientCtx);
} catch (Exception e) {
throw new HyracksDataException(e);
}
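
Reviewer note: both hunks above swap open() and registerMonitor() for the same reason: if the channel is opened first, frames (or the end-of-stream notification) can arrive before any monitor is attached and the notification is lost. In sketch form:

    // before: notifications can race the listener
    // resultChannel.open(datasetClientCtx);
    // resultChannel.registerMonitor(lastMonitor);

    // after: the monitor is in place before any frame can arrive
    // resultChannel.registerMonitor(lastMonitor);
    // resultChannel.open(datasetClientCtx);
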
diff --git a/hyracks/hyracks-comm/pom.xml b/hyracks/hyracks-comm/pom.xml
index 6755171..f46971a 100644
--- a/hyracks/hyracks-comm/pom.xml
+++ b/hyracks/hyracks-comm/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -39,12 +39,12 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-net</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
diff --git a/hyracks/hyracks-control/hyracks-control-cc/pom.xml b/hyracks/hyracks-control/hyracks-control-cc/pom.xml
index a30fd89..17638e8 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/pom.xml
+++ b/hyracks/hyracks-control/hyracks-control-cc/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -40,7 +40,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/NodeControllerState.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/NodeControllerState.java
index 67ba2b6..6785d6f 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/NodeControllerState.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/NodeControllerState.java
@@ -189,36 +189,37 @@
public void notifyHeartbeat(HeartbeatData hbData) {
lastHeartbeatDuration = 0;
-
hbTime[rrdPtr] = System.currentTimeMillis();
- heapInitSize[rrdPtr] = hbData.heapInitSize;
- heapUsedSize[rrdPtr] = hbData.heapUsedSize;
- heapCommittedSize[rrdPtr] = hbData.heapCommittedSize;
- heapMaxSize[rrdPtr] = hbData.heapMaxSize;
- nonheapInitSize[rrdPtr] = hbData.nonheapInitSize;
- nonheapUsedSize[rrdPtr] = hbData.nonheapUsedSize;
- nonheapCommittedSize[rrdPtr] = hbData.nonheapCommittedSize;
- nonheapMaxSize[rrdPtr] = hbData.nonheapMaxSize;
- threadCount[rrdPtr] = hbData.threadCount;
- peakThreadCount[rrdPtr] = hbData.peakThreadCount;
- systemLoadAverage[rrdPtr] = hbData.systemLoadAverage;
- int gcN = hbSchema.getGarbageCollectorInfos().length;
- for (int i = 0; i < gcN; ++i) {
- gcCollectionCounts[i][rrdPtr] = hbData.gcCollectionCounts[i];
- gcCollectionTimes[i][rrdPtr] = hbData.gcCollectionTimes[i];
+ if (hbData != null) {
+ heapInitSize[rrdPtr] = hbData.heapInitSize;
+ heapUsedSize[rrdPtr] = hbData.heapUsedSize;
+ heapCommittedSize[rrdPtr] = hbData.heapCommittedSize;
+ heapMaxSize[rrdPtr] = hbData.heapMaxSize;
+ nonheapInitSize[rrdPtr] = hbData.nonheapInitSize;
+ nonheapUsedSize[rrdPtr] = hbData.nonheapUsedSize;
+ nonheapCommittedSize[rrdPtr] = hbData.nonheapCommittedSize;
+ nonheapMaxSize[rrdPtr] = hbData.nonheapMaxSize;
+ threadCount[rrdPtr] = hbData.threadCount;
+ peakThreadCount[rrdPtr] = hbData.peakThreadCount;
+ systemLoadAverage[rrdPtr] = hbData.systemLoadAverage;
+ int gcN = hbSchema.getGarbageCollectorInfos().length;
+ for (int i = 0; i < gcN; ++i) {
+ gcCollectionCounts[i][rrdPtr] = hbData.gcCollectionCounts[i];
+ gcCollectionTimes[i][rrdPtr] = hbData.gcCollectionTimes[i];
+ }
+ netPayloadBytesRead[rrdPtr] = hbData.netPayloadBytesRead;
+ netPayloadBytesWritten[rrdPtr] = hbData.netPayloadBytesWritten;
+ netSignalingBytesRead[rrdPtr] = hbData.netSignalingBytesRead;
+ netSignalingBytesWritten[rrdPtr] = hbData.netSignalingBytesWritten;
+ datasetNetPayloadBytesRead[rrdPtr] = hbData.datasetNetPayloadBytesRead;
+ datasetNetPayloadBytesWritten[rrdPtr] = hbData.datasetNetPayloadBytesWritten;
+ datasetNetSignalingBytesRead[rrdPtr] = hbData.datasetNetSignalingBytesRead;
+ datasetNetSignalingBytesWritten[rrdPtr] = hbData.datasetNetSignalingBytesWritten;
+ ipcMessagesSent[rrdPtr] = hbData.ipcMessagesSent;
+ ipcMessageBytesSent[rrdPtr] = hbData.ipcMessageBytesSent;
+ ipcMessagesReceived[rrdPtr] = hbData.ipcMessagesReceived;
+ ipcMessageBytesReceived[rrdPtr] = hbData.ipcMessageBytesReceived;
}
- netPayloadBytesRead[rrdPtr] = hbData.netPayloadBytesRead;
- netPayloadBytesWritten[rrdPtr] = hbData.netPayloadBytesWritten;
- netSignalingBytesRead[rrdPtr] = hbData.netSignalingBytesRead;
- netSignalingBytesWritten[rrdPtr] = hbData.netSignalingBytesWritten;
- datasetNetPayloadBytesRead[rrdPtr] = hbData.datasetNetPayloadBytesRead;
- datasetNetPayloadBytesWritten[rrdPtr] = hbData.datasetNetPayloadBytesWritten;
- datasetNetSignalingBytesRead[rrdPtr] = hbData.datasetNetSignalingBytesRead;
- datasetNetSignalingBytesWritten[rrdPtr] = hbData.datasetNetSignalingBytesWritten;
- ipcMessagesSent[rrdPtr] = hbData.ipcMessagesSent;
- ipcMessageBytesSent[rrdPtr] = hbData.ipcMessageBytesSent;
- ipcMessagesReceived[rrdPtr] = hbData.ipcMessagesReceived;
- ipcMessageBytesReceived[rrdPtr] = hbData.ipcMessageBytesReceived;
rrdPtr = (rrdPtr + 1) % RRD_SIZE;
}
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/adminconsole/pages/IndexPage.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/adminconsole/pages/IndexPage.java
index e2daff8..cd3ea6e 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/adminconsole/pages/IndexPage.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/adminconsole/pages/IndexPage.java
@@ -69,7 +69,6 @@
JSONObject o = item.getModelObject();
try {
item.add(new Label("job-id", o.getString("job-id")));
- item.add(new Label("application-name", o.getString("application-name")));
item.add(new Label("status", o.getString("status")));
item.add(new Label("create-time", o.getString("create-time")));
item.add(new Label("start-time", o.getString("start-time")));
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/job/JobRun.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/job/JobRun.java
index 5738907..1f68210 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/job/JobRun.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/job/JobRun.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.control.cc.job;
import java.util.ArrayList;
+import java.io.PrintWriter;
+import java.io.StringWriter;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
@@ -46,6 +48,7 @@
import edu.uci.ics.hyracks.control.cc.scheduler.ActivityPartitionDetails;
import edu.uci.ics.hyracks.control.cc.scheduler.JobScheduler;
import edu.uci.ics.hyracks.control.common.job.profiling.om.JobProfile;
+import edu.uci.ics.hyracks.control.common.utils.ExceptionUtils;
public class JobRun implements IJobStatusConditionVariable {
private final DeploymentId deploymentId;
@@ -359,7 +362,13 @@
taskAttempt.put("end-time", ta.getEndTime());
List<Exception> exceptions = ta.getExceptions();
if (exceptions != null && !exceptions.isEmpty()) {
- taskAttempt.put("failure-details", exceptions);
+ List<Exception> filteredExceptions = ExceptionUtils
+ .getActualExceptions(exceptions);
+ for (Exception exception : filteredExceptions) {
+ StringWriter exceptionWriter = new StringWriter();
+ exception.printStackTrace(new PrintWriter(exceptionWriter));
+ taskAttempt.put("failure-details", exceptionWriter.toString());
+ }
}
taskAttempts.put(taskAttempt);
}
@@ -388,4 +397,4 @@
public Map<OperatorDescriptorId, List<String>> getOperatorLocations() {
return operatorLocations;
}
-}
\ No newline at end of file
+}
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/scheduler/ActivityClusterPlanner.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/scheduler/ActivityClusterPlanner.java
index 4d2ad6b..3863eda 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/scheduler/ActivityClusterPlanner.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/scheduler/ActivityClusterPlanner.java
@@ -190,18 +190,29 @@
ActivityId ac2 = ac.getConsumerActivity(cdId);
Task[] ac2TaskStates = activityPlanMap.get(ac2).getTasks();
int nConsumers = ac2TaskStates.length;
- for (int i = 0; i < nProducers; ++i) {
- c.indicateTargetPartitions(nProducers, nConsumers, i, targetBitmap);
- List<Pair<TaskId, ConnectorDescriptorId>> cInfoList = taskConnectivity.get(ac1TaskStates[i]
- .getTaskId());
- if (cInfoList == null) {
- cInfoList = new ArrayList<Pair<TaskId, ConnectorDescriptorId>>();
- taskConnectivity.put(ac1TaskStates[i].getTaskId(), cInfoList);
- }
- for (int j = targetBitmap.nextSetBit(0); j >= 0; j = targetBitmap.nextSetBit(j + 1)) {
+ if (c.allProducersToAllConsumers()) {
+ List<Pair<TaskId, ConnectorDescriptorId>> cInfoList = new ArrayList<Pair<TaskId, ConnectorDescriptorId>>();
+ for (int j = 0; j < nConsumers; j++) {
TaskId targetTID = ac2TaskStates[j].getTaskId();
cInfoList.add(Pair.<TaskId, ConnectorDescriptorId> of(targetTID, cdId));
}
+ for (int i = 0; i < nProducers; ++i) {
+ taskConnectivity.put(ac1TaskStates[i].getTaskId(), cInfoList);
+ }
+ } else {
+ for (int i = 0; i < nProducers; ++i) {
+ c.indicateTargetPartitions(nProducers, nConsumers, i, targetBitmap);
+ List<Pair<TaskId, ConnectorDescriptorId>> cInfoList = taskConnectivity.get(ac1TaskStates[i]
+ .getTaskId());
+ if (cInfoList == null) {
+ cInfoList = new ArrayList<Pair<TaskId, ConnectorDescriptorId>>();
+ taskConnectivity.put(ac1TaskStates[i].getTaskId(), cInfoList);
+ }
+ for (int j = targetBitmap.nextSetBit(0); j >= 0; j = targetBitmap.nextSetBit(j + 1)) {
+ TaskId targetTID = ac2TaskStates[j].getTaskId();
+ cInfoList.add(Pair.<TaskId, ConnectorDescriptorId> of(targetTID, cdId));
+ }
+ }
}
}
}
@@ -341,9 +352,15 @@
int nConsumers = ac2TaskStates.length;
int[] fanouts = new int[nProducers];
- for (int i = 0; i < nProducers; ++i) {
- c.indicateTargetPartitions(nProducers, nConsumers, i, targetBitmap);
- fanouts[i] = targetBitmap.cardinality();
+ if (c.allProducersToAllConsumers()) {
+ for (int i = 0; i < nProducers; ++i) {
+ fanouts[i] = nConsumers;
+ }
+ } else {
+ for (int i = 0; i < nProducers; ++i) {
+ c.indicateTargetPartitions(nProducers, nConsumers, i, targetBitmap);
+ fanouts[i] = targetBitmap.cardinality();
+ }
}
IConnectorPolicy cp = assignConnectorPolicy(ac, c, nProducers, nConsumers, fanouts);
cPolicyMap.put(cdId, cp);
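
Reviewer note: for all-to-all connectors the planner now builds one shared target list and sets every fanout to nConsumers directly, replacing the per-producer bitmap scans (O(P·C) work) with O(P + C). A toy sketch of the fast path, with plain integers standing in for TaskId and ConnectorDescriptorId; sharing one list instance mirrors the patch and assumes consumers treat it as read-only:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class AllToAllSketch {
        public static void main(String[] args) {
            int nProducers = 4, nConsumers = 3;
            // one target list, built once and shared by every producer
            List<Integer> targets = new ArrayList<Integer>();
            for (int j = 0; j < nConsumers; j++) {
                targets.add(j);
            }
            Map<Integer, List<Integer>> connectivity = new HashMap<Integer, List<Integer>>();
            int[] fanouts = new int[nProducers];
            for (int i = 0; i < nProducers; i++) {
                connectivity.put(i, targets); // same list instance for every producer
                fanouts[i] = nConsumers;      // no bitmap cardinality needed
            }
            System.out.println(connectivity.get(2)); // [0, 1, 2]
            System.out.println(fanouts[0]);          // 3
        }
    }
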
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/scheduler/JobScheduler.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/scheduler/JobScheduler.java
index d568963..d2c018f 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/scheduler/JobScheduler.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/scheduler/JobScheduler.java
@@ -27,6 +27,9 @@
import java.util.logging.Level;
import java.util.logging.Logger;
+import org.json.JSONException;
+import org.json.JSONObject;
+
import edu.uci.ics.hyracks.api.constraints.Constraint;
import edu.uci.ics.hyracks.api.constraints.expressions.LValueConstraintExpression;
import edu.uci.ics.hyracks.api.constraints.expressions.PartitionLocationExpression;
@@ -46,6 +49,7 @@
import edu.uci.ics.hyracks.api.util.JavaSerializationUtils;
import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
import edu.uci.ics.hyracks.control.cc.NodeControllerState;
+import edu.uci.ics.hyracks.control.cc.application.CCApplicationContext;
import edu.uci.ics.hyracks.control.cc.job.ActivityClusterPlan;
import edu.uci.ics.hyracks.control.cc.job.JobRun;
import edu.uci.ics.hyracks.control.cc.job.Task;
@@ -462,13 +466,14 @@
private void abortJob(List<Exception> exceptions) {
Set<TaskCluster> inProgressTaskClustersCopy = new HashSet<TaskCluster>(inProgressTaskClusters);
for (TaskCluster tc : inProgressTaskClustersCopy) {
- abortTaskCluster(findLastTaskClusterAttempt(tc));
+ abortTaskCluster(findLastTaskClusterAttempt(tc), TaskClusterAttempt.TaskClusterStatus.ABORTED);
}
assert inProgressTaskClusters.isEmpty();
ccs.getWorkQueue().schedule(new JobCleanupWork(ccs, jobRun.getJobId(), JobStatus.FAILURE, exceptions));
}
- private void abortTaskCluster(TaskClusterAttempt tcAttempt) {
+ private void abortTaskCluster(TaskClusterAttempt tcAttempt,
+ TaskClusterAttempt.TaskClusterStatus failedOrAbortedStatus) {
LOGGER.fine("Aborting task cluster: " + tcAttempt.getAttempt());
Set<TaskAttemptId> abortTaskIds = new HashSet<TaskAttemptId>();
Map<String, List<TaskAttemptId>> abortTaskAttemptMap = new HashMap<String, List<TaskAttemptId>>();
@@ -481,11 +486,13 @@
ta.setStatus(TaskAttempt.TaskStatus.ABORTED, null);
ta.setEndTime(System.currentTimeMillis());
List<TaskAttemptId> abortTaskAttempts = abortTaskAttemptMap.get(ta.getNodeId());
- if (abortTaskAttempts == null) {
+ if (status == TaskAttempt.TaskStatus.RUNNING && abortTaskAttempts == null) {
abortTaskAttempts = new ArrayList<TaskAttemptId>();
abortTaskAttemptMap.put(ta.getNodeId(), abortTaskAttempts);
}
- abortTaskAttempts.add(taId);
+ if (status == TaskAttempt.TaskStatus.RUNNING) {
+ abortTaskAttempts.add(taId);
+ }
}
}
final JobId jobId = jobRun.getJobId();
@@ -509,6 +516,9 @@
PartitionMatchMaker pmm = jobRun.getPartitionMatchMaker();
pmm.removeUncommittedPartitions(tc.getProducedPartitions(), abortTaskIds);
pmm.removePartitionRequests(tc.getRequiredPartitions(), abortTaskIds);
+
+ tcAttempt.setStatus(failedOrAbortedStatus);
+ tcAttempt.setEndTime(System.currentTimeMillis());
}
private void abortDoomedTaskClusters() throws HyracksException {
@@ -523,9 +533,7 @@
for (TaskCluster tc : doomedTaskClusters) {
TaskClusterAttempt tca = findLastTaskClusterAttempt(tc);
if (tca != null) {
- abortTaskCluster(tca);
- tca.setEndTime(System.currentTimeMillis());
- tca.setStatus(TaskClusterAttempt.TaskClusterStatus.ABORTED);
+ abortTaskCluster(tca, TaskClusterAttempt.TaskClusterStatus.ABORTED);
}
}
}
@@ -612,9 +620,7 @@
if (lastAttempt != null && taId.getAttempt() == lastAttempt.getAttempt()) {
LOGGER.fine("Marking TaskAttempt " + ta.getTaskAttemptId() + " as failed");
ta.setStatus(TaskAttempt.TaskStatus.FAILED, exceptions);
- abortTaskCluster(lastAttempt);
- lastAttempt.setStatus(TaskClusterAttempt.TaskClusterStatus.FAILED);
- lastAttempt.setEndTime(System.currentTimeMillis());
+ abortTaskCluster(lastAttempt, TaskClusterAttempt.TaskClusterStatus.FAILED);
abortDoomedTaskClusters();
if (lastAttempt.getAttempt() >= jobRun.getActivityClusterGraph().getMaxReattempts()) {
abortJob(exceptions);
@@ -639,32 +645,41 @@
public void notifyNodeFailures(Set<String> deadNodes) {
try {
jobRun.getPartitionMatchMaker().notifyNodeFailures(deadNodes);
+ jobRun.getParticipatingNodeIds().removeAll(deadNodes);
+ jobRun.getCleanupPendingNodeIds().removeAll(deadNodes);
+ if (jobRun.getPendingStatus() != null && jobRun.getCleanupPendingNodeIds().isEmpty()) {
+ finishJob(jobRun);
+ return;
+ }
for (ActivityCluster ac : jobRun.getActivityClusterGraph().getActivityClusterMap().values()) {
- TaskCluster[] taskClusters = getActivityClusterPlan(ac).getTaskClusters();
- if (taskClusters != null) {
- for (TaskCluster tc : taskClusters) {
- TaskClusterAttempt lastTaskClusterAttempt = findLastTaskClusterAttempt(tc);
- if (lastTaskClusterAttempt != null
- && (lastTaskClusterAttempt.getStatus() == TaskClusterAttempt.TaskClusterStatus.COMPLETED || lastTaskClusterAttempt
- .getStatus() == TaskClusterAttempt.TaskClusterStatus.RUNNING)) {
- boolean abort = false;
- for (TaskAttempt ta : lastTaskClusterAttempt.getTaskAttempts().values()) {
- assert (ta.getStatus() == TaskAttempt.TaskStatus.COMPLETED || ta.getStatus() == TaskAttempt.TaskStatus.RUNNING);
- if (deadNodes.contains(ta.getNodeId())) {
- ta.setStatus(
- TaskAttempt.TaskStatus.FAILED,
- Collections.singletonList(new Exception("Node " + ta.getNodeId()
- + " failed")));
- ta.setEndTime(System.currentTimeMillis());
- abort = true;
+ if (isPlanned(ac)) {
+ TaskCluster[] taskClusters = getActivityClusterPlan(ac).getTaskClusters();
+ if (taskClusters != null) {
+ for (TaskCluster tc : taskClusters) {
+ TaskClusterAttempt lastTaskClusterAttempt = findLastTaskClusterAttempt(tc);
+ if (lastTaskClusterAttempt != null
+ && (lastTaskClusterAttempt.getStatus() == TaskClusterAttempt.TaskClusterStatus.COMPLETED || lastTaskClusterAttempt
+ .getStatus() == TaskClusterAttempt.TaskClusterStatus.RUNNING)) {
+ boolean abort = false;
+ for (TaskAttempt ta : lastTaskClusterAttempt.getTaskAttempts().values()) {
+ assert (ta.getStatus() == TaskAttempt.TaskStatus.COMPLETED || ta.getStatus() == TaskAttempt.TaskStatus.RUNNING);
+ if (deadNodes.contains(ta.getNodeId())) {
+ ta.setStatus(
+ TaskAttempt.TaskStatus.FAILED,
+ Collections.singletonList(new Exception("Node " + ta.getNodeId()
+ + " failed")));
+ ta.setEndTime(System.currentTimeMillis());
+ abort = true;
+ }
+ }
+ if (abort) {
+ abortTaskCluster(lastTaskClusterAttempt,
+ TaskClusterAttempt.TaskClusterStatus.ABORTED);
}
}
- if (abort) {
- abortTaskCluster(lastTaskClusterAttempt);
- }
}
+ abortDoomedTaskClusters();
}
- abortDoomedTaskClusters();
}
}
startRunnableActivityClusters();
@@ -672,4 +687,37 @@
abortJob(Collections.singletonList(e));
}
}
+
+ private void finishJob(final JobRun run) {
+ JobId jobId = run.getJobId();
+ CCApplicationContext appCtx = ccs.getApplicationContext();
+ if (appCtx != null) {
+ try {
+ appCtx.notifyJobFinish(jobId);
+ } catch (HyracksException e) {
+ e.printStackTrace();
+ }
+ }
+ run.setStatus(run.getPendingStatus(), run.getPendingExceptions());
+ ccs.getActiveRunMap().remove(jobId);
+ ccs.getRunMapArchive().put(jobId, run);
+ ccs.getRunHistory().put(jobId, run.getExceptions());
+ try {
+ ccs.getJobLogFile().log(createJobLogObject(run));
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private JSONObject createJobLogObject(final JobRun run) {
+ JSONObject jobLogObject = new JSONObject();
+ try {
+ ActivityClusterGraph acg = run.getActivityClusterGraph();
+ jobLogObject.put("activity-cluster-graph", acg.toJSON());
+ jobLogObject.put("job-run", run.toJSON());
+ } catch (JSONException e) {
+ throw new RuntimeException(e);
+ }
+ return jobLogObject;
+ }
}
\ No newline at end of file
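
Reviewer note: notifyNodeFailures now prunes dead nodes from both the participating and cleanup-pending sets, and finishJob lets a job whose cleanup was already requested complete when its last pending node dies, instead of waiting forever for an acknowledgment that will never come. The completion condition, restated in sketch form:

    // pendingStatus != null            -> a cleanup was already requested
    // cleanupPendingNodeIds.isEmpty()  -> every surviving node has acknowledged
    // if both hold after removing deadNodes, finish the job immediately:
    // if (jobRun.getPendingStatus() != null && jobRun.getCleanupPendingNodeIds().isEmpty()) {
    //     finishJob(jobRun);
    //     return;
    // }
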
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/AbstractHeartbeatWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/AbstractHeartbeatWork.java
new file mode 100644
index 0000000..7605295
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/AbstractHeartbeatWork.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.control.cc.work;
+
+import java.util.Map;
+
+import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
+import edu.uci.ics.hyracks.control.cc.NodeControllerState;
+import edu.uci.ics.hyracks.control.common.heartbeat.HeartbeatData;
+import edu.uci.ics.hyracks.control.common.work.SynchronizableWork;
+
+public abstract class AbstractHeartbeatWork extends SynchronizableWork {
+
+ private final ClusterControllerService ccs;
+ private final String nodeId;
+ private final HeartbeatData hbData;
+
+ public AbstractHeartbeatWork(ClusterControllerService ccs, String nodeId, HeartbeatData hbData) {
+ this.ccs = ccs;
+ this.nodeId = nodeId;
+ this.hbData = hbData;
+ }
+
+ @Override
+ public void doRun() {
+ Map<String, NodeControllerState> nodeMap = ccs.getNodeMap();
+ NodeControllerState state = nodeMap.get(nodeId);
+ if (state != null) {
+ state.notifyHeartbeat(hbData);
+ }
+ runWork();
+ }
+
+ public abstract void runWork();
+
+}
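
Reviewer note: AbstractHeartbeatWork turns any node-originated CC work into an implicit heartbeat: doRun() first credits the node's heartbeat state, then delegates to runWork(). ApplicationMessageWork and AbstractTaskLifecycleWork (below) now pass hbData == null, which the new null guard in NodeControllerState.notifyHeartbeat tolerates by refreshing liveness without touching the stats arrays. A hypothetical subclass, just to show the template; PingWork is not part of this patch:

    import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
    import edu.uci.ics.hyracks.control.cc.work.AbstractHeartbeatWork;

    public class PingWork extends AbstractHeartbeatWork {
        public PingWork(ClusterControllerService ccs, String nodeId) {
            super(ccs, nodeId, null); // null hbData: refresh liveness only
        }

        @Override
        public void runWork() {
            // real work goes here; it runs after the heartbeat has been credited
        }
    }
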
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/AbstractTaskLifecycleWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/AbstractTaskLifecycleWork.java
index 58aaa57..bcb278b 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/AbstractTaskLifecycleWork.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/AbstractTaskLifecycleWork.java
@@ -29,15 +29,15 @@
import edu.uci.ics.hyracks.control.cc.job.TaskAttempt;
import edu.uci.ics.hyracks.control.cc.job.TaskCluster;
import edu.uci.ics.hyracks.control.cc.job.TaskClusterAttempt;
-import edu.uci.ics.hyracks.control.common.work.AbstractWork;
-public abstract class AbstractTaskLifecycleWork extends AbstractWork {
+public abstract class AbstractTaskLifecycleWork extends AbstractHeartbeatWork {
protected final ClusterControllerService ccs;
protected final JobId jobId;
protected final TaskAttemptId taId;
protected final String nodeId;
public AbstractTaskLifecycleWork(ClusterControllerService ccs, JobId jobId, TaskAttemptId taId, String nodeId) {
+ super(ccs, nodeId, null);
this.ccs = ccs;
this.jobId = jobId;
this.taId = taId;
@@ -45,7 +45,7 @@
}
@Override
- public final void run() {
+ public final void runWork() {
JobRun run = ccs.getActiveRunMap().get(jobId);
if (run != null) {
TaskId tid = taId.getTaskId();
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/ApplicationMessageWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/ApplicationMessageWork.java
index bc58d1e..c4d202f 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/ApplicationMessageWork.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/ApplicationMessageWork.java
@@ -22,12 +22,11 @@
import edu.uci.ics.hyracks.api.messages.IMessage;
import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
import edu.uci.ics.hyracks.control.common.deployment.DeploymentUtils;
-import edu.uci.ics.hyracks.control.common.work.AbstractWork;
/**
* @author rico
*/
-public class ApplicationMessageWork extends AbstractWork {
+public class ApplicationMessageWork extends AbstractHeartbeatWork {
private static final Logger LOGGER = Logger.getLogger(ApplicationMessageWork.class.getName());
private byte[] message;
@@ -36,6 +35,7 @@
private ClusterControllerService ccs;
public ApplicationMessageWork(ClusterControllerService ccs, byte[] message, DeploymentId deploymentId, String nodeId) {
+ super(ccs, nodeId, null);
this.ccs = ccs;
this.deploymentId = deploymentId;
this.nodeId = nodeId;
@@ -43,7 +43,7 @@
}
@Override
- public void run() {
+ public void runWork() {
final ICCApplicationContext ctx = ccs.getApplicationContext();
try {
final IMessage data = (IMessage) DeploymentUtils.deserialize(message, deploymentId, ctx);
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/JobCleanupWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/JobCleanupWork.java
index 3b29b52..46a7c16 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/JobCleanupWork.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/JobCleanupWork.java
@@ -14,6 +14,7 @@
*/
package edu.uci.ics.hyracks.control.cc.work;
+import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.logging.Logger;
@@ -48,46 +49,67 @@
@Override
public void run() {
+ LOGGER.info("Cleanup for JobRun with id: " + jobId);
final JobRun run = ccs.getActiveRunMap().get(jobId);
if (run == null) {
LOGGER.warning("Unable to find JobRun with id: " + jobId);
return;
}
+ if (run.getPendingStatus() != null && run.getCleanupPendingNodeIds().isEmpty()) {
+ finishJob(run);
+ return;
+ }
if (run.getPendingStatus() != null) {
LOGGER.warning("Ignoring duplicate cleanup for JobRun with id: " + jobId);
return;
}
Set<String> targetNodes = run.getParticipatingNodeIds();
run.getCleanupPendingNodeIds().addAll(targetNodes);
- run.setPendingStatus(status, exceptions);
+ if (run.getPendingStatus() != JobStatus.FAILURE && run.getPendingStatus() != JobStatus.TERMINATED) {
+ run.setPendingStatus(status, exceptions);
+ }
if (targetNodes != null && !targetNodes.isEmpty()) {
+ Set<String> toDelete = new HashSet<String>();
for (String n : targetNodes) {
NodeControllerState ncs = ccs.getNodeMap().get(n);
try {
- ncs.getNodeController().cleanUpJoblet(jobId, status);
+ if (ncs == null) {
+ toDelete.add(n);
+ } else {
+ ncs.getNodeController().cleanUpJoblet(jobId, status);
+ }
} catch (Exception e) {
e.printStackTrace();
}
}
+ targetNodes.removeAll(toDelete);
+ run.getCleanupPendingNodeIds().removeAll(toDelete);
+ if (run.getCleanupPendingNodeIds().isEmpty()) {
+ finishJob(run);
+ }
} else {
- CCApplicationContext appCtx = ccs.getApplicationContext();
- if (appCtx != null) {
- try {
- appCtx.notifyJobFinish(jobId);
- } catch (HyracksException e) {
- e.printStackTrace();
- }
- }
- run.setStatus(run.getPendingStatus(), run.getPendingExceptions());
- ccs.getActiveRunMap().remove(jobId);
- ccs.getRunMapArchive().put(jobId, run);
- ccs.getRunHistory().put(jobId, run.getExceptions());
+ finishJob(run);
+ }
+ }
+
+ private void finishJob(final JobRun run) {
+ CCApplicationContext appCtx = ccs.getApplicationContext();
+ if (appCtx != null) {
try {
- ccs.getJobLogFile().log(createJobLogObject(run));
- } catch (Exception e) {
- throw new RuntimeException(e);
+ appCtx.notifyJobFinish(jobId);
+ } catch (HyracksException e) {
+ e.printStackTrace();
}
}
+ run.setStatus(run.getPendingStatus(), run.getPendingExceptions());
+ ccs.getActiveRunMap().remove(jobId);
+ ccs.getRunMapArchive().put(jobId, run);
+ ccs.getRunHistory().put(jobId, run.getExceptions());
+ try {
+ ccs.getJobLogFile().log(createJobLogObject(run));
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
}
private JSONObject createJobLogObject(final JobRun run) {
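
The reworked cleanup above makes JobCleanupWork tolerant of nodes that died before acknowledging: any target node missing from the live-node map is pruned from both the target set and the pending set, and the job is finished as soon as no acknowledgements remain outstanding. A self-contained sketch of that decision, with hypothetical names:

    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    // Distilled model of the dead-node pruning: nodes absent from the
    // live-node map can never send a cleanup ack, so they are dropped
    // before deciding whether finishJob() can run.
    public class CleanupSketch {
        static boolean readyToFinish(Set<String> targetNodes, Set<String> pending,
                Map<String, Object> liveNodes) {
            Set<String> dead = new HashSet<String>();
            for (String n : targetNodes) {
                if (!liveNodes.containsKey(n)) {
                    dead.add(n); // vanished NC: no ack will ever arrive
                }
            }
            targetNodes.removeAll(dead);
            pending.removeAll(dead);
            return pending.isEmpty(); // true => finish the job now
        }
    }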
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/JobletCleanupNotificationWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/JobletCleanupNotificationWork.java
index 5eb851a..2d6bdea 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/JobletCleanupNotificationWork.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/JobletCleanupNotificationWork.java
@@ -28,9 +28,8 @@
import edu.uci.ics.hyracks.control.cc.NodeControllerState;
import edu.uci.ics.hyracks.control.cc.application.CCApplicationContext;
import edu.uci.ics.hyracks.control.cc.job.JobRun;
-import edu.uci.ics.hyracks.control.common.work.AbstractWork;
-public class JobletCleanupNotificationWork extends AbstractWork {
+public class JobletCleanupNotificationWork extends AbstractHeartbeatWork {
private static final Logger LOGGER = Logger.getLogger(JobletCleanupNotificationWork.class.getName());
private ClusterControllerService ccs;
@@ -38,13 +37,14 @@
private String nodeId;
public JobletCleanupNotificationWork(ClusterControllerService ccs, JobId jobId, String nodeId) {
+ super(ccs, nodeId, null);
this.ccs = ccs;
this.jobId = jobId;
this.nodeId = nodeId;
}
@Override
- public void run() {
+ public void runWork() {
final JobRun run = ccs.getActiveRunMap().get(jobId);
Set<String> cleanupPendingNodes = run.getCleanupPendingNodeIds();
if (!cleanupPendingNodes.remove(nodeId)) {
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/NodeHeartbeatWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/NodeHeartbeatWork.java
index 970a45d..8ef8f66 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/NodeHeartbeatWork.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/NodeHeartbeatWork.java
@@ -14,32 +14,20 @@
*/
package edu.uci.ics.hyracks.control.cc.work;
-import java.util.Map;
import java.util.logging.Level;
import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
-import edu.uci.ics.hyracks.control.cc.NodeControllerState;
import edu.uci.ics.hyracks.control.common.heartbeat.HeartbeatData;
-import edu.uci.ics.hyracks.control.common.work.SynchronizableWork;
-public class NodeHeartbeatWork extends SynchronizableWork {
- private final ClusterControllerService ccs;
- private final String nodeId;
- private final HeartbeatData hbData;
+public class NodeHeartbeatWork extends AbstractHeartbeatWork {
public NodeHeartbeatWork(ClusterControllerService ccs, String nodeId, HeartbeatData hbData) {
- this.ccs = ccs;
- this.nodeId = nodeId;
- this.hbData = hbData;
+ super(ccs, nodeId, hbData);
}
@Override
- protected void doRun() throws Exception {
- Map<String, NodeControllerState> nodeMap = ccs.getNodeMap();
- NodeControllerState state = nodeMap.get(nodeId);
- if (state != null) {
- state.notifyHeartbeat(hbData);
- }
+ public void runWork() {
+
}
@Override
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/NotifyDeployBinaryWork.java b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/NotifyDeployBinaryWork.java
index c4c8873..c35f385 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/NotifyDeployBinaryWork.java
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/java/edu/uci/ics/hyracks/control/cc/work/NotifyDeployBinaryWork.java
@@ -19,14 +19,13 @@
import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
import edu.uci.ics.hyracks.control.common.deployment.DeploymentRun;
import edu.uci.ics.hyracks.control.common.deployment.DeploymentStatus;
-import edu.uci.ics.hyracks.control.common.work.AbstractWork;
/**
* This is the work that happens on the CC when it receives a deployment or undeployment status notification message from an NC.
*
* @author yingyib
*/
-public class NotifyDeployBinaryWork extends AbstractWork {
+public class NotifyDeployBinaryWork extends AbstractHeartbeatWork {
private final ClusterControllerService ccs;
private final String nodeId;
@@ -35,6 +34,7 @@
public NotifyDeployBinaryWork(ClusterControllerService ccs, DeploymentId deploymentId, String nodeId,
DeploymentStatus deploymentStatus) {
+ super(ccs, nodeId, null);
this.ccs = ccs;
this.nodeId = nodeId;
this.deploymentId = deploymentId;
@@ -43,7 +43,7 @@
}
@Override
- public void run() {
+ public void runWork() {
+ /** triggered remotely by an NC to notify that the NC is deployed */
DeploymentRun dRun = ccs.getDeploymentRun(deploymentId);
dRun.notifyDeploymentStatus(nodeId, deploymentStatus);
diff --git a/hyracks/hyracks-control/hyracks-control-cc/src/main/resources/edu/uci/ics/hyracks/control/cc/adminconsole/pages/IndexPage.html b/hyracks/hyracks-control/hyracks-control-cc/src/main/resources/edu/uci/ics/hyracks/control/cc/adminconsole/pages/IndexPage.html
index 5810f23..4220932 100644
--- a/hyracks/hyracks-control/hyracks-control-cc/src/main/resources/edu/uci/ics/hyracks/control/cc/adminconsole/pages/IndexPage.html
+++ b/hyracks/hyracks-control/hyracks-control-cc/src/main/resources/edu/uci/ics/hyracks/control/cc/adminconsole/pages/IndexPage.html
@@ -38,9 +38,6 @@
<span wicket:id="job-id"></span>
</td>
<td>
- <span wicket:id="application-name"></span>
- </td>
- <td>
<span wicket:id="status"></span>
</td>
<td>
diff --git a/hyracks/hyracks-control/hyracks-control-common/pom.xml b/hyracks/hyracks-control/hyracks-control-common/pom.xml
index 0056e99..1f3c764 100644
--- a/hyracks/hyracks-control/hyracks-control-common/pom.xml
+++ b/hyracks/hyracks-control/hyracks-control-common/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,7 +41,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/dataset/ResultStateSweeper.java b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/dataset/ResultStateSweeper.java
index 69b560c..5a6d849 100644
--- a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/dataset/ResultStateSweeper.java
+++ b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/dataset/ResultStateSweeper.java
@@ -55,6 +55,7 @@
} catch (InterruptedException e) {
LOGGER.severe("Result cleaner thread interrupted, but we continue running it.");
// There isn't much we can do really here
+ break; // the interrupt came explicitly from another thread; this thread should shut down
}
}
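
The single added break turns an interrupt into a shutdown request instead of letting the sweeper log the interrupt and spin forever. For reference, the canonical shape of such a loop looks like this sketch:

    // Treat an interrupt as a shutdown request rather than swallowing it.
    public class SweeperLoopSketch implements Runnable {
        @Override
        public void run() {
            while (!Thread.currentThread().isInterrupted()) {
                try {
                    Thread.sleep(1000); // stand-in for the sweep interval
                    // ... sweep expired result states here ...
                } catch (InterruptedException e) {
                    // another thread asked us to stop: restore the flag and exit
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
    }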
diff --git a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/utils/ExceptionUtils.java b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/utils/ExceptionUtils.java
new file mode 100644
index 0000000..cbdc6e5
--- /dev/null
+++ b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/utils/ExceptionUtils.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.control.common.utils;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * @author yingyib
+ */
+public class ExceptionUtils {
+
+ public static List<Exception> getActualExceptions(List<Exception> allExceptions) {
+ List<Exception> exceptions = new ArrayList<Exception>();
+ for (Exception exception : allExceptions) {
+ if (possibleRootCause(exception)) {
+ exceptions.add(exception);
+ }
+ }
+ return exceptions;
+ }
+
+ public static void setNodeIds(Collection<Exception> exceptions, String nodeId) {
+ List<Exception> newExceptions = new ArrayList<Exception>();
+ for (Exception e : exceptions) {
+ HyracksDataException newException = new HyracksDataException(e);
+ newException.setNodeId(nodeId);
+ newExceptions.add(newException);
+ }
+ exceptions.clear();
+ exceptions.addAll(newExceptions);
+ }
+
+ private static boolean possibleRootCause(Throwable exception) {
+ Throwable cause = exception;
+ while ((cause = cause.getCause()) != null) {
+ if (cause instanceof java.lang.InterruptedException
+ || cause instanceof java.nio.channels.ClosedChannelException) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+}
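
ExceptionUtils separates real failures from collateral ones: possibleRootCause walks an exception's cause chain and rejects it if an InterruptedException or ClosedChannelException appears anywhere below the top level, while setNodeIds rewraps each exception in a HyracksDataException tagged with the reporting NC's id. A small usage sketch (values illustrative):

    import java.nio.channels.ClosedChannelException;
    import java.util.ArrayList;
    import java.util.List;

    import edu.uci.ics.hyracks.control.common.utils.ExceptionUtils;

    // Failures caused by teardown noise (interrupts, closed channels) are
    // filtered out; survivors are tagged with the reporting node's id.
    public class ExceptionFilterDemo {
        public static void main(String[] args) {
            List<Exception> all = new ArrayList<Exception>();
            all.add(new Exception("real failure", new IllegalStateException()));
            all.add(new Exception("teardown noise", new ClosedChannelException()));
            List<Exception> roots = ExceptionUtils.getActualExceptions(all);
            System.out.println(roots.size()); // 1: only the real failure survives
            ExceptionUtils.setNodeIds(roots, "nc1"); // wrap and tag with node id
        }
    }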
diff --git a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/work/WorkQueue.java b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/work/WorkQueue.java
index f12c981..58e12cf 100644
--- a/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/work/WorkQueue.java
+++ b/hyracks/hyracks-control/hyracks-control-common/src/main/java/edu/uci/ics/hyracks/control/common/work/WorkQueue.java
@@ -87,6 +87,7 @@
private class WorkerThread extends Thread {
WorkerThread() {
setDaemon(true);
+ setPriority(MAX_PRIORITY);
}
@Override
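
Together with the Task change below (task threads dropped to MIN_PRIORITY), this bump means the WorkQueue's daemon worker now outranks data-processing threads, so control-plane work such as heartbeat handling gets CPU time even under heavy task load. The construction in isolation, as a stand-alone sketch:

    // Illustration of the new priority split: the control-plane worker runs
    // as a MAX_PRIORITY daemon while task threads drop to MIN_PRIORITY.
    public class PrioritySplitDemo {
        public static void main(String[] args) {
            Thread worker = new Thread(new Runnable() {
                public void run() {
                    // drain and execute queued work items here
                }
            });
            worker.setDaemon(true);
            worker.setPriority(Thread.MAX_PRIORITY);
            worker.start();
        }
    }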
diff --git a/hyracks/hyracks-control/hyracks-control-nc/pom.xml b/hyracks/hyracks-control/hyracks-control-nc/pom.xml
index af384d4..006a07b 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/pom.xml
+++ b/hyracks/hyracks-control/hyracks-control-nc/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -40,19 +40,19 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-net</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-comm</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
<reporting>
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/Task.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/Task.java
index 53e5a01..c72ced1 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/Task.java
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/Task.java
@@ -49,6 +49,7 @@
import edu.uci.ics.hyracks.control.common.job.profiling.counters.Counter;
import edu.uci.ics.hyracks.control.common.job.profiling.om.PartitionProfile;
import edu.uci.ics.hyracks.control.common.job.profiling.om.TaskProfile;
+import edu.uci.ics.hyracks.control.common.utils.ExceptionUtils;
import edu.uci.ics.hyracks.control.nc.io.IOManager;
import edu.uci.ics.hyracks.control.nc.io.WorkspaceFileFactory;
import edu.uci.ics.hyracks.control.nc.resources.DefaultDeallocatableRegistry;
@@ -243,6 +244,7 @@
addPendingThread(thread);
String oldName = thread.getName();
thread.setName(displayName + ":" + taskAttemptId + ":" + cIdx);
+ thread.setPriority(Thread.MIN_PRIORITY);
try {
pushFrames(collector, writer);
} catch (HyracksDataException e) {
@@ -276,7 +278,11 @@
removePendingThread(ct);
}
if (!exceptions.isEmpty()) {
+ for (Exception e : exceptions) {
+ e.printStackTrace();
+ }
NodeControllerService ncs = joblet.getNodeController();
+ ExceptionUtils.setNodeIds(exceptions, ncs.getId());
ncs.getWorkQueue().schedule(new NotifyTaskFailureWork(ncs, this, exceptions));
}
}
diff --git a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/work/NotifyTaskFailureWork.java b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/work/NotifyTaskFailureWork.java
index c4784ff..56379d2 100644
--- a/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/work/NotifyTaskFailureWork.java
+++ b/hyracks/hyracks-control/hyracks-control-nc/src/main/java/edu/uci/ics/hyracks/control/nc/work/NotifyTaskFailureWork.java
@@ -43,6 +43,7 @@
dpm.abortReader(jobId);
}
ncs.getClusterController().notifyTaskFailure(jobId, task.getTaskAttemptId(), ncs.getId(), exceptions);
+ //exceptions.get(0).printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
diff --git a/hyracks/hyracks-control/pom.xml b/hyracks/hyracks-control/pom.xml
index a98ffe5..7b43c1f 100644
--- a/hyracks/hyracks-control/pom.xml
+++ b/hyracks/hyracks-control/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<modules>
diff --git a/hyracks/hyracks-data/hyracks-data-std/pom.xml b/hyracks/hyracks-data/hyracks-data-std/pom.xml
index 90d0229..0758c9a 100644
--- a/hyracks/hyracks-data/hyracks-data-std/pom.xml
+++ b/hyracks/hyracks-data/hyracks-data-std/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,7 +41,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
diff --git a/hyracks/hyracks-data/pom.xml b/hyracks/hyracks-data/pom.xml
index 32dd51d..a5ffd98 100644
--- a/hyracks/hyracks-data/pom.xml
+++ b/hyracks/hyracks-data/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<modules>
diff --git a/hyracks/hyracks-dataflow-common/pom.xml b/hyracks/hyracks-dataflow-common/pom.xml
index 6e270bf..d779ca4 100644
--- a/hyracks/hyracks-dataflow-common/pom.xml
+++ b/hyracks/hyracks-dataflow-common/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -40,14 +40,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
</project>
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/FrameTupleAccessor.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/FrameTupleAccessor.java
index ce4c74c..ac44c11 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/FrameTupleAccessor.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/FrameTupleAccessor.java
@@ -22,6 +22,7 @@
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
+import edu.uci.ics.hyracks.dataflow.common.util.IntSerDeUtils;
/**
* FrameTupleCursor is used to navigate over tuples in a Frame. A frame is
@@ -57,27 +58,28 @@
@Override
public int getTupleCount() {
- return buffer.getInt(FrameHelper.getTupleCountOffset(frameSize));
+ return IntSerDeUtils.getInt(buffer.array(), FrameHelper.getTupleCountOffset(frameSize));
}
@Override
public int getTupleStartOffset(int tupleIndex) {
- return tupleIndex == 0 ? 0 : buffer.getInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * tupleIndex);
+ return tupleIndex == 0 ? 0 : IntSerDeUtils.getInt(buffer.array(), FrameHelper.getTupleCountOffset(frameSize)
+ - 4 * tupleIndex);
}
@Override
public int getTupleEndOffset(int tupleIndex) {
- return buffer.getInt(FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleIndex + 1));
+ return IntSerDeUtils.getInt(buffer.array(), FrameHelper.getTupleCountOffset(frameSize) - 4 * (tupleIndex + 1));
}
@Override
public int getFieldStartOffset(int tupleIndex, int fIdx) {
- return fIdx == 0 ? 0 : buffer.getInt(getTupleStartOffset(tupleIndex) + (fIdx - 1) * 4);
+ return fIdx == 0 ? 0 : IntSerDeUtils.getInt(buffer.array(), getTupleStartOffset(tupleIndex) + (fIdx - 1) * 4);
}
@Override
public int getFieldEndOffset(int tupleIndex, int fIdx) {
- return buffer.getInt(getTupleStartOffset(tupleIndex) + fIdx * 4);
+ return IntSerDeUtils.getInt(buffer.array(), getTupleStartOffset(tupleIndex) + fIdx * 4);
}
@Override
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/util/IntSerDeUtils.java
similarity index 65%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/util/IntSerDeUtils.java
index cde5022..9faef09 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/util/IntSerDeUtils.java
@@ -13,16 +13,13 @@
* limitations under the License.
*/
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+package edu.uci.ics.hyracks.dataflow.common.util;
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+public class IntSerDeUtils {
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
+ public static int getInt(byte[] bytes, int offset) {
+ return ((bytes[offset] & 0xff) << 24) + ((bytes[offset + 1] & 0xff) << 16) + ((bytes[offset + 2] & 0xff) << 8)
+ + ((bytes[offset + 3] & 0xff) << 0);
}
- public BTreeDuplicateKeyException(String message) {
- super(message);
- }
}
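
IntSerDeUtils.getInt assembles a big-endian int directly from the frame's backing array, matching ByteBuffer.getInt without the buffer's bounds and position machinery; that is what the FrameTupleAccessor change above relies on. A quick equivalence check:

    import java.nio.ByteBuffer;

    // Sanity check: the shift-and-mask decode matches big-endian
    // ByteBuffer.getInt, so reading offsets from buffer.array() is safe.
    public class IntSerDeCheck {
        public static void main(String[] args) {
            byte[] bytes = new byte[8];
            ByteBuffer.wrap(bytes).putInt(4, 0xCAFEBABE);
            int viaBuffer = ByteBuffer.wrap(bytes).getInt(4);
            int viaArray = ((bytes[4] & 0xff) << 24) + ((bytes[5] & 0xff) << 16)
                    + ((bytes[6] & 0xff) << 8) + (bytes[7] & 0xff);
            System.out.println(viaBuffer == viaArray); // true
            // IntSerDeUtils.getInt(bytes, 4) computes exactly viaArray.
        }
    }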
diff --git a/hyracks/hyracks-dataflow-hadoop/pom.xml b/hyracks/hyracks-dataflow-hadoop/pom.xml
index 6468118..5c05956 100644
--- a/hyracks/hyracks-dataflow-hadoop/pom.xml
+++ b/hyracks/hyracks-dataflow-hadoop/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -40,14 +40,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
@@ -67,7 +67,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>
diff --git a/hyracks/hyracks-dataflow-std/pom.xml b/hyracks/hyracks-dataflow-std/pom.xml
index f88e852..dafc03f 100644
--- a/hyracks/hyracks-dataflow-std/pom.xml
+++ b/hyracks/hyracks-dataflow-std/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,14 +41,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/base/AbstractMToNConnectorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/base/AbstractMToNConnectorDescriptor.java
index 30b2482..df4d296 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/base/AbstractMToNConnectorDescriptor.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/base/AbstractMToNConnectorDescriptor.java
@@ -38,4 +38,9 @@
sourceBitmap.clear();
sourceBitmap.set(0, nProducerPartitions);
}
+
+ @Override
+ public boolean allProducersToAllConsumers(){
+ return true;
+ }
}
\ No newline at end of file
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/collectors/NonDeterministicChannelReader.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/collectors/NonDeterministicChannelReader.java
index 9a84173..7f447c6 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/collectors/NonDeterministicChannelReader.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/collectors/NonDeterministicChannelReader.java
@@ -68,7 +68,7 @@
}
public void open() throws HyracksDataException {
- lastReadSender = 0;
+ lastReadSender = -1;
}
public IInputChannel[] getChannels() {
@@ -77,14 +77,9 @@
public synchronized int findNextSender() throws HyracksDataException {
while (true) {
- switch (lastReadSender) {
- default:
- lastReadSender = frameAvailability.nextSetBit(lastReadSender + 1);
- if (lastReadSender >= 0) {
- break;
- }
- case 0:
- lastReadSender = frameAvailability.nextSetBit(0);
+ lastReadSender = frameAvailability.nextSetBit(lastReadSender + 1);
+ if (lastReadSender < 0) {
+ lastReadSender = frameAvailability.nextSetBit(0);
}
if (lastReadSender >= 0) {
assert availableFrameCounts[lastReadSender] > 0;
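
The rewritten findNextSender is a plain round-robin scan over the frame-availability BitSet: resume after the last sender served, wrap to bit 0 on a miss. Initializing lastReadSender to -1 in open() makes the first scan start at sender 0, where the old switch-based version rescanned from 0 in its special case and could favor low-numbered senders. The scan in isolation:

    import java.util.BitSet;

    // Round-robin scan: resume after the last sender served, wrap around
    // once, so no ready channel is starved.
    public class RoundRobinSketch {
        public static void main(String[] args) {
            BitSet ready = new BitSet();
            ready.set(1);
            ready.set(3);
            int last = -1; // matches the new open(): lastReadSender = -1
            for (int i = 0; i < 4; i++) {
                int next = ready.nextSetBit(last + 1);
                if (next < 0) {
                    next = ready.nextSetBit(0); // wrap around
                }
                last = next;
                System.out.print(next + " "); // prints: 1 3 1 3
            }
        }
    }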
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/connectors/OneToOneConnectorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/connectors/OneToOneConnectorDescriptor.java
index 466fead..20a0ed1 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/connectors/OneToOneConnectorDescriptor.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/connectors/OneToOneConnectorDescriptor.java
@@ -82,4 +82,9 @@
sourceBitmap.clear();
sourceBitmap.set(consumerIndex);
}
+
+ @Override
+ public boolean allProducersToAllConsumers() {
+ return false;
+ }
}
\ No newline at end of file
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/InMemoryHashJoin.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/InMemoryHashJoin.java
index d02d65c..60e9c40 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/InMemoryHashJoin.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/InMemoryHashJoin.java
@@ -119,7 +119,7 @@
accessorBuild.reset(buffers.get(bIndex));
int c = tpComparator.compare(accessorProbe, i, accessorBuild, tIndex);
if (c == 0) {
- boolean predEval = ( (predEvaluator == null) || predEvaluator.evaluate(accessorProbe, i, accessorBuild, tIndex) );
+ boolean predEval = evaluatePredicate(i, tIndex);
if(predEval){
matchFound = true;
appendToResult(i, tIndex, writer);
@@ -155,6 +155,15 @@
buffer.position(0);
buffer.limit(buffer.capacity());
}
+
+ private boolean evaluatePredicate(int tIx1, int tIx2){
+ if(reverseOutputOrder){ //Role Reversal Optimization is triggered
+ return ( (predEvaluator == null) || predEvaluator.evaluate(accessorBuild, tIx2, accessorProbe, tIx1) );
+ }
+ else {
+ return ( (predEvaluator == null) || predEvaluator.evaluate(accessorProbe, tIx1, accessorBuild, tIx2) );
+ }
+ }
private void appendToResult(int probeSidetIx, int buildSidetIx, IFrameWriter writer) throws HyracksDataException {
if (!reverseOutputOrder) {
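
The extracted evaluatePredicate keeps the predicate's argument order logical: the probe tuple is always passed in the probe position, with the arguments flipped when the role-reversal optimization has physically swapped build and probe sides. A self-contained sketch of that contract (Pred is a hypothetical stand-in for IPredicateEvaluator, whose four-argument evaluate is assumed from the calls above):

    // Keep the logical (probe, build) order even when the physical sides
    // were swapped by the role-reversal optimization.
    public class RoleReversalSketch {
        interface Pred {
            boolean evaluate(Object leftAcc, int leftIx, Object rightAcc, int rightIx);
        }

        static boolean eval(Pred pe, boolean reversed,
                Object probeAcc, int pIx, Object buildAcc, int bIx) {
            if (pe == null) {
                return true; // no predicate configured: every match qualifies
            }
            return reversed ? pe.evaluate(buildAcc, bIx, probeAcc, pIx)
                            : pe.evaluate(probeAcc, pIx, buildAcc, bIx);
        }
    }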
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoin.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoin.java
index 2f719fa..979ef59 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoin.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoin.java
@@ -49,6 +49,8 @@
private final boolean isLeftOuter;
private final ArrayTupleBuilder nullTupleBuilder;
private final IPredicateEvaluator predEvaluator;
+ private boolean isReversed; //Added so the predicate evaluator is called with the correct argument order when recursive calls (in OptimizedHybridHashJoin) cause role reversal
+
public NestedLoopJoin(IHyracksTaskContext ctx, FrameTupleAccessor accessor0, FrameTupleAccessor accessor1,
ITuplePairComparator comparators, int memSize, IPredicateEvaluator predEval, boolean isLeftOuter, INullWriter[] nullWriters1)
@@ -63,6 +65,7 @@
this.outBuffers = new ArrayList<ByteBuffer>();
this.memSize = memSize;
this.predEvaluator = predEval;
+ this.isReversed = false;
this.ctx = ctx;
this.isLeftOuter = isLeftOuter;
@@ -133,7 +136,7 @@
boolean matchFound = false;
for (int j = 0; j < tupleCount1; ++j) {
int c = compare(accessorOuter, i, accessorInner, j);
- boolean prdEval = (predEvaluator == null) || (predEvaluator.evaluate(accessorOuter, i, accessorInner, j));
+ boolean prdEval = evaluatePredicate(i, j);
if (c == 0 && prdEval) {
matchFound = true;
if (!appender.appendConcat(accessorOuter, i, accessorInner, j)) {
@@ -165,6 +168,15 @@
}
}
}
+
+ private boolean evaluatePredicate(int tIx1, int tIx2){
+ if(isReversed){ //Role Reversal Optimization is triggered
+ return ( (predEvaluator == null) || predEvaluator.evaluate(accessorInner, tIx2, accessorOuter, tIx1) );
+ }
+ else {
+ return ( (predEvaluator == null) || predEvaluator.evaluate(accessorOuter, tIx1, accessorInner, tIx2) );
+ }
+ }
public void closeCache() throws HyracksDataException {
if (runFileWriter != null) {
@@ -206,4 +218,8 @@
}
return 0;
}
+
+ public void setIsReversed(boolean b){
+ this.isReversed = b;
+ }
}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoin.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoin.java
index cd32c81..6bc810e 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoin.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoin.java
@@ -99,6 +99,7 @@
private int freeFramesCounter; //Used for partition tuning
private boolean isTableEmpty; //Added for handling the case, where build side is empty (tableSize is 0)
+ private boolean isReversed; //Added so the predicate evaluator is called with the correct argument order when recursive calls cause role reversal
public OptimizedHybridHashJoin(IHyracksTaskContext ctx, int memForJoin, int numOfPartitions, String rel0Name,
String rel1Name, int[] keys0, int[] keys1, IBinaryComparator[] comparators, RecordDescriptor buildRd,
@@ -125,6 +126,7 @@
this.predEvaluator = predEval;
this.isLeftOuter = false;
this.nullWriters1 = null;
+ this.isReversed = false;
}
@@ -153,7 +155,8 @@
this.predEvaluator = predEval;
this.isLeftOuter = isLeftOuter;
-
+ this.isReversed = false;
+
this.nullWriters1 = isLeftOuter ? new INullWriter[nullWriterFactories1.length] : null;
if (isLeftOuter) {
for (int i = 0; i < nullWriterFactories1.length; i++) {
@@ -441,7 +444,7 @@
this.inMemJoiner = new InMemoryHashJoin(ctx, inMemTupCount,
new FrameTupleAccessor(ctx.getFrameSize(), probeRd), probeHpc, new FrameTupleAccessor(
ctx.getFrameSize(), buildRd), buildHpc, new FrameTuplePairComparator(probeKeys, buildKeys,
- comparators), isLeftOuter, nullWriters1, table, predEvaluator);
+ comparators), isLeftOuter, nullWriters1, table, predEvaluator, isReversed);
}
private void cacheInMemJoin() throws HyracksDataException {
@@ -639,4 +642,8 @@
public boolean isTableEmpty() {
return this.isTableEmpty;
}
+
+ public void setIsReversed(boolean b){
+ this.isReversed = b;
+ }
}
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
index 95b7a3c..4e9376d 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
@@ -19,6 +19,8 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.BitSet;
+import java.util.logging.Level;
+import java.util.logging.Logger;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.ActivityId;
@@ -117,6 +119,8 @@
private final boolean isLeftOuter;
private final INullWriterFactory[] nullWriterFactories1;
+
+ private static final Logger LOGGER = Logger.getLogger(OptimizedHybridHashJoinOperatorDescriptor.class.getName());
public OptimizedHybridHashJoinOperatorDescriptor(IOperatorDescriptorRegistry spec, int memsize, int inputsize0,
double factor, int[] keys0, int[] keys1, IBinaryHashFunctionFamily[] hashFunctionGeneratorFactories,
@@ -139,8 +143,6 @@
this.predEvaluatorFactory = predEvaluatorFactory;
this.isLeftOuter = isLeftOuter;
this.nullWriterFactories1 = nullWriterFactories1;
-
-
}
public OptimizedHybridHashJoinOperatorDescriptor(IOperatorDescriptorRegistry spec, int memsize, int inputsize0,
@@ -207,7 +209,7 @@
}
public static class BuildAndPartitionTaskState extends AbstractStateObject {
-
+
private int memForJoin;
private int numOfPartitions;
private OptimizedHybridHashJoin hybridHJ;
@@ -303,6 +305,7 @@
public void close() throws HyracksDataException {
state.hybridHJ.closeBuild();
ctx.setStateObject(state);
+ LOGGER.log(Level.FINE, "OptimizedHybridHashJoin closed its build phase");
}
@Override
@@ -323,7 +326,7 @@
* Hybrid Hash Join recursively on them.
*/
private class ProbeAndJoinActivityNode extends AbstractActivityNode {
-
+
private static final long serialVersionUID = 1L;
private final ActivityId buildAid;
@@ -423,9 +426,11 @@
hashFunctionGeneratorFactories).createPartitioner(level);
ITuplePartitionComputer buildHpc = new FieldHashPartitionComputerFamily(buildKeys,
hashFunctionGeneratorFactories).createPartitioner(level);
-
+
long buildPartSize = ohhj.getBuildPartitionSize(pid) / ctx.getFrameSize();
long probePartSize = ohhj.getProbePartitionSize(pid) / ctx.getFrameSize();
+
+ LOGGER.log(Level.FINE,"Joining Partition Pairs (pid "+pid+") - (level "+level+") - BuildSize:\t"+buildPartSize+"\tProbeSize:\t"+probePartSize+" - MemForJoin "+(state.memForJoin));
//Apply in-Mem HJ if possible
if ((buildPartSize < state.memForJoin) || (probePartSize < state.memForJoin)) {
@@ -460,7 +465,7 @@
else {
OptimizedHybridHashJoin rHHj;
if (isLeftOuter || buildPartSize < probePartSize) { //Build Side is smaller
-
+ LOGGER.log(Level.FINE,"\tApply RecursiveHHJ for (pid "+pid+") - (level "+level+") [buildSize is smaller]");
int n = getNumberOfPartitions(state.memForJoin, (int) buildPartSize, fudgeFactor,
nPartitions);
@@ -503,6 +508,7 @@
}
} else { //Switch to NLJ (Further recursion seems not to be useful)
+ LOGGER.log(Level.FINE,"\tSwitched to NLJ for (pid "+pid+") - (level "+level+") (reverse false) [coming from buildSize was smaller]");
for (int rPid = rPStatus.nextSetBit(0); rPid >= 0; rPid = rPStatus.nextSetBit(rPid + 1)) {
RunFileReader rbrfw = rHHj.getBuildRFReader(rPid);
RunFileReader rprfw = rHHj.getProbeRFReader(rPid);
@@ -515,19 +521,21 @@
int probeSideInTups = rHHj.getProbePartitionSizeInTup(rPid);
if (isLeftOuter || buildSideInTups < probeSideInTups) {
applyNestedLoopJoin(probeRd, buildRd, state.memForJoin, rbrfw, rprfw,
- nljComparator0);
+ nljComparator0, false);
} else {
applyNestedLoopJoin(buildRd, probeRd, state.memForJoin, rprfw, rbrfw,
- nljComparator1);
+ nljComparator1, false);
}
}
}
} else { //Role Reversal (Probe Side is smaller)
+ LOGGER.log(Level.FINE,"\tApply RecursiveHHJ for (pid "+pid+") - (level "+level+") WITH REVERSAL [probeSize is smaller]");
int n = getNumberOfPartitions(state.memForJoin, (int) probePartSize, fudgeFactor,
nPartitions);
rHHj = new OptimizedHybridHashJoin(ctx, state.memForJoin, n, BUILD_REL, PROBE_REL,
buildKeys, probeKeys, comparators, buildRd, probeRd, buildHpc, probeHpc, predEvaluator);
+ rHHj.setIsReversed(true); //Added so the predicate evaluator (used by InMemoryHashJoin) sees the correct argument order
probeSideReader.open();
rHHj.initBuild();
@@ -561,7 +569,8 @@
joinPartitionPair(rHHj, rprfw, rbrfw, rPid, afterMax, (level + 1));
}
} else { //Switch to NLJ (Further recursion seems not to be effective)
- for (int rPid = rPStatus.nextSetBit(0); rPid >= 0; rPid = rPStatus.nextSetBit(rPid + 1)) {
+ LOGGER.log(Level.FINE,"\tSwitched to NLJ for (pid "+pid+") - (level "+level+") (reverse true) [coming from probeSize was smaller]");
+ for (int rPid = rPStatus.nextSetBit(0); rPid >= 0; rPid = rPStatus.nextSetBit(rPid + 1)) {
RunFileReader rbrfw = rHHj.getBuildRFReader(rPid);
RunFileReader rprfw = rHHj.getProbeRFReader(rPid);
@@ -573,10 +582,10 @@
long probeSideSize = rprfw.getFileSize();
if (buildSideSize > probeSideSize) {
applyNestedLoopJoin(buildRd, probeRd, state.memForJoin, rbrfw, rprfw,
- nljComparator1);
+ nljComparator1, true);
} else {
applyNestedLoopJoin(probeRd, buildRd, state.memForJoin, rprfw, rbrfw,
- nljComparator0);
+ nljComparator0, true);
}
}
}
@@ -590,7 +599,7 @@
RecordDescriptor probeRDesc, ITuplePartitionComputer hpcRepLarger,
ITuplePartitionComputer hpcRepSmaller, RunFileReader bReader, RunFileReader pReader, boolean reverse, int pid)
throws HyracksDataException {
-
+ LOGGER.log(Level.FINE,"\t(pid "+pid+") - applyInMemHashJoin (reversal "+reverse+")");
ISerializableTable table = new SerializableHashTable(tabSize, ctx);
InMemoryHashJoin joiner = new InMemoryHashJoin(ctx, tabSize, new FrameTupleAccessor(
ctx.getFrameSize(), probeRDesc), hpcRepLarger, new FrameTupleAccessor(ctx.getFrameSize(),
@@ -619,9 +628,9 @@
}
private void applyNestedLoopJoin(RecordDescriptor outerRd, RecordDescriptor innerRd, int memorySize,
- RunFileReader outerReader, RunFileReader innerReader, ITuplePairComparator nljComparator)
+ RunFileReader outerReader, RunFileReader innerReader, ITuplePairComparator nljComparator, boolean reverse)
throws HyracksDataException {
-
+
NestedLoopJoin nlj = new NestedLoopJoin(ctx, new FrameTupleAccessor(ctx.getFrameSize(), outerRd),
new FrameTupleAccessor(ctx.getFrameSize(), innerRd), nljComparator, memorySize, predEvaluator, false, null);
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/MaterializingOperatorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/MaterializingOperatorDescriptor.java
index ac5a627..89c20d6 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/MaterializingOperatorDescriptor.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/misc/MaterializingOperatorDescriptor.java
@@ -19,6 +19,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.ActivityId;
import edu.uci.ics.hyracks.api.dataflow.IActivityGraphBuilder;
@@ -36,6 +37,7 @@
import edu.uci.ics.hyracks.dataflow.std.base.AbstractOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractStateObject;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable;
public class MaterializingOperatorDescriptor extends AbstractOperatorDescriptor {
@@ -43,24 +45,43 @@
private final static int MATERIALIZER_ACTIVITY_ID = 0;
private final static int READER_ACTIVITY_ID = 1;
+ private final static int MATERIALIZER_READER_ACTIVITY_ID = 2;
+
+ private boolean isSingleActivity;
public MaterializingOperatorDescriptor(IOperatorDescriptorRegistry spec, RecordDescriptor recordDescriptor) {
+ this(spec, recordDescriptor, false);
+ }
+
+ public MaterializingOperatorDescriptor(IOperatorDescriptorRegistry spec, RecordDescriptor recordDescriptor,
+ boolean isSingleActivity) {
super(spec, 1, 1);
recordDescriptors[0] = recordDescriptor;
+ this.isSingleActivity = isSingleActivity;
}
@Override
public void contributeActivities(IActivityGraphBuilder builder) {
- MaterializerActivityNode ma = new MaterializerActivityNode(new ActivityId(odId, MATERIALIZER_ACTIVITY_ID));
- ReaderActivityNode ra = new ReaderActivityNode(new ActivityId(odId, READER_ACTIVITY_ID));
+ if (isSingleActivity) {
+ MaterializerReaderActivityNode mra = new MaterializerReaderActivityNode(new ActivityId(odId,
+ MATERIALIZER_READER_ACTIVITY_ID));
- builder.addActivity(this, ma);
- builder.addSourceEdge(0, ma, 0);
+ builder.addActivity(this, mra);
+ builder.addSourceEdge(0, mra, 0);
+ builder.addTargetEdge(0, mra, 0);
+ } else {
+ MaterializerActivityNode ma = new MaterializerActivityNode(new ActivityId(odId, MATERIALIZER_ACTIVITY_ID));
+ ReaderActivityNode ra = new ReaderActivityNode(new ActivityId(odId, READER_ACTIVITY_ID));
- builder.addActivity(this, ra);
- builder.addTargetEdge(0, ra, 0);
+ builder.addActivity(this, ma);
+ builder.addSourceEdge(0, ma, 0);
- builder.addBlockingEdge(ma, ra);
+ builder.addActivity(this, ra);
+ builder.addTargetEdge(0, ra, 0);
+
+ builder.addBlockingEdge(ma, ra);
+ }
+
}
public static class MaterializerTaskState extends AbstractStateObject {
@@ -82,6 +103,76 @@
public void fromBytes(DataInput in) throws IOException {
}
+
+ public void open(IHyracksTaskContext ctx) throws HyracksDataException {
+ FileReference file = ctx.getJobletContext().createManagedWorkspaceFile(
+ MaterializingOperatorDescriptor.class.getSimpleName());
+ out = new RunFileWriter(file, ctx.getIOManager());
+ out.open();
+ }
+
+ public void appendFrame(ByteBuffer buffer) throws HyracksDataException {
+ out.nextFrame(buffer);
+ }
+
+ public void writeOut(IFrameWriter writer, ByteBuffer frame) throws HyracksDataException {
+ RunFileReader in = out.createReader();
+ writer.open();
+ try {
+ in.open();
+ while (in.nextFrame(frame)) {
+ frame.flip();
+ writer.nextFrame(frame);
+ frame.clear();
+ }
+ in.close();
+ } catch (Exception e) {
+ writer.fail();
+ throw new HyracksDataException(e);
+ } finally {
+ writer.close();
+ }
+ }
+ }
+
+ private final class MaterializerReaderActivityNode extends AbstractActivityNode {
+ private static final long serialVersionUID = 1L;
+
+ public MaterializerReaderActivityNode(ActivityId id) {
+ super(id);
+ }
+
+ @Override
+ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+ IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) {
+ return new AbstractUnaryInputUnaryOutputOperatorNodePushable() {
+ private MaterializerTaskState state;
+
+ @Override
+ public void open() throws HyracksDataException {
+ state = new MaterializerTaskState(ctx.getJobletContext().getJobId(), new TaskId(getActivityId(),
+ partition));
+ state.open(ctx);
+ }
+
+ @Override
+ public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+ state.appendFrame(buffer);
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ state.out.close();
+ ByteBuffer frame = ctx.allocateFrame();
+ state.writeOut(writer, frame);
+ }
+
+ };
+ }
}
private final class MaterializerActivityNode extends AbstractActivityNode {
@@ -101,15 +192,12 @@
public void open() throws HyracksDataException {
state = new MaterializerTaskState(ctx.getJobletContext().getJobId(), new TaskId(getActivityId(),
partition));
- FileReference file = ctx.getJobletContext().createManagedWorkspaceFile(
- MaterializingOperatorDescriptor.class.getSimpleName());
- state.out = new RunFileWriter(file, ctx.getIOManager());
- state.out.open();
+ state.open(ctx);
}
@Override
public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
- state.out.nextFrame(buffer);
+ state.appendFrame(buffer);
}
@Override
@@ -141,22 +229,7 @@
ByteBuffer frame = ctx.allocateFrame();
MaterializerTaskState state = (MaterializerTaskState) ctx.getStateObject(new TaskId(new ActivityId(
getOperatorId(), MATERIALIZER_ACTIVITY_ID), partition));
- RunFileReader in = state.out.createReader();
- writer.open();
- try {
- in.open();
- while (in.nextFrame(frame)) {
- frame.flip();
- writer.nextFrame(frame);
- frame.clear();
- }
- in.close();
- } catch (Exception e) {
- writer.fail();
- throw new HyracksDataException(e);
- } finally {
- writer.close();
- }
+ state.writeOut(writer, frame);
}
@Override
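
The refactor above pulls the spill (open/appendFrame) and replay (writeOut) logic into MaterializerTaskState and adds a fused single-activity mode in which one push runtime both materializes incoming frames and, on close(), replays them downstream, with no blocking edge between two activities. A toy model of that spill-then-replay lifecycle, using plain java.io purely for illustration:

    import java.io.BufferedReader;
    import java.io.BufferedWriter;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.nio.file.Files;
    import java.nio.file.Path;

    // Every incoming "frame" (here, a line) is spilled to a run file;
    // closing the input replays the whole file to the consumer, mirroring
    // MaterializerTaskState.open/appendFrame/writeOut.
    public class SingleActivityMaterializerModel {
        public static void main(String[] args) throws IOException {
            Path run = Files.createTempFile("materializer", ".run");
            try (BufferedWriter out = Files.newBufferedWriter(run, StandardCharsets.UTF_8)) {
                for (String frame : new String[] { "f1", "f2", "f3" }) {
                    out.write(frame); // appendFrame(): spill, do not forward yet
                    out.newLine();
                }
            } // writer closed: run file sealed, like state.out.close()
            try (BufferedReader in = Files.newBufferedReader(run, StandardCharsets.UTF_8)) {
                String frame;
                while ((frame = in.readLine()) != null) {
                    System.out.println(frame); // writeOut(): replay downstream
                }
            }
            Files.delete(run);
        }
    }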
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/FrameSorter.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/FrameSorter.java
index db70a62..a6bb4e2 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/FrameSorter.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/sort/FrameSorter.java
@@ -29,6 +29,7 @@
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.common.util.IntSerDeUtils;
public class FrameSorter {
private final IHyracksTaskContext ctx;
@@ -46,6 +47,7 @@
private int dataFrameCount;
private int[] tPointers;
+ private int[] tPointersTemp;
private int tupleCount;
public FrameSorter(IHyracksTaskContext ctx, int[] sortFields,
@@ -116,7 +118,8 @@
}
}
if (tupleCount > 0) {
- sort(tPointers, 0, tupleCount);
+ tPointersTemp = new int[tPointers.length];
+ sort(0, tupleCount);
}
}
@@ -142,75 +145,73 @@
}
}
- private void sort(int[] tPointers, int offset, int length) {
- int m = offset + (length >> 1);
- int mi = tPointers[m * 4];
- int mj = tPointers[m * 4 + 1];
- int mv = tPointers[m * 4 + 3];
-
- int a = offset;
- int b = a;
- int c = offset + length - 1;
- int d = c;
- while (true) {
- while (b <= c) {
- int cmp = compare(tPointers, b, mi, mj, mv);
- if (cmp > 0) {
- break;
+ private void sort(int offset, int length) {
+ int step = 1;
+ int len = length;
+ int end = offset + len;
+ /** bottom-up merge */
+ while (step < len) {
+ /** merge */
+ for (int i = offset; i < end; i += 2 * step) {
+ int next = i + step;
+ if (next < end) {
+ merge(i, next, step, Math.min(step, end - next));
+ } else {
+ System.arraycopy(tPointers, i * 4, tPointersTemp, i * 4, (end - i) * 4);
}
- if (cmp == 0) {
- swap(tPointers, a++, b);
- }
- ++b;
}
- while (c >= b) {
- int cmp = compare(tPointers, c, mi, mj, mv);
- if (cmp < 0) {
- break;
- }
- if (cmp == 0) {
- swap(tPointers, c, d--);
- }
- --c;
+ /** prepare next phase merge */
+ step *= 2;
+ int[] tmp = tPointersTemp;
+ tPointersTemp = tPointers;
+ tPointers = tmp;
+ }
+ }
+
+ /** Merge two subarrays into one */
+ private void merge(int start1, int start2, int len1, int len2) {
+ int targetPos = start1;
+ int pos1 = start1;
+ int pos2 = start2;
+ int end1 = start1 + len1 - 1;
+ int end2 = start2 + len2 - 1;
+ while (pos1 <= end1 && pos2 <= end2) {
+ int cmp = compare(pos1, pos2);
+ if (cmp <= 0) {
+ copy(pos1, targetPos);
+ pos1++;
+ } else {
+ copy(pos2, targetPos);
+ pos2++;
}
- if (b > c)
- break;
- swap(tPointers, b++, c--);
+ targetPos++;
}
-
- int s;
- int n = offset + length;
- s = Math.min(a - offset, b - a);
- vecswap(tPointers, offset, b - s, s);
- s = Math.min(d - c, n - d - 1);
- vecswap(tPointers, b, n - s, s);
-
- if ((s = b - a) > 1) {
- sort(tPointers, offset, s);
+ if (pos1 <= end1) {
+ int rest = end1 - pos1 + 1;
+ System.arraycopy(tPointers, pos1 * 4, tPointersTemp, targetPos * 4, rest * 4);
}
- if ((s = d - c) > 1) {
- sort(tPointers, n - s, s);
+ if (pos2 <= end2) {
+ int rest = end2 - pos2 + 1;
+ System.arraycopy(tPointers, pos2 * 4, tPointersTemp, targetPos * 4, rest * 4);
}
}
- private void swap(int x[], int a, int b) {
- for (int i = 0; i < 4; ++i) {
- int t = x[a * 4 + i];
- x[a * 4 + i] = x[b * 4 + i];
- x[b * 4 + i] = t;
- }
+ private void copy(int src, int dest) {
+ tPointersTemp[dest * 4] = tPointers[src * 4];
+ tPointersTemp[dest * 4 + 1] = tPointers[src * 4 + 1];
+ tPointersTemp[dest * 4 + 2] = tPointers[src * 4 + 2];
+ tPointersTemp[dest * 4 + 3] = tPointers[src * 4 + 3];
}
- private void vecswap(int x[], int a, int b, int n) {
- for (int i = 0; i < n; i++, a++, b++) {
- swap(x, a, b);
- }
- }
-
- private int compare(int[] tPointers, int tp1, int tp2i, int tp2j, int tp2v) {
+ private int compare(int tp1, int tp2) {
int i1 = tPointers[tp1 * 4];
int j1 = tPointers[tp1 * 4 + 1];
int v1 = tPointers[tp1 * 4 + 3];
+
+ int tp2i = tPointers[tp2 * 4];
+ int tp2j = tPointers[tp2 * 4 + 1];
+ int tp2v = tPointers[tp2 * 4 + 3];
+
if (v1 != tp2v) {
return ((((long) v1) & 0xffffffffL) < (((long) tp2v) & 0xffffffffL)) ? -1 : 1;
}
@@ -224,12 +225,12 @@
fta2.reset(buf2);
for (int f = 0; f < comparators.length; ++f) {
int fIdx = sortFields[f];
- int f1Start = fIdx == 0 ? 0 : buf1.getInt(j1 + (fIdx - 1) * 4);
- int f1End = buf1.getInt(j1 + fIdx * 4);
+ int f1Start = fIdx == 0 ? 0 : IntSerDeUtils.getInt(buf1.array(), j1 + (fIdx - 1) * 4);
+ int f1End = IntSerDeUtils.getInt(buf1.array(), j1 + fIdx * 4);
int s1 = j1 + fta1.getFieldSlotsLength() + f1Start;
int l1 = f1End - f1Start;
- int f2Start = fIdx == 0 ? 0 : buf2.getInt(j2 + (fIdx - 1) * 4);
- int f2End = buf2.getInt(j2 + fIdx * 4);
+ int f2Start = fIdx == 0 ? 0 : IntSerDeUtils.getInt(buf2.array(), j2 + (fIdx - 1) * 4);
+ int f2End = IntSerDeUtils.getInt(buf2.array(), j2 + fIdx * 4);
int s2 = j2 + fta2.getFieldSlotsLength() + f2Start;
int l2 = f2End - f2Start;
int c = comparators[f].compare(b1, s1, l1, b2, s2, l2);
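
The FrameSorter rewrite above replaces the recursive three-way quicksort with a bottom-up (iterative) merge sort over the 4-int tuple-pointer records: runs of width step are merged from tPointers into tPointersTemp, the two arrays swap roles, and step doubles until a single run covers everything. This trades quicksort's O(n^2) worst case and recursion depth for stable O(n log n) at the cost of a second pointer array. The same skeleton on a plain int[]:

    import java.util.Arrays;

    // Bottom-up merge sort with the same double-buffer swap the new
    // FrameSorter uses: merge runs of width step from src into dst, swap.
    public class BottomUpMergeSort {
        public static void main(String[] args) {
            int[] src = { 5, 3, 8, 1, 9, 2, 7 };
            int[] dst = new int[src.length];
            for (int step = 1; step < src.length; step *= 2) {
                for (int lo = 0; lo < src.length; lo += 2 * step) {
                    int mid = Math.min(lo + step, src.length);
                    int hi = Math.min(lo + 2 * step, src.length);
                    int i = lo, j = mid, t = lo;
                    while (i < mid && j < hi) {
                        dst[t++] = src[i] <= src[j] ? src[i++] : src[j++]; // stable
                    }
                    while (i < mid) { dst[t++] = src[i++]; }
                    while (j < hi)  { dst[t++] = src[j++]; }
                }
                int[] tmp = src; src = dst; dst = tmp; // swap roles, like tPointers
            }
            System.out.println(Arrays.toString(src)); // [1, 2, 3, 5, 7, 8, 9]
        }
    }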
diff --git a/hyracks/hyracks-dist/pom.xml b/hyracks/hyracks-dist/pom.xml
index 7e9526b..ab7ea53 100755
--- a/hyracks/hyracks-dist/pom.xml
+++ b/hyracks/hyracks-dist/pom.xml
@@ -18,7 +18,7 @@
<parent>
<artifactId>hyracks</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<artifactId>hyracks-dist</artifactId>
diff --git a/hyracks/hyracks-documentation/pom.xml b/hyracks/hyracks-documentation/pom.xml
index a814994..4bc3f25 100644
--- a/hyracks/hyracks-documentation/pom.xml
+++ b/hyracks/hyracks-documentation/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
diff --git a/hyracks/hyracks-examples/btree-example/btreeclient/pom.xml b/hyracks/hyracks-examples/btree-example/btreeclient/pom.xml
index 0a5331f..27ab200 100644
--- a/hyracks/hyracks-examples/btree-example/btreeclient/pom.xml
+++ b/hyracks/hyracks-examples/btree-example/btreeclient/pom.xml
@@ -20,26 +20,26 @@
<parent>
<groupId>edu.uci.ics.hyracks.examples</groupId>
<artifactId>btree-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks.examples.btree</groupId>
<artifactId>btreehelper</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-examples/btree-example/btreehelper/pom.xml b/hyracks/hyracks-examples/btree-example/btreehelper/pom.xml
index 3d701ce..85eadf1 100644
--- a/hyracks/hyracks-examples/btree-example/btreehelper/pom.xml
+++ b/hyracks/hyracks-examples/btree-example/btreehelper/pom.xml
@@ -20,32 +20,32 @@
<parent>
<groupId>edu.uci.ics.hyracks.examples</groupId>
<artifactId>btree-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
<build>
diff --git a/hyracks/hyracks-examples/btree-example/btreeserver/pom.xml b/hyracks/hyracks-examples/btree-example/btreeserver/pom.xml
index fef46d8..3d3dc73 100644
--- a/hyracks/hyracks-examples/btree-example/btreeserver/pom.xml
+++ b/hyracks/hyracks-examples/btree-example/btreeserver/pom.xml
@@ -16,13 +16,13 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks.examples.btree</groupId>
<artifactId>btreeserver</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<name>btreeserver</name>
<parent>
<groupId>edu.uci.ics.hyracks.examples</groupId>
<artifactId>btree-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -77,20 +77,20 @@
<dependency>
<groupId>edu.uci.ics.hyracks.examples.btree</groupId>
<artifactId>btreehelper</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-examples/btree-example/pom.xml b/hyracks/hyracks-examples/btree-example/pom.xml
index dc3293d..dcab262 100644
--- a/hyracks/hyracks-examples/btree-example/pom.xml
+++ b/hyracks/hyracks-examples/btree-example/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-examples</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<modules>
diff --git a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatclient/pom.xml b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatclient/pom.xml
index f18d6bf..b5dd798 100644
--- a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatclient/pom.xml
+++ b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatclient/pom.xml
@@ -21,20 +21,20 @@
<parent>
<groupId>edu.uci.ics.hyracks.examples</groupId>
<artifactId>hadoop-compat-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks.examples.compat</groupId>
<artifactId>hadoopcompathelper</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompathelper/pom.xml b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompathelper/pom.xml
index fb595fd..2b703b2 100644
--- a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompathelper/pom.xml
+++ b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompathelper/pom.xml
@@ -21,20 +21,20 @@
<parent>
<groupId>edu.uci.ics.hyracks.examples</groupId>
<artifactId>hadoop-compat-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>
diff --git a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml
index d2062d3..ba30424 100644
--- a/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml
+++ b/hyracks/hyracks-examples/hadoop-compat-example/hadoopcompatserver/pom.xml
@@ -16,13 +16,13 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks.examples.compat</groupId>
<artifactId>hadoopcompatserver</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<name>hadoopcompatserver</name>
<parent>
<groupId>edu.uci.ics.hyracks.examples</groupId>
<artifactId>hadoop-compat-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -74,7 +74,7 @@
<plugin>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-virtualcluster-maven-plugin</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<configuration>
<hyracksServerHome>${basedir}/target/hadoopcompatserver-${project.version}-binary-assembly</hyracksServerHome>
<jvmOptions>${jvm.extraargs}</jvmOptions>
@@ -149,27 +149,27 @@
<dependency>
<groupId>edu.uci.ics.hyracks.examples.compat</groupId>
<artifactId>hadoopcompathelper</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks.examples.compat</groupId>
<artifactId>hadoopcompatclient</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>test</scope>
</dependency>
diff --git a/hyracks/hyracks-examples/hadoop-compat-example/pom.xml b/hyracks/hyracks-examples/hadoop-compat-example/pom.xml
index af88b09..5588134 100644
--- a/hyracks/hyracks-examples/hadoop-compat-example/pom.xml
+++ b/hyracks/hyracks-examples/hadoop-compat-example/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-examples</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<modules>
@@ -43,7 +43,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-hadoop-compat</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/pom.xml b/hyracks/hyracks-examples/hyracks-integration-tests/pom.xml
index a4fcf28..4893be4 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/pom.xml
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-examples</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
<plugins>
@@ -46,75 +46,75 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-rtree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-rtree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-test-support</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-client</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-examples/pom.xml b/hyracks/hyracks-examples/pom.xml
index 6296f5e..08f4a90 100644
--- a/hyracks/hyracks-examples/pom.xml
+++ b/hyracks/hyracks-examples/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<modules>
diff --git a/hyracks/hyracks-examples/text-example/pom.xml b/hyracks/hyracks-examples/text-example/pom.xml
index 2359f96..ed9924a 100644
--- a/hyracks/hyracks-examples/text-example/pom.xml
+++ b/hyracks/hyracks-examples/text-example/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-examples</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<modules>
diff --git a/hyracks/hyracks-examples/text-example/textclient/pom.xml b/hyracks/hyracks-examples/text-example/textclient/pom.xml
index 0cd3d6c..6923241 100644
--- a/hyracks/hyracks-examples/text-example/textclient/pom.xml
+++ b/hyracks/hyracks-examples/text-example/textclient/pom.xml
@@ -20,20 +20,20 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>text-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>texthelper</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-examples/text-example/texthelper/pom.xml b/hyracks/hyracks-examples/text-example/texthelper/pom.xml
index e02138f..0c708a1 100644
--- a/hyracks/hyracks-examples/text-example/texthelper/pom.xml
+++ b/hyracks/hyracks-examples/text-example/texthelper/pom.xml
@@ -20,26 +20,26 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>text-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
<build>
diff --git a/hyracks/hyracks-examples/text-example/textserver/pom.xml b/hyracks/hyracks-examples/text-example/textserver/pom.xml
index aad72bd..b12f823 100644
--- a/hyracks/hyracks-examples/text-example/textserver/pom.xml
+++ b/hyracks/hyracks-examples/text-example/textserver/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>text-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -73,7 +73,7 @@
<plugin>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-virtualcluster-maven-plugin</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<configuration>
<hyracksServerHome>${basedir}/target/textserver-${project.version}-binary-assembly</hyracksServerHome>
<jvmOptions>${jvm.extraargs}</jvmOptions>
@@ -148,27 +148,27 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>texthelper</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>textclient</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>test</scope>
</dependency>
diff --git a/hyracks/hyracks-examples/tpch-example/pom.xml b/hyracks/hyracks-examples/tpch-example/pom.xml
index 129e38f..1b5f2b5 100644
--- a/hyracks/hyracks-examples/tpch-example/pom.xml
+++ b/hyracks/hyracks-examples/tpch-example/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-examples</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<modules>
diff --git a/hyracks/hyracks-examples/tpch-example/tpchclient/pom.xml b/hyracks/hyracks-examples/tpch-example/tpchclient/pom.xml
index 1123fe4..a872ecd 100644
--- a/hyracks/hyracks-examples/tpch-example/tpchclient/pom.xml
+++ b/hyracks/hyracks-examples/tpch-example/tpchclient/pom.xml
@@ -19,20 +19,20 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>tpch-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
</dependencies>
<build>
diff --git a/hyracks/hyracks-examples/tpch-example/tpchserver/pom.xml b/hyracks/hyracks-examples/tpch-example/tpchserver/pom.xml
index d58f28c..712ba4c 100644
--- a/hyracks/hyracks-examples/tpch-example/tpchserver/pom.xml
+++ b/hyracks/hyracks-examples/tpch-example/tpchserver/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>tpch-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -76,25 +76,25 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-hadoop-compat/pom.xml b/hyracks/hyracks-hadoop-compat/pom.xml
index 0a34b34..f6cfb71 100644
--- a/hyracks/hyracks-hadoop-compat/pom.xml
+++ b/hyracks/hyracks-hadoop-compat/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -94,7 +94,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-hadoop</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml b/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml
index b7a8676..b93365d 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml
@@ -20,7 +20,7 @@
<parent>
<artifactId>hyracks-hdfs</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -112,7 +112,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml b/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml
index 34400b6..7cd1091 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml
@@ -20,7 +20,7 @@
<parent>
<artifactId>hyracks-hdfs</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
<plugins>
@@ -216,7 +216,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml b/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
index b8453dd..b51d41d 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
@@ -1,26 +1,22 @@
<?xml version="1.0"?>
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>hyracks-hdfs-core</artifactId>
<name>hyracks-hdfs-core</name>
<parent>
<artifactId>hyracks-hdfs</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -66,6 +62,18 @@
</filesets>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>2.2</version>
+ <executions>
+ <execution>
+ <goals>
+ <goal>test-jar</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
@@ -192,31 +200,31 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>test</scope>
</dependency>
<dependency>
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java
index 1852a6f..57c20e0 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java
@@ -38,18 +38,13 @@
public void open(IFrameWriter writer) throws HyracksDataException;
/**
- * Parse a key-value pair returned by HDFS record reader to a tuple.
- * when the parsers' internal buffer is full, it can flush the buffer to the writer
- *
* @param key
- * The key returned from Hadoop's InputReader.
* @param value
- * The value returned from Hadoop's InputReader.
* @param writer
- * The hyracks writer for outputting data.
+ *            @param fileString
* @throws HyracksDataException
*/
- public void parse(K key, V value, IFrameWriter writer) throws HyracksDataException;
+ public void parse(K key, V value, IFrameWriter writer, String fileString) throws HyracksDataException;
/**
* Flush the residual tuples in the internal buffer to the writer.
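The widened parse() contract above means every IKeyValueParser implementation must now accept the name of the originating input split. A minimal sketch of parse() under the new signature, modeled on TextKeyValueParserFactory further below; the extra file field and the two-field tuple layout are illustrative assumptions, not part of this patch:

    // Sketch: tag each record with its source file. Assumes a two-field
    // ArrayTupleBuilder (tb) and a FrameTupleAppender (appender) wired as in
    // TextKeyValueParserFactory; only parse() is shown.
    @Override
    public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
            throws HyracksDataException {
        tb.reset();
        byte[] fileBytes = fileString.getBytes();             // split name passed in by the operator
        tb.addField(fileBytes, 0, fileBytes.length);          // field 0: source file
        tb.addField(value.getBytes(), 0, value.getLength());  // field 1: the record itself
        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
            flush(writer); // assumption: flush() forwards the current frame and resets the appender
            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                throw new HyracksDataException("tuple larger than frame size");
            }
        }
    }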
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java
index 674873d..7e6e4dc 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java
@@ -30,6 +30,6 @@
* the IHyracksTaskContext
* @return a tuple writer instance
*/
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException;
+ public ITupleWriter getTupleWriter(IHyracksTaskContext ctx, int partition, int nPartition) throws HyracksDataException;
}
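Passing partition and nPartition into getTupleWriter lets a factory hand out partition-aware writers. A hedged sketch of one use; the header policy and the TextTupleWriter class are invented for illustration:

    @Override
    public ITupleWriter getTupleWriter(IHyracksTaskContext ctx, int partition, int nPartition)
            throws HyracksDataException {
        // e.g. emit a header exactly once across the nPartition output files
        final boolean writeHeader = (partition == 0);
        return new TextTupleWriter(writeHeader); // hypothetical writer class
    }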
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java
index 2cff534..814be7a 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java
@@ -98,8 +98,9 @@
public void initialize() throws HyracksDataException {
ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
try {
- Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
+ Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
JobConf conf = confFactory.getConf();
+ conf.setClassLoader(ctx.getJobletContext().getClassLoader());
IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
writer.open();
parser.open(writer);
@@ -128,7 +129,7 @@
Object key = reader.createKey();
Object value = reader.createValue();
while (reader.next(key, value) == true) {
- parser.parse(key, value, writer);
+ parser.parse(key, value, writer, inputSplits[i].toString());
}
}
}
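The reader now installs the joblet's class loader as the thread-context loader and pushes it into the JobConf, so Hadoop's reflective InputFormat and record-reader instantiation resolves classes shipped with the Hyracks job rather than only those on the operator's own classpath; the hdfs2 variant further below gets the same treatment. The general shape of the pattern, including the restore that the enclosing try/finally performs:

    ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
    try {
        // assumption: jobletCL is ctx.getJobletContext().getClassLoader()
        Thread.currentThread().setContextClassLoader(jobletCL);
        conf.setClassLoader(jobletCL);  // JobConf reflection now sees job-deployed classes
        // ... create record readers and parse the assigned splits ...
    } finally {
        Thread.currentThread().setContextClassLoader(ctxCL); // always restore the caller's loader
    }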
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java
index 432849b..4e48e9b 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java
@@ -89,7 +89,7 @@
String outputDirPath = FileOutputFormat.getOutputPath(conf).toString();
String fileName = outputDirPath + File.separator + "part-" + partition;
- tupleWriter = tupleWriterFactory.getTupleWriter(ctx);
+ tupleWriter = tupleWriterFactory.getTupleWriter(ctx, partition, nPartitions);
try {
FileSystem dfs = FileSystem.get(conf);
dos = dfs.create(new Path(fileName), true);
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java
index fbac95b..92cde9d 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java
@@ -49,7 +49,8 @@
}
@Override
- public void parse(LongWritable key, Text value, IFrameWriter writer) throws HyracksDataException {
+ public void parse(LongWritable key, Text value, IFrameWriter writer, String fileString)
+ throws HyracksDataException {
tb.reset();
tb.addField(value.getBytes(), 0, value.getLength());
if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java
index 92a427e..60be1f7 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java
@@ -27,7 +27,7 @@
private static final long serialVersionUID = 1L;
@Override
- public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) {
+ public ITupleWriter getTupleWriter(IHyracksTaskContext ctx, int partition, int nPartition) {
return new ITupleWriter() {
private byte newLine = "\n".getBytes()[0];
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java
index 3f01d77..bc47360 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java
@@ -112,7 +112,7 @@
public void initialize() throws HyracksDataException {
ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
try {
- Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+ Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
Job job = confFactory.getConf();
job.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
@@ -141,10 +141,12 @@
* read the split
*/
TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
+ context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
reader.initialize(inputSplits.get(i), context);
while (reader.nextKeyValue() == true) {
- parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer);
+ parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer,
+ inputSplits.get(i).toString());
}
}
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java
index 77b8c7e..068cdfc 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java
@@ -88,7 +88,7 @@
String outputPath = FileOutputFormat.getOutputPath(conf).toString();
String fileName = outputPath + File.separator + "part-" + partition;
- tupleWriter = tupleWriterFactory.getTupleWriter(ctx);
+ tupleWriter = tupleWriterFactory.getTupleWriter(ctx, partition, nPartitions);
try {
FileSystem dfs = FileSystem.get(conf.getConfiguration());
dos = dfs.create(new Path(fileName), true);
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java
index 75553e1..85f80ac 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java
@@ -23,6 +23,7 @@
import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.topology.ClusterTopology;
import edu.uci.ics.hyracks.hdfs.api.INcCollectionBuilder;
/**
@@ -54,7 +55,20 @@
public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos) throws HyracksException {
scheduler = new edu.uci.ics.hyracks.hdfs.scheduler.Scheduler(ncNameToNcInfos);
}
-
+
+ /**
+ * The constructor of the scheduler.
+ *
+ * @param ncNameToNcInfos
+ * the mapping from nc names to nc infos
+ * @param topology
+ *            the hyracks cluster topology
+ * @throws HyracksException
+ */
+ public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos, ClusterTopology topology) throws HyracksException {
+ scheduler = new edu.uci.ics.hyracks.hdfs.scheduler.Scheduler(ncNameToNcInfos, topology);
+ }
+
/**
* The constructor of the scheduler.
*
@@ -62,7 +76,8 @@
* the mapping from nc names to nc infos
* @throws HyracksException
*/
- public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos, INcCollectionBuilder builder) throws HyracksException {
+ public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos, INcCollectionBuilder builder)
+ throws HyracksException {
scheduler = new edu.uci.ics.hyracks.hdfs.scheduler.Scheduler(ncNameToNcInfos, builder);
}
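The new constructor lets split scheduling account for rack- and switch-level locality. Hypothetical wiring, assuming the client connection exposes node infos and cluster topology as in the Hyracks client API of this era:

    // hcc: an IHyracksClientConnection to the cluster controller (assumed available)
    Map<String, NodeControllerInfo> ncInfos = hcc.getNodeControllerInfos();
    ClusterTopology topology = hcc.getClusterTopology();
    Scheduler scheduler = new Scheduler(ncInfos, topology); // topology-aware split assignment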
diff --git a/hyracks/hyracks-hdfs/pom.xml b/hyracks/hyracks-hdfs/pom.xml
index 0035285..b174d37 100644
--- a/hyracks/hyracks-hdfs/pom.xml
+++ b/hyracks/hyracks-hdfs/pom.xml
@@ -22,7 +22,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<modules>
diff --git a/hyracks/hyracks-ipc/pom.xml b/hyracks/hyracks-ipc/pom.xml
index 88bb49c..fed59e6 100644
--- a/hyracks/hyracks-ipc/pom.xml
+++ b/hyracks/hyracks-ipc/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
diff --git a/hyracks/hyracks-ipc/src/main/java/edu/uci/ics/hyracks/ipc/impl/IPCConnectionManager.java b/hyracks/hyracks-ipc/src/main/java/edu/uci/ics/hyracks/ipc/impl/IPCConnectionManager.java
index c52dbd8..81294c2 100644
--- a/hyracks/hyracks-ipc/src/main/java/edu/uci/ics/hyracks/ipc/impl/IPCConnectionManager.java
+++ b/hyracks/hyracks-ipc/src/main/java/edu/uci/ics/hyracks/ipc/impl/IPCConnectionManager.java
@@ -17,6 +17,7 @@
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
+import java.net.StandardSocketOptions;
import java.nio.ByteBuffer;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.SelectableChannel;
@@ -175,6 +176,7 @@
if (!workingPendingConnections.isEmpty()) {
for (IPCHandle handle : workingPendingConnections) {
SocketChannel channel = SocketChannel.open();
+ channel.setOption(StandardSocketOptions.TCP_NODELAY, true);
channel.configureBlocking(false);
SelectionKey cKey = null;
if (channel.connect(handle.getRemoteAddress())) {
@@ -267,6 +269,7 @@
} else if (key.isAcceptable()) {
assert sc == serverSocketChannel;
SocketChannel channel = serverSocketChannel.accept();
+ channel.setOption(StandardSocketOptions.TCP_NODELAY, true);
channel.configureBlocking(false);
IPCHandle handle = new IPCHandle(system, null);
SelectionKey cKey = channel.register(selector, SelectionKey.OP_READ);
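Both the connect and the accept paths now set TCP_NODELAY, disabling Nagle's algorithm so that small RPC frames are sent immediately instead of being coalesced; the same change is applied to TCPEndpoint below. StandardSocketOptions is a JDK 7 NIO.2 API; on older runtimes the equivalent (an assumption, not part of this patch) goes through the legacy socket view:

    channel.socket().setTcpNoDelay(true); // pre-NIO.2 equivalent of the option call above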
diff --git a/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/pom.xml b/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/pom.xml
index df11910..5ffbf28 100644
--- a/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/pom.xml
+++ b/hyracks/hyracks-maven-plugins/hyracks-virtualcluster-maven-plugin/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-maven-plugins</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
diff --git a/hyracks/hyracks-maven-plugins/pom.xml b/hyracks/hyracks-maven-plugins/pom.xml
index 6d8a2ec..a2b8d03 100644
--- a/hyracks/hyracks-maven-plugins/pom.xml
+++ b/hyracks/hyracks-maven-plugins/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<dependencies>
diff --git a/hyracks/hyracks-net/pom.xml b/hyracks/hyracks-net/pom.xml
index e5853b7..a188f4c 100644
--- a/hyracks/hyracks-net/pom.xml
+++ b/hyracks/hyracks-net/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
diff --git a/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/tcp/TCPEndpoint.java b/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/tcp/TCPEndpoint.java
index 932d71a..067898f 100644
--- a/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/tcp/TCPEndpoint.java
+++ b/hyracks/hyracks-net/src/main/java/edu/uci/ics/hyracks/net/protocols/tcp/TCPEndpoint.java
@@ -17,6 +17,7 @@
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
+import java.net.StandardSocketOptions;
import java.nio.channels.SelectableChannel;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
@@ -118,6 +119,7 @@
if (!workingPendingConnections.isEmpty()) {
for (InetSocketAddress address : workingPendingConnections) {
SocketChannel channel = SocketChannel.open();
+ channel.setOption(StandardSocketOptions.TCP_NODELAY, true);
channel.configureBlocking(false);
boolean connect = false;
boolean failure = false;
@@ -143,6 +145,7 @@
}
if (!workingIncomingConnections.isEmpty()) {
for (SocketChannel channel : workingIncomingConnections) {
+ channel.setOption(StandardSocketOptions.TCP_NODELAY, true);
channel.configureBlocking(false);
SelectionKey sKey = channel.register(selector, 0);
TCPConnection connection = new TCPConnection(TCPEndpoint.this, channel, sKey, selector);
diff --git a/hyracks/hyracks-server/pom.xml b/hyracks/hyracks-server/pom.xml
index 646880d..f691481 100644
--- a/hyracks/hyracks-server/pom.xml
+++ b/hyracks/hyracks-server/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -88,14 +88,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-storage-am-bloomfilter/pom.xml b/hyracks/hyracks-storage-am-bloomfilter/pom.xml
index 7d9a92e..a712ab4 100644
--- a/hyracks/hyracks-storage-am-bloomfilter/pom.xml
+++ b/hyracks/hyracks-storage-am-bloomfilter/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -39,7 +39,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilter.java b/hyracks/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
index 6811e1b..fe25db8 100644
--- a/hyracks/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
+++ b/hyracks/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilter.java
@@ -47,7 +47,8 @@
private int numHashes;
private long numElements;
private long numBits;
- private int numBitsPerPage;
+ private final int numBitsPerPage;
+ private final static byte[] ZERO_BUFFER = new byte[131072]; // 128 KB
private final ArrayList<ICachedPage> bloomFilterPages = new ArrayList<ICachedPage>();
private final static long SEED = 0L;
@@ -58,7 +59,7 @@
this.fileMapProvider = fileMapProvider;
this.file = file;
this.keyFields = keyFields;
- numBitsPerPage = bufferCache.getPageSize() * Byte.SIZE;
+ this.numBitsPerPage = bufferCache.getPageSize() * Byte.SIZE;
}
public int getFileId() {
@@ -198,7 +199,6 @@
public class BloomFilterBuilder implements IIndexBulkLoader {
private final long[] hashes = new long[2];
-
private final long numElements;
private final int numHashes;
private final long numBits;
@@ -217,16 +217,28 @@
throw new HyracksDataException("Cannot create a bloom filter with his huge number of pages.");
}
numPages = (int) tmp;
- if (this.numElements > 0) {
- persistBloomFilterMetaData();
- readBloomFilterMetaData();
- int currentPageId = 1;
- while (currentPageId <= numPages) {
- ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
- page.acquireWriteLatch();
- bloomFilterPages.add(page);
- ++currentPageId;
- }
+ persistBloomFilterMetaData();
+ readBloomFilterMetaData();
+ int currentPageId = 1;
+ while (currentPageId <= numPages) {
+ ICachedPage page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, currentPageId), true);
+ page.acquireWriteLatch();
+ initPage(page.getBuffer().array());
+ bloomFilterPages.add(page);
+ ++currentPageId;
+ }
+ }
+
+ private void initPage(byte[] array) {
+ int numRounds = array.length / ZERO_BUFFER.length;
+ int leftOver = array.length % ZERO_BUFFER.length;
+ int destPos = 0;
+ for (int i = 0; i < numRounds; i++) {
+ System.arraycopy(ZERO_BUFFER, 0, array, destPos, ZERO_BUFFER.length);
+ destPos = (i + 1) * ZERO_BUFFER.length;
+ }
+ if (leftOver > 0) {
+ System.arraycopy(ZERO_BUFFER, 0, array, destPos, leftOver);
}
}
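The builder no longer skips page allocation when numElements is 0, and every pinned page is now explicitly zeroed by chunked System.arraycopy from the static 128 KB ZERO_BUFFER, so stale buffer-cache contents cannot leak into a freshly built filter. A behaviorally equivalent sketch of initPage using the JDK utility (the patch's arraycopy loop is a common hand-rolled alternative to this):

    private void initPage(byte[] array) {
        java.util.Arrays.fill(array, (byte) 0); // zero the entire page buffer
    }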
diff --git a/hyracks/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterSpecification.java b/hyracks/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterSpecification.java
index a1e5517..0c5d7c8 100644
--- a/hyracks/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterSpecification.java
+++ b/hyracks/hyracks-storage-am-bloomfilter/src/main/java/edu/uci/ics/hyracks/storage/am/bloomfilter/impls/BloomFilterSpecification.java
@@ -16,12 +16,12 @@
package edu.uci.ics.hyracks.storage.am.bloomfilter.impls;
public final class BloomFilterSpecification {
- private final int numBucketsPerElement;
private final int numHashes;
+ private final int numBucketsPerElement;
- public BloomFilterSpecification(int numBucketsPerElement, int numHashes) {
- this.numBucketsPerElement = numBucketsPerElement;
+ public BloomFilterSpecification(int numHashes, int numBucketsPerElement) {
this.numHashes = numHashes;
+ this.numBucketsPerElement = numBucketsPerElement;
}
public int getNumBucketsPerElements() {
@@ -31,4 +31,4 @@
public int getNumHashes() {
return numHashes;
}
-}
+}
\ No newline at end of file
diff --git a/hyracks/hyracks-storage-am-btree/pom.xml b/hyracks/hyracks-storage-am-btree/pom.xml
index 4878a44..2746666 100644
--- a/hyracks/hyracks-storage-am-btree/pom.xml
+++ b/hyracks/hyracks-storage-am-btree/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,28 +41,28 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeNonExistentKeyException.java b/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeNonExistentKeyException.java
deleted file mode 100644
index 7237046..0000000
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeNonExistentKeyException.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
-
-public class BTreeNonExistentKeyException extends BTreeException {
-
- private static final long serialVersionUID = 1L;
-
- public BTreeNonExistentKeyException(Exception e) {
- super(e);
- }
-
- public BTreeNonExistentKeyException(String message) {
- super(message);
- }
-}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/BTreeFieldPrefixNSMLeafFrame.java b/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/BTreeFieldPrefixNSMLeafFrame.java
index b503c8b..93cde3d 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/BTreeFieldPrefixNSMLeafFrame.java
+++ b/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/BTreeFieldPrefixNSMLeafFrame.java
@@ -25,8 +25,6 @@
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.btree.api.IPrefixSlotManager;
import edu.uci.ics.hyracks.storage.am.btree.compressors.FieldPrefixCompressor;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeOpContext.PageValidationInfo;
import edu.uci.ics.hyracks.storage.am.btree.impls.FieldPrefixPrefixTupleReference;
import edu.uci.ics.hyracks.storage.am.btree.impls.FieldPrefixSlotManager;
@@ -37,6 +35,8 @@
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexTupleReference;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.common.frames.FrameOpSpaceStatus;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.FindTupleMode;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.FindTupleNoExactMatchPolicy;
@@ -370,7 +370,7 @@
int tupleIndex = slotManager.decodeSecondSlotField(slot);
// Error indicator is set if there is an exact match.
if (tupleIndex == slotManager.getErrorIndicator()) {
- throw new BTreeDuplicateKeyException("Trying to insert duplicate key into leaf node.");
+ throw new TreeIndexDuplicateKeyException("Trying to insert duplicate key into leaf node.");
}
return slot;
}
@@ -382,7 +382,7 @@
int tupleIndex = slotManager.decodeSecondSlotField(slot);
// Error indicator is set if there is an exact match.
if (tupleIndex == slotManager.getErrorIndicator()) {
- throw new BTreeDuplicateKeyException("Trying to insert duplicate key into leaf node.");
+ throw new TreeIndexDuplicateKeyException("Trying to insert duplicate key into leaf node.");
}
return slot;
}
@@ -411,7 +411,7 @@
int tupleIndex = slotManager.decodeSecondSlotField(slot);
// Error indicator is set if there is no exact match.
if (tupleIndex == slotManager.getErrorIndicator()) {
- throw new BTreeNonExistentKeyException("Trying to update a tuple with a nonexistent key in leaf node.");
+ throw new TreeIndexNonExistentKeyException("Trying to update a tuple with a nonexistent key in leaf node.");
}
return slot;
}
@@ -423,7 +423,7 @@
int tupleIndex = slotManager.decodeSecondSlotField(slot);
// Error indicator is set if there is no exact match.
if (tupleIndex == slotManager.getErrorIndicator()) {
- throw new BTreeNonExistentKeyException("Trying to delete a tuple with a nonexistent key in leaf node.");
+ throw new TreeIndexNonExistentKeyException("Trying to delete a tuple with a nonexistent key in leaf node.");
}
return slot;
}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/BTreeNSMLeafFrame.java b/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/BTreeNSMLeafFrame.java
index 1974989..187cd52 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/BTreeNSMLeafFrame.java
+++ b/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/frames/BTreeNSMLeafFrame.java
@@ -20,14 +20,14 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeOpContext.PageValidationInfo;
import edu.uci.ics.hyracks.storage.am.common.api.ISplitKey;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrame;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexTupleReference;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.common.frames.TreeIndexNSMFrame;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.FindTupleMode;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.FindTupleNoExactMatchPolicy;
@@ -72,7 +72,7 @@
FindTupleNoExactMatchPolicy.HIGHER_KEY);
// Error indicator is set if there is an exact match.
if (tupleIndex == slotManager.getErrorIndicator()) {
- throw new BTreeDuplicateKeyException("Trying to insert duplicate key into leaf node.");
+ throw new TreeIndexDuplicateKeyException("Trying to insert duplicate key into leaf node.");
}
return tupleIndex;
}
@@ -83,7 +83,7 @@
FindTupleNoExactMatchPolicy.HIGHER_KEY);
// Error indicator is set if there is no exact match.
if (tupleIndex == slotManager.getErrorIndicator() || tupleIndex == slotManager.getGreatestKeyIndicator()) {
- throw new BTreeNonExistentKeyException("Trying to update a tuple with a nonexistent key in leaf node.");
+ throw new TreeIndexNonExistentKeyException("Trying to update a tuple with a nonexistent key in leaf node.");
}
return tupleIndex;
}
@@ -121,7 +121,7 @@
FindTupleNoExactMatchPolicy.HIGHER_KEY);
// Error indicator is set if there is no exact match.
if (tupleIndex == slotManager.getErrorIndicator() || tupleIndex == slotManager.getGreatestKeyIndicator()) {
- throw new BTreeNonExistentKeyException("Trying to delete a tuple with a nonexistent key in leaf node.");
+ throw new TreeIndexNonExistentKeyException("Trying to delete a tuple with a nonexistent key in leaf node.");
}
return tupleIndex;
}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java b/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
index 689843b..ff94040 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
+++ b/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/impls/BTree.java
@@ -33,7 +33,6 @@
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.btree.api.ITupleAcceptor;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeException;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNotUpdateableException;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrame;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeOpContext.PageValidationInfo;
@@ -53,6 +52,8 @@
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
import edu.uci.ics.hyracks.storage.am.common.api.UnsortedInputException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.common.frames.FrameOpSpaceStatus;
import edu.uci.ics.hyracks.storage.am.common.impls.AbstractTreeIndex;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
@@ -562,7 +563,7 @@
// This means that there could be underflow, even an empty page that is
// pointed to by an interior node.
if (ctx.leafFrame.getTupleCount() == 0) {
- throw new BTreeNonExistentKeyException("Trying to delete a tuple with a nonexistent key in leaf node.");
+ throw new TreeIndexNonExistentKeyException("Trying to delete a tuple with a nonexistent key in leaf node.");
}
int tupleIndex = ctx.leafFrame.findDeleteTupleIndex(tuple);
ITupleReference beforeTuple = ctx.leafFrame.getMatchingKeyTuple(tuple, tupleIndex);
@@ -1024,9 +1025,12 @@
protected void verifyInputTuple(ITupleReference tuple, ITupleReference prevTuple) throws IndexException,
HyracksDataException {
// New tuple should be strictly greater than last tuple.
- if (cmp.compare(tuple, prevTuple) <= 0) {
- throw new UnsortedInputException(
- "Input stream given to BTree bulk load is not sorted or has duplicates.");
+ int cmpResult = cmp.compare(tuple, prevTuple);
+ if (cmpResult < 0) {
+ throw new UnsortedInputException("Input stream given to BTree bulk load is not sorted.");
+ }
+ if (cmpResult == 0) {
+ throw new TreeIndexDuplicateKeyException("Input stream given to BTree bulk load has duplicates.");
}
}
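Splitting the old single UnsortedInputException into two cases lets bulk-load callers distinguish a genuinely unsorted stream from one that merely contains duplicates. A hedged caller-side sketch; bulkLoader and the skip policy are illustrative, and verifyInputTuple is assumed to run as part of add() when input verification is enabled:

    try {
        bulkLoader.add(tuple);
    } catch (TreeIndexDuplicateKeyException e) {
        // duplicate key: skip (or merge) the tuple and keep loading -- illustrative policy;
        // UnsortedInputException would still propagate and abort the load
    }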
diff --git a/hyracks/hyracks-storage-am-common/pom.xml b/hyracks/hyracks-storage-am-common/pom.xml
index 33bbe5e..309f183 100644
--- a/hyracks/hyracks-storage-am-common/pom.xml
+++ b/hyracks/hyracks-storage-am-common/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,28 +41,28 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexInsertUpdateDeleteOperatorNodePushable.java b/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexInsertUpdateDeleteOperatorNodePushable.java
index 4a9cfb4..092fada 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexInsertUpdateDeleteOperatorNodePushable.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexInsertUpdateDeleteOperatorNodePushable.java
@@ -30,6 +30,8 @@
import edu.uci.ics.hyracks.storage.am.common.api.IModificationOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ITupleFilter;
import edu.uci.ics.hyracks.storage.am.common.api.ITupleFilterFactory;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingFrameTupleReference;
@@ -97,7 +99,11 @@
switch (op) {
case INSERT: {
- indexAccessor.insert(tuple);
+ try {
+ indexAccessor.insert(tuple);
+ } catch (TreeIndexDuplicateKeyException e) {
+ // ignore this exception so that an insert of an already-existing key becomes a no-op
+ }
break;
}
case UPDATE: {
@@ -109,7 +115,11 @@
break;
}
case DELETE: {
- indexAccessor.delete(tuple);
+ try {
+ indexAccessor.delete(tuple);
+ } catch (TreeIndexNonExistentKeyException e) {
+ // ignore the exception to allow deleting non-existent keys
+ }
break;
}
default: {
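The two try/catch blocks above make INSERT and DELETE idempotent: inserting an existing key and deleting a missing key both degrade to no-ops instead of failing the job. A self-contained analogue of these semantics using a TreeMap (hypothetical example, not Hyracks code):

    import java.util.TreeMap;

    public class IdempotentOps {
        public static void main(String[] args) {
            TreeMap<Integer, String> index = new TreeMap<>();
            index.putIfAbsent(1, "a");
            index.putIfAbsent(1, "b"); // duplicate key: ignored, like the INSERT case
            index.remove(2);           // non-existent key: ignored, like the DELETE case
            System.out.println(index); // prints {1=a}
        }
    }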
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexSearchOperatorNodePushable.java b/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexSearchOperatorNodePushable.java
index a79daef..83fb5ee 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexSearchOperatorNodePushable.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/dataflow/IndexSearchOperatorNodePushable.java
@@ -119,7 +119,8 @@
FrameUtils.flushFrame(writeBuffer, writer);
appender.reset(writeBuffer, true);
if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
- throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size (" + appender.getBuffer().capacity() + ")");
+ throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size ("
+ + appender.getBuffer().capacity() + ")");
}
}
}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/exceptions/TreeIndexDuplicateKeyException.java
similarity index 70%
rename from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
rename to hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/exceptions/TreeIndexDuplicateKeyException.java
index cde5022..1767504 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/exceptions/TreeIndexDuplicateKeyException.java
@@ -13,16 +13,18 @@
* limitations under the License.
*/
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+package edu.uci.ics.hyracks.storage.am.common.exceptions;
-public class BTreeDuplicateKeyException extends BTreeException {
+import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
+
+public class TreeIndexDuplicateKeyException extends TreeIndexException {
private static final long serialVersionUID = 1L;
- public BTreeDuplicateKeyException(Exception e) {
+ public TreeIndexDuplicateKeyException(Exception e) {
super(e);
}
- public BTreeDuplicateKeyException(String message) {
+ public TreeIndexDuplicateKeyException(String message) {
super(message);
}
}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/exceptions/TreeIndexNonExistentKeyException.java
similarity index 61%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/exceptions/TreeIndexNonExistentKeyException.java
index cde5022..8b62063 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/exceptions/TreeIndexNonExistentKeyException.java
@@ -13,16 +13,19 @@
* limitations under the License.
*/
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+package edu.uci.ics.hyracks.storage.am.common.exceptions;
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
- }
+public class TreeIndexNonExistentKeyException extends TreeIndexException {
- public BTreeDuplicateKeyException(String message) {
- super(message);
- }
+ private static final long serialVersionUID = 1L;
+
+ public TreeIndexNonExistentKeyException(Exception e) {
+ super(e);
+ }
+
+ public TreeIndexNonExistentKeyException(String message) {
+ super(message);
+ }
}
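Both relocated exceptions extend TreeIndexException, so existing handlers keyed on the supertype keep working while callers that care can catch the specific subtype first. A hedged sketch of the intended consumption (the accessor call is illustrative):

    try {
        indexAccessor.insert(tuple);
    } catch (TreeIndexDuplicateKeyException e) {
        // specific handling: the key is already present
    } catch (TreeIndexException e) {
        // generic handling still covers both new subtypes
    }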
diff --git a/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/impls/AbstractTreeIndex.java b/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/impls/AbstractTreeIndex.java
index 19d40a0..c14f23a 100644
--- a/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/impls/AbstractTreeIndex.java
+++ b/hyracks/hyracks-storage-am-common/src/main/java/edu/uci/ics/hyracks/storage/am/common/impls/AbstractTreeIndex.java
@@ -12,352 +12,360 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-package edu.uci.ics.hyracks.storage.am.common.impls;
-
-import java.util.ArrayList;
-
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.io.FileReference;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
-import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrame;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
-import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
-import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
-import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
-import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
-import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
-import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
-import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
-
-public abstract class AbstractTreeIndex implements ITreeIndex {
-
- protected final static int rootPage = 1;
-
- protected final IBufferCache bufferCache;
- protected final IFileMapProvider fileMapProvider;
- protected final IFreePageManager freePageManager;
-
- protected final ITreeIndexFrameFactory interiorFrameFactory;
- protected final ITreeIndexFrameFactory leafFrameFactory;
-
- protected final IBinaryComparatorFactory[] cmpFactories;
- protected final int fieldCount;
-
- protected FileReference file;
- protected int fileId = -1;
-
- private boolean isActivated = false;
-
- public AbstractTreeIndex(IBufferCache bufferCache, IFileMapProvider fileMapProvider,
- IFreePageManager freePageManager, ITreeIndexFrameFactory interiorFrameFactory,
- ITreeIndexFrameFactory leafFrameFactory, IBinaryComparatorFactory[] cmpFactories, int fieldCount,
- FileReference file) {
- this.bufferCache = bufferCache;
- this.fileMapProvider = fileMapProvider;
- this.freePageManager = freePageManager;
- this.interiorFrameFactory = interiorFrameFactory;
- this.leafFrameFactory = leafFrameFactory;
- this.cmpFactories = cmpFactories;
- this.fieldCount = fieldCount;
- this.file = file;
- }
-
- public synchronized void create() throws HyracksDataException {
- if (isActivated) {
- throw new HyracksDataException("Failed to create the index since it is activated.");
- }
-
- boolean fileIsMapped = false;
- synchronized (fileMapProvider) {
- fileIsMapped = fileMapProvider.isMapped(file);
- if (!fileIsMapped) {
- bufferCache.createFile(file);
- }
- fileId = fileMapProvider.lookupFileId(file);
- try {
- // Also creates the file if it doesn't exist yet.
- bufferCache.openFile(fileId);
- } catch (HyracksDataException e) {
- // Revert state of buffer cache since file failed to open.
- if (!fileIsMapped) {
- bufferCache.deleteFile(fileId, false);
- }
- throw e;
- }
- }
-
- freePageManager.open(fileId);
- initEmptyTree();
- freePageManager.close();
- bufferCache.closeFile(fileId);
- }
-
- private void initEmptyTree() throws HyracksDataException {
- ITreeIndexFrame frame = leafFrameFactory.createFrame();
- ITreeIndexMetaDataFrame metaFrame = freePageManager.getMetaDataFrameFactory().createFrame();
- freePageManager.init(metaFrame, rootPage);
-
- ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), true);
- rootNode.acquireWriteLatch();
- try {
- frame.setPage(rootNode);
- frame.initBuffer((byte) 0);
- } finally {
- rootNode.releaseWriteLatch();
- bufferCache.unpin(rootNode);
- }
- }
-
- public synchronized void activate() throws HyracksDataException {
- if (isActivated) {
- throw new HyracksDataException("Failed to activate the index since it is already activated.");
- }
-
- boolean fileIsMapped = false;
- synchronized (fileMapProvider) {
- fileIsMapped = fileMapProvider.isMapped(file);
- if (!fileIsMapped) {
- bufferCache.createFile(file);
- }
- fileId = fileMapProvider.lookupFileId(file);
- try {
- // Also creates the file if it doesn't exist yet.
- bufferCache.openFile(fileId);
- } catch (HyracksDataException e) {
- // Revert state of buffer cache since file failed to open.
- if (!fileIsMapped) {
- bufferCache.deleteFile(fileId, false);
- }
- throw e;
- }
- }
- freePageManager.open(fileId);
-
- // TODO: Should probably have some way to check that the tree is physically consistent
- // or that the file we just opened actually is a tree
-
- isActivated = true;
- }
-
- public synchronized void deactivate() throws HyracksDataException {
- if (!isActivated) {
- throw new HyracksDataException("Failed to deactivate the index since it is already deactivated.");
- }
-
- bufferCache.closeFile(fileId);
- freePageManager.close();
-
- isActivated = false;
- }
-
- public synchronized void destroy() throws HyracksDataException {
- if (isActivated) {
- throw new HyracksDataException("Failed to destroy the index since it is activated.");
- }
-
- file.delete();
- if (fileId == -1) {
- return;
- }
-
- bufferCache.deleteFile(fileId, false);
- fileId = -1;
- }
-
- public synchronized void clear() throws HyracksDataException {
- if (!isActivated) {
- throw new HyracksDataException("Failed to clear the index since it is not activated.");
- }
- initEmptyTree();
- }
-
- public boolean isEmptyTree(ITreeIndexFrame frame) throws HyracksDataException {
- ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), false);
- rootNode.acquireReadLatch();
- try {
- frame.setPage(rootNode);
- if (frame.getLevel() == 0 && frame.getTupleCount() == 0) {
- return true;
- } else {
- return false;
- }
- } finally {
- rootNode.releaseReadLatch();
- bufferCache.unpin(rootNode);
- }
- }
-
- public byte getTreeHeight(ITreeIndexFrame frame) throws HyracksDataException {
- ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), false);
- rootNode.acquireReadLatch();
- try {
- frame.setPage(rootNode);
- return frame.getLevel();
- } finally {
- rootNode.releaseReadLatch();
- bufferCache.unpin(rootNode);
- }
- }
-
- public int getFileId() {
- return fileId;
- }
-
- public FileReference getFileReference() {
- return file;
- }
-
- public IBufferCache getBufferCache() {
- return bufferCache;
- }
-
- public ITreeIndexFrameFactory getInteriorFrameFactory() {
- return interiorFrameFactory;
- }
-
- public ITreeIndexFrameFactory getLeafFrameFactory() {
- return leafFrameFactory;
- }
-
- public IBinaryComparatorFactory[] getComparatorFactories() {
- return cmpFactories;
- }
-
- public IFreePageManager getFreePageManager() {
- return freePageManager;
- }
-
- public int getRootPageId() {
- return rootPage;
- }
-
- public int getFieldCount() {
- return fieldCount;
- }
-
- public abstract class AbstractTreeIndexBulkLoader implements IIndexBulkLoader {
- protected final MultiComparator cmp;
- protected final int slotSize;
- protected final int leafMaxBytes;
- protected final int interiorMaxBytes;
- protected final ArrayList<NodeFrontier> nodeFrontiers = new ArrayList<NodeFrontier>();
- protected final ITreeIndexMetaDataFrame metaFrame;
- protected final ITreeIndexTupleWriter tupleWriter;
- protected ITreeIndexFrame leafFrame;
- protected ITreeIndexFrame interiorFrame;
-
- public AbstractTreeIndexBulkLoader(float fillFactor) throws TreeIndexException, HyracksDataException {
- leafFrame = leafFrameFactory.createFrame();
- interiorFrame = interiorFrameFactory.createFrame();
- metaFrame = freePageManager.getMetaDataFrameFactory().createFrame();
-
- if (!isEmptyTree(leafFrame)) {
- throw new TreeIndexException("Cannot bulk-load a non-empty tree.");
- }
-
- this.cmp = MultiComparator.createIgnoreFieldLength(cmpFactories);
-
- leafFrame.setMultiComparator(cmp);
- interiorFrame.setMultiComparator(cmp);
-
- tupleWriter = leafFrame.getTupleWriter();
-
- NodeFrontier leafFrontier = new NodeFrontier(leafFrame.createTupleReference());
- leafFrontier.pageId = freePageManager.getFreePage(metaFrame);
- leafFrontier.page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, leafFrontier.pageId), true);
- leafFrontier.page.acquireWriteLatch();
-
- interiorFrame.setPage(leafFrontier.page);
- interiorFrame.initBuffer((byte) 0);
- interiorMaxBytes = (int) ((float) interiorFrame.getBuffer().capacity() * fillFactor);
-
- leafFrame.setPage(leafFrontier.page);
- leafFrame.initBuffer((byte) 0);
- leafMaxBytes = (int) ((float) leafFrame.getBuffer().capacity() * fillFactor);
- slotSize = leafFrame.getSlotSize();
-
- nodeFrontiers.add(leafFrontier);
- }
-
- public abstract void add(ITupleReference tuple) throws IndexException, HyracksDataException;
-
- protected void handleException() throws HyracksDataException {
- // Unlatch and unpin pages.
- for (NodeFrontier nodeFrontier : nodeFrontiers) {
- nodeFrontier.page.releaseWriteLatch();
- bufferCache.unpin(nodeFrontier.page);
- }
- }
-
- @Override
- public void end() throws HyracksDataException {
- // copy the root generated from the bulk-load to *the* root page location
- ICachedPage newRoot = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), true);
- newRoot.acquireWriteLatch();
- NodeFrontier lastNodeFrontier = nodeFrontiers.get(nodeFrontiers.size() - 1);
- try {
- System.arraycopy(lastNodeFrontier.page.getBuffer().array(), 0, newRoot.getBuffer().array(), 0,
- lastNodeFrontier.page.getBuffer().capacity());
- } finally {
- newRoot.releaseWriteLatch();
- bufferCache.unpin(newRoot);
-
- // register old root as a free page
- freePageManager.addFreePage(metaFrame, lastNodeFrontier.pageId);
-
- for (int i = 0; i < nodeFrontiers.size(); i++) {
- nodeFrontiers.get(i).page.releaseWriteLatch();
- bufferCache.unpin(nodeFrontiers.get(i).page);
- }
- }
- }
-
- protected void addLevel() throws HyracksDataException {
- NodeFrontier frontier = new NodeFrontier(tupleWriter.createTupleReference());
- frontier.pageId = freePageManager.getFreePage(metaFrame);
- frontier.page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, frontier.pageId), true);
- frontier.page.acquireWriteLatch();
- frontier.lastTuple.setFieldCount(cmp.getKeyFieldCount());
- interiorFrame.setPage(frontier.page);
- interiorFrame.initBuffer((byte) nodeFrontiers.size());
- nodeFrontiers.add(frontier);
- }
- }
-
- public class TreeIndexInsertBulkLoader implements IIndexBulkLoader {
- ITreeIndexAccessor accessor;
-
- public TreeIndexInsertBulkLoader() throws HyracksDataException {
- accessor = (ITreeIndexAccessor) createAccessor(NoOpOperationCallback.INSTANCE,
- NoOpOperationCallback.INSTANCE);
- }
-
- @Override
- public void add(ITupleReference tuple) throws HyracksDataException {
- try {
- accessor.insert(tuple);
- } catch (IndexException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void end() throws HyracksDataException {
- // do nothing
- }
-
- }
-
- @Override
- public long getMemoryAllocationSize() {
- return 0;
- }
+
+package edu.uci.ics.hyracks.storage.am.common.impls;
+
+import java.util.ArrayList;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrame;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
+import edu.uci.ics.hyracks.storage.common.file.BufferedFileHandle;
+import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+
+public abstract class AbstractTreeIndex implements ITreeIndex {
+
+ protected final static int rootPage = 1;
+
+ protected final IBufferCache bufferCache;
+ protected final IFileMapProvider fileMapProvider;
+ protected final IFreePageManager freePageManager;
+
+ protected final ITreeIndexFrameFactory interiorFrameFactory;
+ protected final ITreeIndexFrameFactory leafFrameFactory;
+
+ protected final IBinaryComparatorFactory[] cmpFactories;
+ protected final int fieldCount;
+
+ protected FileReference file;
+ protected int fileId = -1;
+
+ private boolean isActivated = false;
+
+ public AbstractTreeIndex(IBufferCache bufferCache, IFileMapProvider fileMapProvider,
+ IFreePageManager freePageManager, ITreeIndexFrameFactory interiorFrameFactory,
+ ITreeIndexFrameFactory leafFrameFactory, IBinaryComparatorFactory[] cmpFactories, int fieldCount,
+ FileReference file) {
+ this.bufferCache = bufferCache;
+ this.fileMapProvider = fileMapProvider;
+ this.freePageManager = freePageManager;
+ this.interiorFrameFactory = interiorFrameFactory;
+ this.leafFrameFactory = leafFrameFactory;
+ this.cmpFactories = cmpFactories;
+ this.fieldCount = fieldCount;
+ this.file = file;
+ }
+
+ public synchronized void create() throws HyracksDataException {
+ if (isActivated) {
+ throw new HyracksDataException("Failed to create the index since it is activated.");
+ }
+
+ boolean fileIsMapped = false;
+ synchronized (fileMapProvider) {
+ fileIsMapped = fileMapProvider.isMapped(file);
+ if (!fileIsMapped) {
+ bufferCache.createFile(file);
+ }
+ fileId = fileMapProvider.lookupFileId(file);
+ try {
+ // Also creates the file if it doesn't exist yet.
+ bufferCache.openFile(fileId);
+ } catch (HyracksDataException e) {
+ // Revert state of buffer cache since file failed to open.
+ if (!fileIsMapped) {
+ bufferCache.deleteFile(fileId, false);
+ }
+ throw e;
+ }
+ }
+
+ freePageManager.open(fileId);
+ initEmptyTree();
+ freePageManager.close();
+ bufferCache.closeFile(fileId);
+ }
+
+ private void initEmptyTree() throws HyracksDataException {
+ ITreeIndexFrame frame = leafFrameFactory.createFrame();
+ ITreeIndexMetaDataFrame metaFrame = freePageManager.getMetaDataFrameFactory().createFrame();
+ freePageManager.init(metaFrame, rootPage);
+
+ ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), true);
+ rootNode.acquireWriteLatch();
+ try {
+ frame.setPage(rootNode);
+ frame.initBuffer((byte) 0);
+ } finally {
+ rootNode.releaseWriteLatch();
+ bufferCache.unpin(rootNode);
+ }
+ }
+
+ public synchronized void activate() throws HyracksDataException {
+ if (isActivated) {
+ throw new HyracksDataException("Failed to activate the index since it is already activated.");
+ }
+
+ boolean fileIsMapped = false;
+ synchronized (fileMapProvider) {
+ fileIsMapped = fileMapProvider.isMapped(file);
+ if (!fileIsMapped) {
+ bufferCache.createFile(file);
+ }
+ fileId = fileMapProvider.lookupFileId(file);
+ try {
+ // Also creates the file if it doesn't exist yet.
+ bufferCache.openFile(fileId);
+ } catch (HyracksDataException e) {
+ // Revert state of buffer cache since file failed to open.
+ if (!fileIsMapped) {
+ bufferCache.deleteFile(fileId, false);
+ }
+ throw e;
+ }
+ }
+ freePageManager.open(fileId);
+
+ // TODO: Should probably have some way to check that the tree is physically consistent
+ // or that the file we just opened actually is a tree
+
+ isActivated = true;
+ }
+
+ public synchronized void deactivate() throws HyracksDataException {
+ if (!isActivated) {
+ throw new HyracksDataException("Failed to deactivate the index since it is already deactivated.");
+ }
+
+ bufferCache.closeFile(fileId);
+ freePageManager.close();
+
+ isActivated = false;
+ }
+
+ public synchronized void destroy() throws HyracksDataException {
+ if (isActivated) {
+ throw new HyracksDataException("Failed to destroy the index since it is activated.");
+ }
+
+ file.delete();
+ if (fileId == -1) {
+ return;
+ }
+
+ bufferCache.deleteFile(fileId, false);
+ fileId = -1;
+ }
+
+ public synchronized void clear() throws HyracksDataException {
+ if (!isActivated) {
+ throw new HyracksDataException("Failed to clear the index since it is not activated.");
+ }
+ initEmptyTree();
+ }
+
+ public boolean isEmptyTree(ITreeIndexFrame frame) throws HyracksDataException {
+ ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), false);
+ rootNode.acquireReadLatch();
+ try {
+ frame.setPage(rootNode);
+ if (frame.getLevel() == 0 && frame.getTupleCount() == 0) {
+ return true;
+ } else {
+ return false;
+ }
+ } finally {
+ rootNode.releaseReadLatch();
+ bufferCache.unpin(rootNode);
+ }
+ }
+
+ public byte getTreeHeight(ITreeIndexFrame frame) throws HyracksDataException {
+ ICachedPage rootNode = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), false);
+ rootNode.acquireReadLatch();
+ try {
+ frame.setPage(rootNode);
+ return frame.getLevel();
+ } finally {
+ rootNode.releaseReadLatch();
+ bufferCache.unpin(rootNode);
+ }
+ }
+
+ public int getFileId() {
+ return fileId;
+ }
+
+ public FileReference getFileReference() {
+ return file;
+ }
+
+ public IBufferCache getBufferCache() {
+ return bufferCache;
+ }
+
+ public ITreeIndexFrameFactory getInteriorFrameFactory() {
+ return interiorFrameFactory;
+ }
+
+ public ITreeIndexFrameFactory getLeafFrameFactory() {
+ return leafFrameFactory;
+ }
+
+ public IBinaryComparatorFactory[] getComparatorFactories() {
+ return cmpFactories;
+ }
+
+ public IFreePageManager getFreePageManager() {
+ return freePageManager;
+ }
+
+ public int getRootPageId() {
+ return rootPage;
+ }
+
+ public int getFieldCount() {
+ return fieldCount;
+ }
+
+ public abstract class AbstractTreeIndexBulkLoader implements IIndexBulkLoader {
+ protected final MultiComparator cmp;
+ protected final int slotSize;
+ protected final int leafMaxBytes;
+ protected final int interiorMaxBytes;
+ protected final ArrayList<NodeFrontier> nodeFrontiers = new ArrayList<NodeFrontier>();
+ protected final ITreeIndexMetaDataFrame metaFrame;
+ protected final ITreeIndexTupleWriter tupleWriter;
+ protected ITreeIndexFrame leafFrame;
+ protected ITreeIndexFrame interiorFrame;
+ private boolean releasedLatches;
+
+ public AbstractTreeIndexBulkLoader(float fillFactor) throws TreeIndexException, HyracksDataException {
+ leafFrame = leafFrameFactory.createFrame();
+ interiorFrame = interiorFrameFactory.createFrame();
+ metaFrame = freePageManager.getMetaDataFrameFactory().createFrame();
+
+ if (!isEmptyTree(leafFrame)) {
+ throw new TreeIndexException("Cannot bulk-load a non-empty tree.");
+ }
+
+ this.cmp = MultiComparator.createIgnoreFieldLength(cmpFactories);
+
+ leafFrame.setMultiComparator(cmp);
+ interiorFrame.setMultiComparator(cmp);
+
+ tupleWriter = leafFrame.getTupleWriter();
+
+ NodeFrontier leafFrontier = new NodeFrontier(leafFrame.createTupleReference());
+ leafFrontier.pageId = freePageManager.getFreePage(metaFrame);
+ leafFrontier.page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, leafFrontier.pageId), true);
+ leafFrontier.page.acquireWriteLatch();
+
+ interiorFrame.setPage(leafFrontier.page);
+ interiorFrame.initBuffer((byte) 0);
+ interiorMaxBytes = (int) ((float) interiorFrame.getBuffer().capacity() * fillFactor);
+
+ leafFrame.setPage(leafFrontier.page);
+ leafFrame.initBuffer((byte) 0);
+ leafMaxBytes = (int) ((float) leafFrame.getBuffer().capacity() * fillFactor);
+ slotSize = leafFrame.getSlotSize();
+
+ nodeFrontiers.add(leafFrontier);
+ }
+
+ public abstract void add(ITupleReference tuple) throws IndexException, HyracksDataException;
+
+ protected void handleException() throws HyracksDataException {
+ // Unlatch and unpin pages.
+ for (NodeFrontier nodeFrontier : nodeFrontiers) {
+ nodeFrontier.page.releaseWriteLatch();
+ bufferCache.unpin(nodeFrontier.page);
+ }
+ releasedLatches = true;
+ }
+
+ @Override
+ public void end() throws HyracksDataException {
+ // copy the root generated from the bulk-load to *the* root page location
+ ICachedPage newRoot = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, rootPage), true);
+ newRoot.acquireWriteLatch();
+ NodeFrontier lastNodeFrontier = nodeFrontiers.get(nodeFrontiers.size() - 1);
+ try {
+ System.arraycopy(lastNodeFrontier.page.getBuffer().array(), 0, newRoot.getBuffer().array(), 0,
+ lastNodeFrontier.page.getBuffer().capacity());
+ } finally {
+ newRoot.releaseWriteLatch();
+ bufferCache.unpin(newRoot);
+
+ // register old root as a free page
+ freePageManager.addFreePage(metaFrame, lastNodeFrontier.pageId);
+
+ if (!releasedLatches) {
+ for (int i = 0; i < nodeFrontiers.size(); i++) {
+ try {
+ nodeFrontiers.get(i).page.releaseWriteLatch();
+ } catch (Exception e) {
+ // ignore the IllegalMonitorStateException thrown if the latch was already released
+ }
+ bufferCache.unpin(nodeFrontiers.get(i).page);
+ }
+ }
+ }
+ }
+
+ protected void addLevel() throws HyracksDataException {
+ NodeFrontier frontier = new NodeFrontier(tupleWriter.createTupleReference());
+ frontier.pageId = freePageManager.getFreePage(metaFrame);
+ frontier.page = bufferCache.pin(BufferedFileHandle.getDiskPageId(fileId, frontier.pageId), true);
+ frontier.page.acquireWriteLatch();
+ frontier.lastTuple.setFieldCount(cmp.getKeyFieldCount());
+ interiorFrame.setPage(frontier.page);
+ interiorFrame.initBuffer((byte) nodeFrontiers.size());
+ nodeFrontiers.add(frontier);
+ }
+ }
+
+ public class TreeIndexInsertBulkLoader implements IIndexBulkLoader {
+ ITreeIndexAccessor accessor;
+
+ public TreeIndexInsertBulkLoader() throws HyracksDataException {
+ accessor = (ITreeIndexAccessor) createAccessor(NoOpOperationCallback.INSTANCE,
+ NoOpOperationCallback.INSTANCE);
+ }
+
+ @Override
+ public void add(ITupleReference tuple) throws HyracksDataException {
+ try {
+ accessor.insert(tuple);
+ } catch (IndexException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void end() throws HyracksDataException {
+ // do nothing
+ }
+
+ }
+
+ @Override
+ public long getMemoryAllocationSize() {
+ return 0;
+ }
}
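The only behavioral change in this otherwise whitespace-dominated rewrite of AbstractTreeIndex is the releasedLatches flag: end() now skips its unlatch loop when handleException() has already released the frontier pages, so a write latch is never released twice. A minimal sketch of that release-once pattern (illustrative names):

    class LatchedResource {
        private boolean released;

        void releaseOnError() { // analogous to handleException()
            doRelease();
            released = true;
        }

        void close() { // analogous to end()
            if (!released) {
                doRelease(); // skip if the error path already released everything
            }
        }

        private void doRelease() { /* unlatch and unpin pages */ }
    }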
diff --git a/hyracks/hyracks-storage-am-lsm-btree/pom.xml b/hyracks/hyracks-storage-am-lsm-btree/pom.xml
index 158443d..d18210e 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/pom.xml
+++ b/hyracks/hyracks-storage-am-lsm-btree/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -39,21 +39,21 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-bloomfilter</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java
index f217419..8a3c313 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelper.java
@@ -15,13 +15,15 @@
package edu.uci.ics.hyracks.storage.am.lsm.btree.dataflow;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
import edu.uci.ics.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.lsm.btree.util.LSMBTreeUtils;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
@@ -31,28 +33,28 @@
public class LSMBTreeDataflowHelper extends AbstractLSMIndexDataflowHelper {
public LSMBTreeDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- IVirtualBufferCache virtualBufferCache, ILSMMergePolicy mergePolicy,
+ List<IVirtualBufferCache> virtualBufferCaches, ILSMMergePolicy mergePolicy,
ILSMOperationTrackerProvider opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- this(opDesc, ctx, partition, virtualBufferCache, DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE, mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ ILSMIOOperationCallbackFactory ioOpCallbackFactory) {
+ this(opDesc, ctx, partition, virtualBufferCaches, DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE, mergePolicy,
+ opTrackerFactory, ioScheduler, ioOpCallbackFactory);
}
public LSMBTreeDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- IVirtualBufferCache virtualBufferCache, double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy,
- ILSMOperationTrackerProvider opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- super(opDesc, ctx, partition, virtualBufferCache, bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory,
- ioScheduler, ioOpCallbackProvider);
+ List<IVirtualBufferCache> virtualBufferCaches, double bloomFilterFalsePositiveRate,
+ ILSMMergePolicy mergePolicy, ILSMOperationTrackerProvider opTrackerFactory,
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackFactory ioOpCallbackFactory) {
+ super(opDesc, ctx, partition, virtualBufferCaches, bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory,
+ ioScheduler, ioOpCallbackFactory);
}
@Override
public ITreeIndex createIndexInstance() throws HyracksDataException {
AbstractTreeIndexOperatorDescriptor treeOpDesc = (AbstractTreeIndexOperatorDescriptor) opDesc;
- return LSMBTreeUtils.createLSMTree(virtualBufferCache, file, opDesc.getStorageManager().getBufferCache(ctx),
+ return LSMBTreeUtils.createLSMTree(virtualBufferCaches, file, opDesc.getStorageManager().getBufferCache(ctx),
opDesc.getStorageManager().getFileMapProvider(ctx), treeOpDesc.getTreeIndexTypeTraits(),
treeOpDesc.getTreeIndexComparatorFactories(), treeOpDesc.getTreeIndexBloomFilterKeyFields(),
bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory.getOperationTracker(ctx), ioScheduler,
- ioOpCallbackProvider);
+ ioOpCallbackFactory.createIOOperationCallback());
}
}
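The helper now takes one virtual buffer cache per in-memory component instead of a single cache. A hypothetical caller-side sketch against the new first constructor (all variable names here are assumptions, not from the repository):

    List<IVirtualBufferCache> caches = Arrays.asList(cacheForComponent0, cacheForComponent1);
    LSMBTreeDataflowHelper helper = new LSMBTreeDataflowHelper(opDesc, ctx, partition,
            caches, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackFactory);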
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java
index d331880..104a70d 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/dataflow/LSMBTreeDataflowHelperFactory.java
@@ -18,7 +18,7 @@
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexDataflowHelper;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicyProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
@@ -31,18 +31,18 @@
public LSMBTreeDataflowHelperFactory(IVirtualBufferCacheProvider virtualBufferCacheProvider,
ILSMMergePolicyProvider mergePolicyProvider, ILSMOperationTrackerProvider opTrackerFactory,
- ILSMIOOperationSchedulerProvider ioSchedulerProvider, ILSMIOOperationCallbackProvider ioOpCallbackProvider,
+ ILSMIOOperationSchedulerProvider ioSchedulerProvider, ILSMIOOperationCallbackFactory ioOpCallbackFactory,
double bloomFilterFalsePositiveRate) {
super(virtualBufferCacheProvider, mergePolicyProvider, opTrackerFactory, ioSchedulerProvider,
- ioOpCallbackProvider, bloomFilterFalsePositiveRate);
+ ioOpCallbackFactory, bloomFilterFalsePositiveRate);
}
@Override
public IndexDataflowHelper createIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
int partition) {
return new LSMBTreeDataflowHelper(opDesc, ctx, partition,
- virtualBufferCacheProvider.getVirtualBufferCache(ctx), bloomFilterFalsePositiveRate,
+ virtualBufferCacheProvider.getVirtualBufferCaches(ctx), bloomFilterFalsePositiveRate,
mergePolicyProvider.getMergePolicy(ctx), opTrackerFactory, ioSchedulerProvider.getIOScheduler(ctx),
- ioOpCallbackProvider);
+ ioOpCallbackFactory);
}
}
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
index 3557d73..33ae7bb 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTree.java
@@ -27,7 +27,6 @@
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeBulkLoader;
@@ -46,6 +45,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
@@ -67,6 +67,7 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractLSMIndex;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BlockingIOOperationCallbackWrapper;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
@@ -74,38 +75,42 @@
public class LSMBTree extends AbstractLSMIndex implements ITreeIndex {
- // In-memory components.
- private final LSMBTreeMutableComponent mutableComponent;
-
// For creating BTree's used in flush and merge.
- private final LSMBTreeImmutableComponentFactory componentFactory;
+ private final LSMBTreeDiskComponentFactory componentFactory;
// For creating BTree's used in bulk load. Different from diskBTreeFactory
// because it should have a different tuple writer in it's leaf frames.
- private final LSMBTreeImmutableComponentFactory bulkLoadComponentFactory;
+ private final LSMBTreeDiskComponentFactory bulkLoadComponentFactory;
// Common for in-memory and on-disk components.
private final ITreeIndexFrameFactory insertLeafFrameFactory;
private final ITreeIndexFrameFactory deleteLeafFrameFactory;
private final IBinaryComparatorFactory[] cmpFactories;
- public LSMBTree(IVirtualBufferCache virtualBufferCache, ITreeIndexFrameFactory interiorFrameFactory,
+ public LSMBTree(List<IVirtualBufferCache> virtualBufferCaches, ITreeIndexFrameFactory interiorFrameFactory,
ITreeIndexFrameFactory insertLeafFrameFactory, ITreeIndexFrameFactory deleteLeafFrameFactory,
ILSMIndexFileManager fileManager, TreeIndexFactory<BTree> diskBTreeFactory,
TreeIndexFactory<BTree> bulkLoadBTreeFactory, BloomFilterFactory bloomFilterFactory,
double bloomFilterFalsePositiveRate, IFileMapProvider diskFileMapProvider, int fieldCount,
IBinaryComparatorFactory[] cmpFactories, ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- super(virtualBufferCache, diskBTreeFactory.getBufferCache(), fileManager, diskFileMapProvider,
- bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler, ioOpCallbackProvider);
- mutableComponent = new LSMBTreeMutableComponent(new BTree(virtualBufferCache,
- virtualBufferCache.getFileMapProvider(), new VirtualFreePageManager(virtualBufferCache.getNumPages()),
- interiorFrameFactory, insertLeafFrameFactory, cmpFactories, fieldCount, new FileReference(new File(
- fileManager.getBaseDir() + "_virtual"))), virtualBufferCache);
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback) {
+ super(virtualBufferCaches, diskBTreeFactory.getBufferCache(), fileManager, diskFileMapProvider,
+ bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler, ioOpCallback);
+ int i = 0;
+ for (IVirtualBufferCache virtualBufferCache : virtualBufferCaches) {
+ LSMBTreeMemoryComponent mutableComponent = new LSMBTreeMemoryComponent(new BTree(virtualBufferCache,
+ virtualBufferCache.getFileMapProvider(), new VirtualFreePageManager(
+ virtualBufferCache.getNumPages()), interiorFrameFactory, insertLeafFrameFactory,
+ cmpFactories, fieldCount, new FileReference(new File(fileManager.getBaseDir() + "_virtual_" + i))),
+ virtualBufferCache, i == 0);
+ memoryComponents.add(mutableComponent);
+ ++i;
+ }
+
this.insertLeafFrameFactory = insertLeafFrameFactory;
this.deleteLeafFrameFactory = deleteLeafFrameFactory;
this.cmpFactories = cmpFactories;
- componentFactory = new LSMBTreeImmutableComponentFactory(diskBTreeFactory, bloomFilterFactory);
- bulkLoadComponentFactory = new LSMBTreeImmutableComponentFactory(bulkLoadBTreeFactory, bloomFilterFactory);
+ componentFactory = new LSMBTreeDiskComponentFactory(diskBTreeFactory, bloomFilterFactory);
+ bulkLoadComponentFactory = new LSMBTreeDiskComponentFactory(bulkLoadBTreeFactory, bloomFilterFactory);
}
@Override
@@ -116,7 +121,7 @@
fileManager.deleteDirs();
fileManager.createDirs();
- componentsRef.get().clear();
+ diskComponents.clear();
}
@Override
@@ -124,11 +129,13 @@
if (isActivated) {
throw new HyracksDataException("Failed to activate the index since it is already activated.");
}
-
- ((IVirtualBufferCache) mutableComponent.getBTree().getBufferCache()).open();
- mutableComponent.getBTree().create();
- mutableComponent.getBTree().activate();
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ for (ILSMComponent c : memoryComponents) {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) c;
+ ((IVirtualBufferCache) mutableComponent.getBTree().getBufferCache()).open();
+ mutableComponent.getBTree().create();
+ mutableComponent.getBTree().activate();
+ }
+ List<ILSMComponent> immutableComponents = diskComponents;
immutableComponents.clear();
List<LSMComponentFileReferences> validFileReferences;
try {
@@ -137,7 +144,7 @@
throw new HyracksDataException(e);
}
for (LSMComponentFileReferences lsmComonentFileReference : validFileReferences) {
- LSMBTreeImmutableComponent component;
+ LSMBTreeDiskComponent component;
try {
component = createDiskComponent(componentFactory,
lsmComonentFileReference.getInsertIndexFileReference(),
@@ -158,7 +165,7 @@
if (flushOnExit) {
BlockingIOOperationCallbackWrapper cb = new BlockingIOOperationCallbackWrapper(
- ioOpCallbackProvider.getIOOperationCallback(this));
+ ioOpCallback);
ILSMIndexAccessor accessor = createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
accessor.scheduleFlush(cb);
try {
@@ -168,17 +175,20 @@
}
}
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) c;
+ LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) c;
BTree btree = component.getBTree();
BloomFilter bloomFilter = component.getBloomFilter();
btree.deactivate();
bloomFilter.deactivate();
}
- mutableComponent.getBTree().deactivate();
- mutableComponent.getBTree().destroy();
- ((IVirtualBufferCache) mutableComponent.getBTree().getBufferCache()).close();
+ for (ILSMComponent c : memoryComponents) {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) c;
+ mutableComponent.getBTree().deactivate();
+ mutableComponent.getBTree().destroy();
+ ((IVirtualBufferCache) mutableComponent.getBTree().getBufferCache()).close();
+ }
isActivated = false;
}
@@ -193,13 +203,16 @@
throw new HyracksDataException("Failed to destroy the index since it is activated.");
}
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) c;
+ LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) c;
component.getBTree().destroy();
component.getBloomFilter().destroy();
}
- mutableComponent.getBTree().destroy();
+ for (ILSMComponent c : memoryComponents) {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) c;
+ mutableComponent.getBTree().destroy();
+ }
fileManager.deleteDirs();
}
@@ -209,11 +222,14 @@
throw new HyracksDataException("Failed to clear the index since it is not activated.");
}
- List<ILSMComponent> immutableComponents = componentsRef.get();
- mutableComponent.getBTree().clear();
- mutableComponent.reset();
+ List<ILSMComponent> immutableComponents = diskComponents;
+ for (ILSMComponent c : memoryComponents) {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) c;
+ mutableComponent.getBTree().clear();
+ mutableComponent.reset();
+ }
for (ILSMComponent c : immutableComponents) {
- LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) c;
+ LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) c;
component.getBloomFilter().deactivate();
component.getBTree().deactivate();
component.getBloomFilter().destroy();
@@ -224,20 +240,32 @@
@Override
public void getOperationalComponents(ILSMIndexOperationContext ctx) {
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
List<ILSMComponent> operationalComponents = ctx.getComponentHolder();
operationalComponents.clear();
+ int cmc = currentMutableComponentId.get();
+ ctx.setCurrentMutableComponentId(cmc);
+ int numMutableComponents = memoryComponents.size();
switch (ctx.getOperation()) {
case UPDATE:
case UPSERT:
case PHYSICALDELETE:
case FLUSH:
case DELETE:
- operationalComponents.add(mutableComponent);
+ operationalComponents.add(memoryComponents.get(cmc));
break;
case SEARCH:
case INSERT:
- operationalComponents.add(mutableComponent);
+ for (int i = 0; i < numMutableComponents - 1; i++) {
+ ILSMComponent c = memoryComponents.get((cmc + i + 1) % numMutableComponents);
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) c;
+ if (mutableComponent.isReadable()) {
+ // Make sure newest components are added first
+ operationalComponents.add(0, mutableComponent);
+ }
+ }
+ // The current mutable component is always added
+ operationalComponents.add(0, memoryComponents.get(cmc));
operationalComponents.addAll(immutableComponents);
break;
case MERGE:
@@ -253,36 +281,38 @@
LSMBTreeOpContext ctx = (LSMBTreeOpContext) ictx;
switch (ctx.getOperation()) {
case PHYSICALDELETE:
- ctx.memBTreeAccessor.delete(tuple);
+ ctx.currentMutableBTreeAccessor.delete(tuple);
break;
case INSERT:
insert(tuple, ctx);
break;
default:
- ctx.memBTreeAccessor.upsert(tuple);
+ ctx.currentMutableBTreeAccessor.upsert(tuple);
break;
}
- mutableComponent.setIsModified();
}
private boolean insert(ITupleReference tuple, LSMBTreeOpContext ctx) throws HyracksDataException, IndexException {
+ ILSMComponent c = ctx.getComponentHolder().get(0);
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) c;
MultiComparator comparator = MultiComparator.createIgnoreFieldLength(mutableComponent.getBTree()
.getComparatorFactories());
LSMBTreePointSearchCursor searchCursor = new LSMBTreePointSearchCursor(ctx);
- IIndexCursor memCursor = new BTreeRangeSearchCursor(ctx.memBTreeOpCtx.leafFrame, false);
+ IIndexCursor memCursor = new BTreeRangeSearchCursor(ctx.currentMutableBTreeOpCtx.leafFrame, false);
RangePredicate predicate = new RangePredicate(tuple, tuple, true, true, comparator, comparator);
// first check the inmemory component
- ctx.memBTreeAccessor.search(memCursor, predicate);
+ ctx.currentMutableBTreeAccessor.search(memCursor, predicate);
try {
if (memCursor.hasNext()) {
memCursor.next();
LSMBTreeTupleReference lsmbtreeTuple = (LSMBTreeTupleReference) memCursor.getTuple();
if (!lsmbtreeTuple.isAntimatter()) {
- throw new BTreeDuplicateKeyException("Failed to insert key since key already exists.");
+ throw new TreeIndexDuplicateKeyException("Failed to insert key since key already exists.");
} else {
memCursor.close();
- ctx.memBTreeAccessor.upsertIfConditionElseInsert(tuple, AntimatterAwareTupleAcceptor.INSTANCE);
+ ctx.currentMutableBTreeAccessor.upsertIfConditionElseInsert(tuple,
+ AntimatterAwareTupleAcceptor.INSTANCE);
return true;
}
}
@@ -296,16 +326,21 @@
// the mutable component?
// the key was not in the inmemory component, so check the disk
// components
+
+ // This is a hack to avoid searching the current active mutable component twice. It is critical to add it back once the search is over.
+ ILSMComponent firstComponent = ctx.getComponentHolder().remove(0);
search(ctx, searchCursor, predicate);
try {
if (searchCursor.hasNext()) {
- throw new BTreeDuplicateKeyException("Failed to insert key since key already exists.");
+ throw new TreeIndexDuplicateKeyException("Failed to insert key since key already exists.");
}
} finally {
searchCursor.close();
+ // Add the current active mutable component back
+ ctx.getComponentHolder().add(0, firstComponent);
}
- ctx.memBTreeAccessor.upsertIfConditionElseInsert(tuple, AntimatterAwareTupleAcceptor.INSTANCE);
+ ctx.currentMutableBTreeAccessor.upsertIfConditionElseInsert(tuple, AntimatterAwareTupleAcceptor.INSTANCE);
return true;
}
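The temporary removal above keeps the LSM search from scanning the active mutable component a second time (memCursor already covered it). The finally block is what makes the hack safe; the same pop-search-restore shape in isolation:

    ILSMComponent first = ctx.getComponentHolder().remove(0);
    try {
        search(ctx, searchCursor, predicate); // sees only the remaining components
    } finally {
        searchCursor.close();
        ctx.getComponentHolder().add(0, first); // always restore the active component
    }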
@@ -314,38 +349,30 @@
throws HyracksDataException, IndexException {
LSMBTreeOpContext ctx = (LSMBTreeOpContext) ictx;
List<ILSMComponent> operationalComponents = ctx.getComponentHolder();
- int numBTrees = operationalComponents.size();
- assert numBTrees > 0;
- boolean includeMutableComponent = operationalComponents.get(0) == mutableComponent;
- LSMBTreeCursorInitialState initialState = new LSMBTreeCursorInitialState(numBTrees, insertLeafFrameFactory,
- ctx.cmp, ctx.bloomFilterCmp, includeMutableComponent, lsmHarness, ctx.memBTreeAccessor, pred,
- ctx.searchCallback, operationalComponents);
+ LSMBTreeCursorInitialState initialState = new LSMBTreeCursorInitialState(insertLeafFrameFactory, ctx.cmp,
+ ctx.bloomFilterCmp, lsmHarness, pred, ctx.searchCallback, operationalComponents);
cursor.open(initialState, pred);
}
@Override
- public boolean scheduleFlush(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
+ public void scheduleFlush(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException {
- if (!mutableComponent.isModified()) {
- return false;
- }
+ ILSMComponent flushingComponent = ctx.getComponentHolder().get(0);
LSMComponentFileReferences componentFileRefs = fileManager.getRelFlushFileReference();
LSMBTreeOpContext opCtx = createOpContext(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
assert ctx.getComponentHolder().size() == 1;
- ILSMComponent flushingComponent = ctx.getComponentHolder().get(0);
opCtx.setOperation(IndexOperation.FLUSH);
opCtx.getComponentHolder().add(flushingComponent);
ILSMIndexAccessorInternal flushAccessor = new LSMBTreeAccessor(lsmHarness, opCtx);
ioScheduler.scheduleOperation(new LSMBTreeFlushOperation(flushAccessor, flushingComponent, componentFileRefs
.getInsertIndexFileReference(), componentFileRefs.getBloomFilterFileReference(), callback));
- return true;
}
@Override
public ILSMComponent flush(ILSMIOOperation operation) throws HyracksDataException, IndexException {
LSMBTreeFlushOperation flushOp = (LSMBTreeFlushOperation) operation;
- LSMBTreeMutableComponent flushingComponent = (LSMBTreeMutableComponent) flushOp.getFlushingComponent();
+ LSMBTreeMemoryComponent flushingComponent = (LSMBTreeMemoryComponent) flushOp.getFlushingComponent();
IIndexAccessor accessor = flushingComponent.getBTree().createAccessor(NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
@@ -368,7 +395,7 @@
BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
bloomFilterFalsePositiveRate);
- LSMBTreeImmutableComponent component = createDiskComponent(componentFactory, flushOp.getBTreeFlushTarget(),
+ LSMBTreeDiskComponent component = createDiskComponent(componentFactory, flushOp.getBTreeFlushTarget(),
flushOp.getBloomFilterFlushTarget(), true);
IIndexBulkLoader bulkLoader = component.getBTree().createBulkLoader(1.0f, false, numElements, false);
IIndexBulkLoader builder = component.getBloomFilter().createBuilder(numElements,
@@ -390,18 +417,16 @@
return component;
}
+ @Override
public void scheduleMerge(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException, IndexException {
LSMBTreeOpContext opCtx = createOpContext(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- List<ILSMComponent> mergingComponents = ctx.getComponentHolder();
- opCtx.getComponentHolder().addAll(mergingComponents);
- ITreeIndexCursor cursor = new LSMBTreeRangeSearchCursor(opCtx);
- RangePredicate rangePred = new RangePredicate(null, null, true, true, null, null);
- search(opCtx, cursor, rangePred);
-
opCtx.setOperation(IndexOperation.MERGE);
- BTree firstBTree = (BTree) ((LSMBTreeImmutableComponent) mergingComponents.get(0)).getBTree();
- BTree lastBTree = (BTree) ((LSMBTreeImmutableComponent) mergingComponents.get(mergingComponents.size() - 1))
+ List<ILSMComponent> mergingComponents = ctx.getComponentHolder();
+ ITreeIndexCursor cursor = new LSMBTreeRangeSearchCursor(opCtx);
+
+ BTree firstBTree = (BTree) ((LSMBTreeDiskComponent) mergingComponents.get(0)).getBTree();
+ BTree lastBTree = (BTree) ((LSMBTreeDiskComponent) mergingComponents.get(mergingComponents.size() - 1))
.getBTree();
FileReference firstFile = diskFileMapProvider.lookupFileName(firstBTree.getFileId());
FileReference lastFile = diskFileMapProvider.lookupFileName(lastBTree.getFileId());
@@ -413,21 +438,24 @@
}
@Override
- public ILSMComponent merge(List<ILSMComponent> mergedComponents, ILSMIOOperation operation)
- throws HyracksDataException, IndexException {
+ public ILSMComponent merge(ILSMIOOperation operation) throws HyracksDataException, IndexException {
LSMBTreeMergeOperation mergeOp = (LSMBTreeMergeOperation) operation;
ITreeIndexCursor cursor = mergeOp.getCursor();
- mergedComponents.addAll(mergeOp.getMergingComponents());
+ RangePredicate rangePred = new RangePredicate(null, null, true, true, null, null);
+ ILSMIndexOperationContext opCtx = ((LSMIndexSearchCursor) cursor).getOpCtx();
+ opCtx.getComponentHolder().addAll(mergeOp.getMergingComponents());
+ search(opCtx, cursor, rangePred);
+ List<ILSMComponent> mergedComponents = mergeOp.getMergingComponents();
long numElements = 0L;
for (int i = 0; i < mergedComponents.size(); ++i) {
- numElements += ((LSMBTreeImmutableComponent) mergedComponents.get(i)).getBloomFilter().getNumElements();
+ numElements += ((LSMBTreeDiskComponent) mergedComponents.get(i)).getBloomFilter().getNumElements();
}
int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements);
BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
bloomFilterFalsePositiveRate);
- LSMBTreeImmutableComponent mergedComponent = createDiskComponent(componentFactory,
+ LSMBTreeDiskComponent mergedComponent = createDiskComponent(componentFactory,
mergeOp.getBTreeMergeTarget(), mergeOp.getBloomFilterMergeTarget(), true);
IIndexBulkLoader bulkLoader = mergedComponent.getBTree().createBulkLoader(1.0f, false, numElements, false);
@@ -448,11 +476,11 @@
return mergedComponent;
}
- private LSMBTreeImmutableComponent createDiskComponent(LSMBTreeImmutableComponentFactory factory,
+ private LSMBTreeDiskComponent createDiskComponent(LSMBTreeDiskComponentFactory factory,
FileReference btreeFileRef, FileReference bloomFilterFileRef, boolean createComponent)
throws HyracksDataException, IndexException {
// Create new BTree instance.
- LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) factory
+ LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) factory
.createLSMComponentInstance(new LSMComponentFileReferences(btreeFileRef, null, bloomFilterFileRef));
if (createComponent) {
component.getBTree().create();
@@ -484,7 +512,7 @@
public void markAsValid(ILSMComponent lsmComponent) throws HyracksDataException {
// The order of forcing the dirty page to be flushed is critical. The
// bloom filter must be always done first.
- LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) lsmComponent;
+ LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) lsmComponent;
// Flush the bloom filter first.
int fileId = component.getBloomFilter().getFileId();
IBufferCache bufferCache = component.getBTree().getBufferCache();
@@ -513,13 +541,13 @@
} catch (HyracksDataException | IndexException e) {
throw new TreeIndexException(e);
}
- bulkLoader = (BTreeBulkLoader) ((LSMBTreeImmutableComponent) component).getBTree().createBulkLoader(
+ bulkLoader = (BTreeBulkLoader) ((LSMBTreeDiskComponent) component).getBTree().createBulkLoader(
fillFactor, verifyInput, numElementsHint, false);
int maxBucketsPerElement = BloomCalculations.maxBucketsPerElement(numElementsHint);
BloomFilterSpecification bloomFilterSpec = BloomCalculations.computeBloomSpec(maxBucketsPerElement,
bloomFilterFalsePositiveRate);
- builder = ((LSMBTreeImmutableComponent) component).getBloomFilter().createBuilder(numElementsHint,
+ builder = ((LSMBTreeDiskComponent) component).getBloomFilter().createBuilder(numElementsHint,
bloomFilterSpec.getNumHashes(), bloomFilterSpec.getNumBucketsPerElements());
}
@@ -545,10 +573,10 @@
builder.end();
endedBloomFilterLoad = true;
}
- ((LSMBTreeImmutableComponent) component).getBTree().deactivate();
- ((LSMBTreeImmutableComponent) component).getBTree().destroy();
- ((LSMBTreeImmutableComponent) component).getBloomFilter().deactivate();
- ((LSMBTreeImmutableComponent) component).getBloomFilter().destroy();
+ ((LSMBTreeDiskComponent) component).getBTree().deactivate();
+ ((LSMBTreeDiskComponent) component).getBTree().destroy();
+ ((LSMBTreeDiskComponent) component).getBloomFilter().deactivate();
+ ((LSMBTreeDiskComponent) component).getBloomFilter().destroy();
}
}
@@ -571,7 +599,7 @@
public LSMBTreeOpContext createOpContext(IModificationOperationCallback modificationCallback,
ISearchOperationCallback searchCallback) {
- return new LSMBTreeOpContext(mutableComponent.getBTree(), insertLeafFrameFactory, deleteLeafFrameFactory,
+ return new LSMBTreeOpContext(memoryComponents, insertLeafFrameFactory, deleteLeafFrameFactory,
modificationCallback, searchCallback, componentFactory.getBloomFilterKeyFields().length);
}
@@ -608,50 +636,66 @@
@Override
public ITreeIndexFrameFactory getInteriorFrameFactory() {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getBTree().getInteriorFrameFactory();
}
@Override
public int getFieldCount() {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getBTree().getFieldCount();
}
@Override
public int getFileId() {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getBTree().getFileId();
}
@Override
public IFreePageManager getFreePageManager() {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getBTree().getFreePageManager();
}
@Override
public ITreeIndexFrameFactory getLeafFrameFactory() {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getBTree().getLeafFrameFactory();
}
@Override
public long getMemoryAllocationSize() {
- IBufferCache virtualBufferCache = mutableComponent.getBTree().getBufferCache();
- return virtualBufferCache.getNumPages() * virtualBufferCache.getPageSize();
+ long size = 0;
+ for (ILSMComponent c : memoryComponents) {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) c;
+ IBufferCache virtualBufferCache = mutableComponent.getBTree().getBufferCache();
+ size += virtualBufferCache.getNumPages() * virtualBufferCache.getPageSize();
+ }
+ return size;
}
@Override
public int getRootPageId() {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getBTree().getRootPageId();
}
- public boolean isEmptyIndex() throws HyracksDataException {
- return componentsRef.get().isEmpty() && !mutableComponent.isModified();
- }
-
@Override
public void validate() throws HyracksDataException {
- mutableComponent.getBTree().validate();
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ for (ILSMComponent c : memoryComponents) {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) c;
+ mutableComponent.getBTree().validate();
+ }
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- BTree btree = (BTree) ((LSMBTreeImmutableComponent) c).getBTree();
+ BTree btree = (BTree) ((LSMBTreeDiskComponent) c).getBTree();
btree.validate();
}
}
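The six accessors rewritten above all repeat the same indirection: resolve the active in-memory component through currentMutableComponentId, then delegate to its BTree. A hedged sketch of that pattern factored into a helper; the memoryComponents and currentMutableComponentId fields are assumed to be declared elsewhere in LSMBTree, as the hunks above imply:

    // Sketch: one helper capturing the "current mutable component" lookup that
    // each accessor above performs inline.
    private LSMBTreeMemoryComponent currentMutableComponent() {
        return (LSMBTreeMemoryComponent) memoryComponents.get(currentMutableComponentId.get());
    }

    // Equivalent form of the rewritten getFieldCount():
    @Override
    public int getFieldCount() {
        return currentMutableComponent().getBTree().getFieldCount();
    }

The patch keeps the lookup inline in each method, which stays closer to the pre-existing code shape at the cost of some repetition.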
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeCursorInitialState.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeCursorInitialState.java
index b5c6a45..37f89ff 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeCursorInitialState.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeCursorInitialState.java
@@ -18,7 +18,6 @@
import java.util.List;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
-import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
@@ -29,39 +28,28 @@
public class LSMBTreeCursorInitialState implements ICursorInitialState {
- private final int numBTrees;
private final ITreeIndexFrameFactory leafFrameFactory;
private MultiComparator cmp;
private final MultiComparator bloomFilterCmp;
- private final boolean includeMemComponent;
private final ILSMHarness lsmHarness;
- private final IIndexAccessor memBtreeAccessor;
private final ISearchPredicate predicate;
private ISearchOperationCallback searchCallback;
private final List<ILSMComponent> operationalComponents;
- public LSMBTreeCursorInitialState(int numBTrees, ITreeIndexFrameFactory leafFrameFactory, MultiComparator cmp,
- MultiComparator bloomFilterCmp, boolean includeMemComponent, ILSMHarness lsmHarness,
- IIndexAccessor memBtreeAccessor, ISearchPredicate predicate, ISearchOperationCallback searchCallback,
- List<ILSMComponent> operationalComponents) {
- this.numBTrees = numBTrees;
+ public LSMBTreeCursorInitialState(ITreeIndexFrameFactory leafFrameFactory, MultiComparator cmp,
+ MultiComparator bloomFilterCmp, ILSMHarness lsmHarness, ISearchPredicate predicate,
+ ISearchOperationCallback searchCallback, List<ILSMComponent> operationalComponents) {
this.leafFrameFactory = leafFrameFactory;
this.cmp = cmp;
this.bloomFilterCmp = bloomFilterCmp;
- this.includeMemComponent = includeMemComponent;
this.lsmHarness = lsmHarness;
this.searchCallback = searchCallback;
- this.memBtreeAccessor = memBtreeAccessor;
this.predicate = predicate;
this.operationalComponents = operationalComponents;
}
- public int getNumBTrees() {
- return numBTrees;
- }
-
public ITreeIndexFrameFactory getLeafFrameFactory() {
return leafFrameFactory;
}
@@ -75,10 +63,6 @@
public void setPage(ICachedPage page) {
}
- public boolean getIncludeMemComponent() {
- return includeMemComponent;
- }
-
public ILSMHarness getLSMHarness() {
return lsmHarness;
}
@@ -97,10 +81,6 @@
return operationalComponents;
}
- public IIndexAccessor getMemBTreeAccessor() {
- return memBtreeAccessor;
- }
-
public ISearchPredicate getSearchPredicate() {
return predicate;
}
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponent.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponent.java
similarity index 84%
rename from hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponent.java
rename to hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponent.java
index affdef8..0b2d7cf 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponent.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponent.java
@@ -17,13 +17,13 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractImmutableLSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractDiskLSMComponent;
-public class LSMBTreeImmutableComponent extends AbstractImmutableLSMComponent {
+public class LSMBTreeDiskComponent extends AbstractDiskLSMComponent {
private final BTree btree;
private final BloomFilter bloomFilter;
- public LSMBTreeImmutableComponent(BTree btree, BloomFilter bloomFilter) {
+ public LSMBTreeDiskComponent(BTree btree, BloomFilter bloomFilter) {
this.btree = btree;
this.bloomFilter = bloomFilter;
}
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponentFactory.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponentFactory.java
similarity index 86%
rename from hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponentFactory.java
rename to hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponentFactory.java
index 0efdc65..7b1e0de 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeImmutableComponentFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeDiskComponentFactory.java
@@ -25,11 +25,11 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
-public class LSMBTreeImmutableComponentFactory implements ILSMComponentFactory {
+public class LSMBTreeDiskComponentFactory implements ILSMComponentFactory {
private final TreeIndexFactory<BTree> btreeFactory;
private final BloomFilterFactory bloomFilterFactory;
- public LSMBTreeImmutableComponentFactory(TreeIndexFactory<BTree> btreeFactory, BloomFilterFactory bloomFilterFactory) {
+ public LSMBTreeDiskComponentFactory(TreeIndexFactory<BTree> btreeFactory, BloomFilterFactory bloomFilterFactory) {
this.btreeFactory = btreeFactory;
this.bloomFilterFactory = bloomFilterFactory;
}
@@ -37,7 +37,7 @@
@Override
public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException,
HyracksDataException {
- return new LSMBTreeImmutableComponent(btreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()),
+ return new LSMBTreeDiskComponent(btreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()),
bloomFilterFactory.createBloomFiltertInstance(cfr.getBloomFilterFileReference()));
}
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java
index 1aab213..668e727 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeFlushOperation.java
@@ -58,8 +58,9 @@
}
@Override
- public void perform() throws HyracksDataException, IndexException {
+ public Boolean call() throws HyracksDataException, IndexException {
accessor.flush(this);
+ return true;
}
@Override
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMutableComponent.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMemoryComponent.java
similarity index 79%
rename from hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMutableComponent.java
rename to hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMemoryComponent.java
index 0f3e3ce..7550349 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMutableComponent.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMemoryComponent.java
@@ -18,16 +18,15 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractMutableLSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractMemoryLSMComponent;
-public class LSMBTreeMutableComponent extends AbstractMutableLSMComponent {
+public class LSMBTreeMemoryComponent extends AbstractMemoryLSMComponent {
private final BTree btree;
- private final IVirtualBufferCache vbc;
- public LSMBTreeMutableComponent(BTree btree, IVirtualBufferCache vbc) {
+ public LSMBTreeMemoryComponent(BTree btree, IVirtualBufferCache vbc, boolean isActive) {
+ super(vbc, isActive);
this.btree = btree;
- this.vbc = vbc;
}
public BTree getBTree() {
@@ -35,11 +34,6 @@
}
@Override
- protected boolean isFull() {
- return vbc.isFull();
- }
-
- @Override
protected void reset() throws HyracksDataException {
super.reset();
btree.deactivate();
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java
index fd3f1fd..3a608fe 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeMergeOperation.java
@@ -53,7 +53,7 @@
public Set<IODeviceHandle> getReadDevices() {
Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>();
for (ILSMComponent o : mergingComponents) {
- LSMBTreeImmutableComponent component = (LSMBTreeImmutableComponent) o;
+ LSMBTreeDiskComponent component = (LSMBTreeDiskComponent) o;
devs.add(component.getBTree().getFileReference().getDeviceHandle());
devs.add(component.getBloomFilter().getFileReference().getDeviceHandle());
}
@@ -69,8 +69,9 @@
}
@Override
- public void perform() throws HyracksDataException, IndexException {
+ public Boolean call() throws HyracksDataException, IndexException {
accessor.merge(this);
+ return true;
}
@Override
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java
index ac62d9f..6d2d7c0 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeOpContext.java
@@ -37,9 +37,11 @@
public ITreeIndexFrameFactory deleteLeafFrameFactory;
public IBTreeLeafFrame insertLeafFrame;
public IBTreeLeafFrame deleteLeafFrame;
- public final BTree memBTree;
- public BTree.BTreeAccessor memBTreeAccessor;
- public BTreeOpContext memBTreeOpCtx;
+ public final BTree[] mutableBTrees;
+ public BTree.BTreeAccessor[] mutableBTreeAccessors;
+ public BTreeOpContext[] mutableBTreeOpCtxs;
+ public BTree.BTreeAccessor currentMutableBTreeAccessor;
+ public BTreeOpContext currentMutableBTreeOpCtx;
public IndexOperation op;
public final MultiComparator cmp;
public final MultiComparator bloomFilterCmp;
@@ -47,19 +49,30 @@
public final ISearchOperationCallback searchCallback;
private final List<ILSMComponent> componentHolder;
- public LSMBTreeOpContext(BTree memBTree, ITreeIndexFrameFactory insertLeafFrameFactory,
+ public LSMBTreeOpContext(List<ILSMComponent> mutableComponents, ITreeIndexFrameFactory insertLeafFrameFactory,
ITreeIndexFrameFactory deleteLeafFrameFactory, IModificationOperationCallback modificationCallback,
ISearchOperationCallback searchCallback, int numBloomFilterKeyFields) {
- IBinaryComparatorFactory cmpFactories[] = memBTree.getComparatorFactories();
+ LSMBTreeMemoryComponent c = (LSMBTreeMemoryComponent) mutableComponents.get(0);
+ IBinaryComparatorFactory cmpFactories[] = c.getBTree().getComparatorFactories();
if (cmpFactories[0] != null) {
- this.cmp = MultiComparator.create(memBTree.getComparatorFactories());
+ this.cmp = MultiComparator.create(c.getBTree().getComparatorFactories());
} else {
this.cmp = null;
}
- bloomFilterCmp = MultiComparator.create(memBTree.getComparatorFactories(), 0, numBloomFilterKeyFields);
+ bloomFilterCmp = MultiComparator.create(c.getBTree().getComparatorFactories(), 0, numBloomFilterKeyFields);
- this.memBTree = memBTree;
+ mutableBTrees = new BTree[mutableComponents.size()];
+ mutableBTreeAccessors = new BTree.BTreeAccessor[mutableComponents.size()];
+ mutableBTreeOpCtxs = new BTreeOpContext[mutableComponents.size()];
+ for (int i = 0; i < mutableComponents.size(); i++) {
+ LSMBTreeMemoryComponent mutableComponent = (LSMBTreeMemoryComponent) mutableComponents.get(i);
+ mutableBTrees[i] = mutableComponent.getBTree();
+ mutableBTreeAccessors[i] = (BTree.BTreeAccessor) mutableBTrees[i].createAccessor(modificationCallback,
+ NoOpOperationCallback.INSTANCE);
+ mutableBTreeOpCtxs[i] = mutableBTreeAccessors[i].getOpContext();
+ }
+
this.insertLeafFrameFactory = insertLeafFrameFactory;
this.deleteLeafFrameFactory = deleteLeafFrameFactory;
this.insertLeafFrame = (IBTreeLeafFrame) insertLeafFrameFactory.createFrame();
@@ -79,48 +92,16 @@
public void setOperation(IndexOperation newOp) {
reset();
this.op = newOp;
- switch (newOp) {
- case SEARCH:
- setMemBTreeAccessor();
- break;
- case DISKORDERSCAN:
- case UPDATE:
- // Attention: It is important to leave the leafFrame and
- // leafFrameFactory of the memBTree as is when doing an update.
- // Update will only be set if a previous attempt to delete or
- // insert failed, so we must preserve the semantics of the
- // previously requested operation.
- setMemBTreeAccessor();
- return;
- case UPSERT:
- case INSERT:
- setInsertMode();
- break;
- case PHYSICALDELETE:
- case DELETE:
- setDeleteMode();
- break;
- }
- }
-
- private void setMemBTreeAccessor() {
- if (memBTreeAccessor == null) {
- memBTreeAccessor = (BTree.BTreeAccessor) memBTree.createAccessor(modificationCallback,
- NoOpOperationCallback.INSTANCE);
- memBTreeOpCtx = memBTreeAccessor.getOpContext();
- }
}
public void setInsertMode() {
- setMemBTreeAccessor();
- memBTreeOpCtx.leafFrame = insertLeafFrame;
- memBTreeOpCtx.leafFrameFactory = insertLeafFrameFactory;
+ currentMutableBTreeOpCtx.leafFrame = insertLeafFrame;
+ currentMutableBTreeOpCtx.leafFrameFactory = insertLeafFrameFactory;
}
public void setDeleteMode() {
- setMemBTreeAccessor();
- memBTreeOpCtx.leafFrame = deleteLeafFrame;
- memBTreeOpCtx.leafFrameFactory = deleteLeafFrameFactory;
+ currentMutableBTreeOpCtx.leafFrame = deleteLeafFrame;
+ currentMutableBTreeOpCtx.leafFrameFactory = deleteLeafFrameFactory;
}
@Override
@@ -146,4 +127,30 @@
public IModificationOperationCallback getModificationCallback() {
return modificationCallback;
}
+
+ @Override
+ public void setCurrentMutableComponentId(int currentMutableComponentId) {
+ currentMutableBTreeAccessor = mutableBTreeAccessors[currentMutableComponentId];
+ currentMutableBTreeOpCtx = mutableBTreeOpCtxs[currentMutableComponentId];
+ switch (op) {
+ case SEARCH:
+ break;
+ case DISKORDERSCAN:
+ case UPDATE:
+ // Attention: It is important to leave the leafFrame and
+ // leafFrameFactory of the mutableBTree as is when doing an update.
+ // Update will only be set if a previous attempt to delete or
+ // insert failed, so we must preserve the semantics of the
+ // previously requested operation.
+ break;
+ case UPSERT:
+ case INSERT:
+ setInsertMode();
+ break;
+ case PHYSICALDELETE:
+ case DELETE:
+ setDeleteMode();
+ break;
+ }
+ }
}
\ No newline at end of file
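Taken together, the op-context changes above mean a caller binds an operation to one of the N in-memory BTrees in two steps: set the operation, then select the mutable component, which also re-applies the insert/delete leaf-frame mode. A hedged sketch of the expected calling sequence; the harness-side code and the activeId/tuple names are assumptions, only the two setters come from this file:

    // Hypothetical caller (e.g., the LSM harness) driving LSMBTreeOpContext:
    ctx.setOperation(IndexOperation.INSERT);       // resets and records the op
    ctx.setCurrentMutableComponentId(activeId);    // binds currentMutableBTreeAccessor
                                                   // and applies insert mode
    ctx.currentMutableBTreeAccessor.insert(tuple); // goes to the active memory BTree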
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreePointSearchCursor.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreePointSearchCursor.java
index 64e0c28..fc09a74 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreePointSearchCursor.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreePointSearchCursor.java
@@ -15,7 +15,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.btree.impls;
-import java.util.ListIterator;
+import java.util.List;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
@@ -32,6 +32,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMHarness;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMTreeTupleReference;
@@ -45,14 +46,14 @@
private final ILSMIndexOperationContext opCtx;
private ISearchOperationCallback searchCallback;
private RangePredicate predicate;
- private IIndexAccessor memBTreeAccessor;
- private boolean includeMemComponent;
+ private boolean includeMutableComponent;
private int numBTrees;
- private IIndexAccessor[] bTreeAccessors;
+ private IIndexAccessor[] btreeAccessors;
private ILSMHarness lsmHarness;
private boolean nextHasBeenCalled;
private boolean foundTuple;
private ITupleReference frameTuple;
+ private List<ILSMComponent> operationalComponents;
public LSMBTreePointSearchCursor(ILSMIndexOperationContext opCtx) {
this.opCtx = opCtx;
@@ -67,7 +68,7 @@
}
boolean reconciled = false;
for (int i = 0; i < numBTrees; ++i) {
- bTreeAccessors[i].search(rangeCursors[i], predicate);
+ btreeAccessors[i].search(rangeCursors[i], predicate);
if (rangeCursors[i].hasNext()) {
rangeCursors[i].next();
// We use the predicate's low key for locking instead of the tuple we get from the cursor, to avoid copying the tuple when we do the "unlatch dance"
@@ -83,14 +84,14 @@
return true;
}
}
- if (i == 0 && includeMemComponent) {
+ if (i == 0 && includeMutableComponent) {
// unlatch/unpin
rangeCursors[i].reset();
searchCallback.reconcile(predicate.getLowKey());
reconciled = true;
// retraverse
- memBTreeAccessor.search(rangeCursors[i], predicate);
+ btreeAccessors[0].search(rangeCursors[i], predicate);
searchCallback.complete(predicate.getLowKey());
if (rangeCursors[i].hasNext()) {
rangeCursors[i].next();
@@ -123,14 +124,14 @@
@Override
public void reset() throws HyracksDataException, IndexException {
try {
- if (rangeCursors != null) {
- for (int i = 0; i < rangeCursors.length; ++i) {
- rangeCursors[i].reset();
+ if (rangeCursors != null) {
+ for (int i = 0; i < rangeCursors.length; ++i) {
+ rangeCursors[i].reset();
+ }
}
- }
- rangeCursors = null;
- nextHasBeenCalled = false;
- foundTuple = false;
+ rangeCursors = null;
+ nextHasBeenCalled = false;
+ foundTuple = false;
} finally {
if (lsmHarness != null) {
lsmHarness.endSearch(opCtx);
@@ -141,41 +142,30 @@
@Override
public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
LSMBTreeCursorInitialState lsmInitialState = (LSMBTreeCursorInitialState) initialState;
- includeMemComponent = lsmInitialState.getIncludeMemComponent();
+ operationalComponents = lsmInitialState.getOperationalComponents();
lsmHarness = lsmInitialState.getLSMHarness();
searchCallback = lsmInitialState.getSearchOperationCallback();
- memBTreeAccessor = lsmInitialState.getMemBTreeAccessor();
predicate = (RangePredicate) lsmInitialState.getSearchPredicate();
-
- numBTrees = lsmInitialState.getNumBTrees();
+ numBTrees = operationalComponents.size();
rangeCursors = new IIndexCursor[numBTrees];
- int i = 0;
- if (includeMemComponent) {
- // No need for a bloom filter for the in-memory BTree.
- IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) lsmInitialState.getLeafFrameFactory().createFrame();
- rangeCursors[i] = new BTreeRangeSearchCursor(leafFrame, false);
- ++i;
- }
- for (; i < numBTrees; ++i) {
- IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) lsmInitialState.getLeafFrameFactory().createFrame();
- rangeCursors[i] = new BloomFilterAwareBTreePointSearchCursor(leafFrame, false,
- ((LSMBTreeImmutableComponent) lsmInitialState.getOperationalComponents().get(i)).getBloomFilter());
- }
+ btreeAccessors = new IIndexAccessor[numBTrees];
+ includeMutableComponent = false;
- bTreeAccessors = new IIndexAccessor[numBTrees];
- int cursorIx = 0;
- ListIterator<ILSMComponent> btreesIter = lsmInitialState.getOperationalComponents().listIterator();
- if (includeMemComponent) {
- bTreeAccessors[cursorIx] = memBTreeAccessor;
- ++cursorIx;
- btreesIter.next();
- }
-
- while (btreesIter.hasNext()) {
- BTree diskBTree = ((LSMBTreeImmutableComponent) btreesIter.next()).getBTree();
- bTreeAccessors[cursorIx] = diskBTree.createAccessor(NoOpOperationCallback.INSTANCE,
- NoOpOperationCallback.INSTANCE);
- cursorIx++;
+ for (int i = 0; i < numBTrees; i++) {
+ ILSMComponent component = operationalComponents.get(i);
+ BTree btree;
+ IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) lsmInitialState.getLeafFrameFactory().createFrame();
+ if (component.getType() == LSMComponentType.MEMORY) {
+ includeMutableComponent = true;
+ // No need for a bloom filter for the in-memory BTree.
+ rangeCursors[i] = new BTreeRangeSearchCursor(leafFrame, false);
+ btree = (BTree) ((LSMBTreeMemoryComponent) component).getBTree();
+ } else {
+ rangeCursors[i] = new BloomFilterAwareBTreePointSearchCursor(leafFrame, false,
+ ((LSMBTreeDiskComponent) component).getBloomFilter());
+ btree = (BTree) ((LSMBTreeDiskComponent) component).getBTree();
+ }
+ btreeAccessors[i] = btree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
}
nextHasBeenCalled = false;
foundTuple = false;
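The rewritten open() above drops the old positional convention ("slot 0 is the memory BTree if present") in favor of asking each component for its type. Memory components get a plain range cursor because they live entirely in the virtual buffer cache, where a bloom filter would save no disk I/O; disk components get the bloom-filter-aware cursor so point lookups can skip components that cannot contain the key. A condensed sketch of just that dispatch, using only types visible in this file:

    for (int i = 0; i < operationalComponents.size(); i++) {
        ILSMComponent component = operationalComponents.get(i);
        IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) lsmInitialState.getLeafFrameFactory().createFrame();
        if (component.getType() == LSMComponentType.MEMORY) {
            // Resident in memory: no disk reads to save, so no bloom filter check.
            rangeCursors[i] = new BTreeRangeSearchCursor(leafFrame, false);
        } else {
            // On disk: consult the component's bloom filter before searching it.
            rangeCursors[i] = new BloomFilterAwareBTreePointSearchCursor(leafFrame, false,
                    ((LSMBTreeDiskComponent) component).getBloomFilter());
        }
    }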
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java
index 4bdebae..6eada4b 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/impls/LSMBTreeRangeSearchCursor.java
@@ -16,7 +16,6 @@
package edu.uci.ics.hyracks.storage.am.lsm.btree.impls;
import java.util.Iterator;
-import java.util.ListIterator;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
@@ -35,6 +34,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexSearchCursor;
@@ -44,7 +44,7 @@
private ISearchOperationCallback searchCallback;
private RangePredicate predicate;
- private IIndexAccessor memBTreeAccessor;
+ private IIndexAccessor[] btreeAccessors;
private ArrayTupleBuilder tupleBuilder;
private boolean proceed = true;
@@ -72,25 +72,25 @@
if (!outputPriorityQueue.isEmpty()) {
PriorityQueueElement checkElement = outputPriorityQueue.peek();
if (proceed && !searchCallback.proceed(checkElement.getTuple())) {
- if (includeMemComponent) {
- PriorityQueueElement inMemElement = null;
- boolean inMemElementFound = false;
- // scan the PQ for the in-memory component's element
+ if (includeMutableComponent) {
+ PriorityQueueElement mutableElement = null;
+ boolean mutableElementFound = false;
+ // scan the PQ for the mutable component's element
Iterator<PriorityQueueElement> it = outputPriorityQueue.iterator();
while (it.hasNext()) {
- inMemElement = it.next();
- if (inMemElement.getCursorIndex() == 0) {
- inMemElementFound = true;
+ mutableElement = it.next();
+ if (mutableElement.getCursorIndex() == 0) {
+ mutableElementFound = true;
it.remove();
break;
}
}
- if (inMemElementFound) {
+ if (mutableElementFound) {
// copy the in-mem tuple
if (tupleBuilder == null) {
tupleBuilder = new ArrayTupleBuilder(cmp.getKeyFieldCount());
}
- TupleUtils.copyTuple(tupleBuilder, inMemElement.getTuple(), cmp.getKeyFieldCount());
+ TupleUtils.copyTuple(tupleBuilder, mutableElement.getTuple(), cmp.getKeyFieldCount());
copyTuple.reset(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray());
// unlatch/unpin
@@ -105,11 +105,11 @@
}
// retraverse
reusablePred.setLowKey(copyTuple, true);
- memBTreeAccessor.search(rangeCursors[0], reusablePred);
- boolean isNotExhaustedCursor = pushIntoPriorityQueue(inMemElement);
-
+ btreeAccessors[0].search(rangeCursors[0], reusablePred);
+ boolean isNotExhaustedCursor = pushIntoPriorityQueue(mutableElement);
+
if (checkElement.getCursorIndex() == 0) {
- if (!isNotExhaustedCursor || cmp.compare(copyTuple, inMemElement.getTuple()) != 0) {
+ if (!isNotExhaustedCursor || cmp.compare(copyTuple, mutableElement.getTuple()) != 0) {
searchCallback.complete(copyTuple);
searchCallback.cancel(copyTuple);
continue;
@@ -117,7 +117,7 @@
searchCallback.complete(copyTuple);
}
} else {
- // the in-memory cursor is exhausted
+ // the mutable cursor is exhausted
searchCallback.reconcile(checkElement.getTuple());
}
} else {
@@ -174,46 +174,34 @@
IndexException {
LSMBTreeCursorInitialState lsmInitialState = (LSMBTreeCursorInitialState) initialState;
cmp = lsmInitialState.getOriginalKeyComparator();
- includeMemComponent = lsmInitialState.getIncludeMemComponent();
operationalComponents = lsmInitialState.getOperationalComponents();
lsmHarness = lsmInitialState.getLSMHarness();
searchCallback = lsmInitialState.getSearchOperationCallback();
- memBTreeAccessor = lsmInitialState.getMemBTreeAccessor();
predicate = (RangePredicate) lsmInitialState.getSearchPredicate();
reusablePred.setLowKeyComparator(cmp);
reusablePred.setHighKey(predicate.getHighKey(), predicate.isHighKeyInclusive());
reusablePred.setHighKeyComparator(predicate.getHighKeyComparator());
+ includeMutableComponent = false;
- int numBTrees = lsmInitialState.getNumBTrees();
+ int numBTrees = operationalComponents.size();
rangeCursors = new IIndexCursor[numBTrees];
+
+ btreeAccessors = new ITreeIndexAccessor[numBTrees];
for (int i = 0; i < numBTrees; i++) {
+ ILSMComponent component = operationalComponents.get(i);
+ BTree btree;
IBTreeLeafFrame leafFrame = (IBTreeLeafFrame) lsmInitialState.getLeafFrameFactory().createFrame();
rangeCursors[i] = new BTreeRangeSearchCursor(leafFrame, false);
+ if (component.getType() == LSMComponentType.MEMORY) {
+ includeMutableComponent = true;
+ btree = (BTree) ((LSMBTreeMemoryComponent) component).getBTree();
+ } else {
+ btree = (BTree) ((LSMBTreeDiskComponent) component).getBTree();
+ }
+ btreeAccessors[i] = btree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ btreeAccessors[i].search(rangeCursors[i], searchPred);
}
setPriorityQueueComparator();
-
- int cursorIx = 0;
- ListIterator<ILSMComponent> btreesIter = operationalComponents.listIterator();
- if (includeMemComponent) {
- // Open cursor of in-memory BTree at index 0.
- memBTreeAccessor.search(rangeCursors[cursorIx], searchPred);
- // Skip 0 because it is the in-memory BTree.
- ++cursorIx;
- btreesIter.next();
- }
-
- // Open cursors of on-disk BTrees.
- int numDiskComponents = includeMemComponent ? numBTrees - 1 : numBTrees;
- ITreeIndexAccessor[] diskBTreeAccessors = new ITreeIndexAccessor[numDiskComponents];
- int diskBTreeIx = 0;
- while (btreesIter.hasNext()) {
- BTree diskBTree = (BTree) ((LSMBTreeImmutableComponent) btreesIter.next()).getBTree();
- diskBTreeAccessors[diskBTreeIx] = diskBTree.createAccessor(NoOpOperationCallback.INSTANCE,
- NoOpOperationCallback.INSTANCE);
- diskBTreeAccessors[diskBTreeIx].search(rangeCursors[cursorIx], searchPred);
- cursorIx++;
- diskBTreeIx++;
- }
initPriorityQueue();
proceed = true;
}
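For readers unfamiliar with the superclass, LSMIndexSearchCursor merges the per-component range cursors opened above through outputPriorityQueue: a textbook k-way merge over sorted runs, where ties on key are won by the lower cursor index (the newer component), shadowing older versions of the same key. A self-contained toy analogue of that merge, deliberately independent of the Hyracks types:

    import java.util.Comparator;
    import java.util.PriorityQueue;

    // Toy k-way merge over sorted int arrays, mirroring how the LSM range cursor
    // merges one sorted stream per component through a priority queue.
    public class KWayMergeSketch {
        public static void main(String[] args) {
            int[][] runs = { { 1, 4, 9 }, { 2, 4, 7 }, { 3, 8 } }; // one "component" each
            // Queue entries: {value, runIndex, offsetInRun}; ordered by value, then
            // run index, so the newer component (lower index) wins on duplicate keys.
            PriorityQueue<int[]> pq = new PriorityQueue<>(
                    Comparator.<int[]> comparingInt(e -> e[0]).thenComparingInt(e -> e[1]));
            for (int r = 0; r < runs.length; r++) {
                if (runs[r].length > 0) pq.add(new int[] { runs[r][0], r, 0 });
            }
            Integer last = null;
            while (!pq.isEmpty()) {
                int[] e = pq.poll();
                if (last == null || e[0] != last) { // drop duplicates from older runs
                    System.out.print(e[0] + " ");
                    last = e[0];
                }
                int next = e[2] + 1;
                if (next < runs[e[1]].length) pq.add(new int[] { runs[e[1]][next], e[1], next });
            }
            // prints: 1 2 3 4 7 8 9
        }
    }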
diff --git a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java
index 507ec02..8b363ff 100644
--- a/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java
+++ b/hyracks/hyracks-storage-am-lsm-btree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeUtils.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.btree.util;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.io.FileReference;
@@ -31,7 +33,7 @@
import edu.uci.ics.hyracks.storage.am.lsm.btree.impls.LSMBTreeFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.btree.tuples.LSMBTreeCopyTupleWriterFactory;
import edu.uci.ics.hyracks.storage.am.lsm.btree.tuples.LSMBTreeTupleWriterFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
@@ -43,11 +45,11 @@
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
public class LSMBTreeUtils {
- public static LSMBTree createLSMTree(IVirtualBufferCache virtualBufferCache, FileReference file,
+ public static LSMBTree createLSMTree(List<IVirtualBufferCache> virtualBufferCaches, FileReference file,
IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits,
IBinaryComparatorFactory[] cmpFactories, int[] bloomFilterKeyFields, double bloomFilterFalsePositiveRate,
ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
+ ILSMIOOperationCallback ioOpCallback) {
LSMBTreeTupleWriterFactory insertTupleWriterFactory = new LSMBTreeTupleWriterFactory(typeTraits,
cmpFactories.length, false);
LSMBTreeTupleWriterFactory deleteTupleWriterFactory = new LSMBTreeTupleWriterFactory(typeTraits,
@@ -73,10 +75,10 @@
ILSMIndexFileManager fileNameManager = new LSMBTreeFileManager(diskFileMapProvider, file, diskBTreeFactory);
- LSMBTree lsmTree = new LSMBTree(virtualBufferCache, interiorFrameFactory, insertLeafFrameFactory,
+ LSMBTree lsmTree = new LSMBTree(virtualBufferCaches, interiorFrameFactory, insertLeafFrameFactory,
deleteLeafFrameFactory, fileNameManager, diskBTreeFactory, bulkLoadBTreeFactory, bloomFilterFactory,
bloomFilterFalsePositiveRate, diskFileMapProvider, typeTraits.length, cmpFactories, mergePolicy,
- opTracker, ioScheduler, ioOpCallbackProvider);
+ opTracker, ioScheduler, ioOpCallback);
return lsmTree;
}
}
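The factory change above ripples to every call site: where callers used to pass a single IVirtualBufferCache, they now pass one per desired memory component. A hedged call-site sketch; every concrete argument below is a placeholder, and only the parameter order is taken from this hunk:

    // Hypothetical call site: two virtual buffer caches => two memory components.
    List<IVirtualBufferCache> vbcs = Arrays.asList(vbc0, vbc1); // placeholders
    LSMBTree lsmBTree = LSMBTreeUtils.createLSMTree(vbcs, file, diskBufferCache,
            diskFileMapProvider, typeTraits, cmpFactories, bloomFilterKeyFields,
            0.01 /* bloomFilterFalsePositiveRate */, mergePolicy, opTracker,
            ioScheduler, ioOpCallback);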
diff --git a/hyracks/hyracks-storage-am-lsm-common/pom.xml b/hyracks/hyracks-storage-am-lsm-common/pom.xml
index d52424b..4a6fe50 100644
--- a/hyracks/hyracks-storage-am-lsm-common/pom.xml
+++ b/hyracks/hyracks-storage-am-lsm-common/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -39,21 +39,21 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-bloomfilter</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMComponent.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMComponent.java
index 72a3e1f..afbabe0 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMComponent.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMComponent.java
@@ -18,7 +18,27 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMOperationType;
public interface ILSMComponent {
- public boolean threadEnter(LSMOperationType opType) throws InterruptedException;
- public void threadExit(LSMOperationType opType, boolean failedOperation) throws HyracksDataException;
+ enum LSMComponentType {
+ MEMORY,
+ DISK
+ }
+
+ enum ComponentState {
+ INACTIVE,
+ READABLE_WRITABLE,
+ READABLE_UNWRITABLE,
+ READABLE_UNWRITABLE_FLUSHING,
+ UNREADABLE_UNWRITABLE,
+ READABLE_MERGING
+ }
+
+ public boolean threadEnter(LSMOperationType opType, boolean isMutableComponent) throws HyracksDataException;
+
+ public void threadExit(LSMOperationType opType, boolean failedOperation, boolean isMutableComponent)
+ throws HyracksDataException;
+
+ public LSMComponentType getType();
+
+ public ComponentState getState();
}
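The enlarged interface above encodes a small lifecycle state machine (the ComponentState values) plus an entry/exit protocol that brackets every operation touching a component. A hedged sketch of the discipline a caller such as the LSM harness is expected to follow; the try/finally shape is an assumption, only the two method signatures come from this file:

    // Bracket an operation on a component with threadEnter/threadExit, reporting
    // failure to threadExit so the component can roll its state back.
    boolean entered = component.threadEnter(opType, isMutableComponent);
    if (entered) {
        boolean failed = true;
        try {
            // ... perform the search / modification / flush / merge ...
            failed = false;
        } finally {
            component.threadExit(opType, failed, isMutableComponent);
        }
    }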
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMHarness.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMHarness.java
index a006b97..bc7cbf7 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMHarness.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMHarness.java
@@ -29,8 +29,6 @@
public boolean modify(ILSMIndexOperationContext ctx, boolean tryOperation, ITupleReference tuple)
throws HyracksDataException, IndexException;
- public void noOp(ILSMIndexOperationContext ctx) throws HyracksDataException;
-
public void search(ILSMIndexOperationContext ctx, IIndexCursor cursor, ISearchPredicate pred)
throws HyracksDataException, IndexException;
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperation.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperation.java
index c23dcea..2c3940f 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperation.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperation.java
@@ -15,17 +15,18 @@
package edu.uci.ics.hyracks.storage.am.lsm.common.api;
import java.util.Set;
+import java.util.concurrent.Callable;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.IODeviceHandle;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
-public interface ILSMIOOperation {
+public interface ILSMIOOperation extends Callable<Boolean> {
public Set<IODeviceHandle> getReadDevices();
public Set<IODeviceHandle> getWriteDevices();
- public void perform() throws HyracksDataException, IndexException;
+ public Boolean call() throws HyracksDataException, IndexException;
public ILSMIOOperationCallback getCallback();
}
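Retrofitting ILSMIOOperation onto Callable&lt;Boolean&gt; is what lets flushes and merges flow directly into java.util.concurrent machinery. A minimal sketch of a scheduler exploiting that; the executor wiring is an assumption, only the Callable contract comes from this hunk:

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    // Hypothetical scheduler: an ILSMIOOperation is now a Callable<Boolean>, so it
    // can be submitted to a thread pool unchanged; Future.get() surfaces failures.
    public class ExecutorIOScheduler {
        private final ExecutorService executor = Executors.newFixedThreadPool(2);

        public Future<Boolean> schedule(ILSMIOOperation operation) {
            return executor.submit(operation); // eventually invokes operation.call()
        }
    }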
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperationCallback.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperationCallback.java
index 7086b59..9af08fa 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperationCallback.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperationCallback.java
@@ -17,12 +17,36 @@
import java.util.List;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMOperationType;
public interface ILSMIOOperationCallback {
- public void beforeOperation() throws HyracksDataException;
- public void afterOperation(List<ILSMComponent> oldComponents, ILSMComponent newComponent)
+ /**
+ * This method is called sometime before the IO operation (e.g., a flush or a
+ * merge) is executed.
+ */
+ public void beforeOperation(LSMOperationType opType) throws HyracksDataException;
+
+ /**
+ * This method is called sometime after the IO operation (e.g., a flush or a
+ * merge) has completed.
+ *
+ * @param oldComponents
+ * @param newComponent
+ * @throws HyracksDataException
+ */
+ public void afterOperation(LSMOperationType opType, List<ILSMComponent> oldComponents, ILSMComponent newComponent)
throws HyracksDataException;
- public void afterFinalize(ILSMComponent newComponent) throws HyracksDataException;
+ /**
+ * This method is called when the IO operation needs to perform any cleanup work,
+ * regardless of whether the operation was actually executed. If the operation was
+ * executed, this method should be called after ILSMIOOperationCallback.afterOperation().
+ *
+ * @param newComponent
+ * @throws HyracksDataException
+ */
+ public void afterFinalize(LSMOperationType opType, ILSMComponent newComponent) throws HyracksDataException;
+
+ public void setNumOfMutableComponents(int count);
}
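For completeness, the smallest possible implementation of the revised callback contract: a hedged no-op sketch that compiles against exactly the four methods declared above (assuming the same imports as the interface), useful as a default when nothing needs to happen at IO boundaries:

    // Minimal do-nothing implementation of the revised callback interface.
    public class NoOpIOOperationCallbackSketch implements ILSMIOOperationCallback {
        @Override
        public void beforeOperation(LSMOperationType opType) throws HyracksDataException {
        }

        @Override
        public void afterOperation(LSMOperationType opType, List<ILSMComponent> oldComponents,
                ILSMComponent newComponent) throws HyracksDataException {
        }

        @Override
        public void afterFinalize(LSMOperationType opType, ILSMComponent newComponent)
                throws HyracksDataException {
        }

        @Override
        public void setNumOfMutableComponents(int count) {
            // Nothing to track.
        }
    }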
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperationCallbackFactory.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperationCallbackFactory.java
index 6dbce32..804142e 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperationCallbackFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIOOperationCallbackFactory.java
@@ -17,5 +17,5 @@
import java.io.Serializable;
public interface ILSMIOOperationCallbackFactory extends Serializable {
- public ILSMIOOperationCallback createIOOperationCallback(Object syncObj);
+ public ILSMIOOperationCallback createIOOperationCallback();
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndex.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndex.java
index 105b53a..d37a105 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndex.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndex.java
@@ -32,17 +32,17 @@
* concurrent searches/updates/merges may be ongoing.
*/
public interface ILSMIndex extends IIndex {
-
+
public void deactivate(boolean flushOnExit) throws HyracksDataException;
public ILSMIndexAccessor createAccessor(IModificationOperationCallback modificationCallback,
- ISearchOperationCallback searchCallback);
-
- public boolean getFlushStatus();
+ ISearchOperationCallback searchCallback) throws HyracksDataException;
public ILSMOperationTracker getOperationTracker();
public ILSMIOOperationScheduler getIOScheduler();
+
+ public ILSMIOOperationCallback getIOOperationCallback();
public List<ILSMComponent> getImmutableComponents();
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexAccessor.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexAccessor.java
index cd60ded..3405b60 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexAccessor.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexAccessor.java
@@ -109,14 +109,4 @@
public void forceInsert(ITupleReference tuple) throws HyracksDataException, IndexException;
public void forceDelete(ITupleReference tuple) throws HyracksDataException, IndexException;
-
- /**
- * This method can be used to increase the number of 'active' operations of an index artificially,
- * without actually modifying the index.
- * This method does not block and is guaranteed to trigger the {@link ILSMOperationTracker}'s beforeOperation
- * and afterOperation calls.
- *
- * @throws HyracksDataException
- */
- public void noOp() throws HyracksDataException;
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexInternal.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexInternal.java
index c65c7cb..6020093 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexInternal.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexInternal.java
@@ -28,14 +28,14 @@
public interface ILSMIndexInternal extends ILSMIndex {
public ILSMIndexAccessorInternal createAccessor(IModificationOperationCallback modificationCallback,
- ISearchOperationCallback searchCallback);
+ ISearchOperationCallback searchCallback) throws HyracksDataException;
public void modify(IIndexOperationContext ictx, ITupleReference tuple) throws HyracksDataException, IndexException;
public void search(ILSMIndexOperationContext ictx, IIndexCursor cursor, ISearchPredicate pred)
throws HyracksDataException, IndexException;
- public boolean scheduleFlush(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
+ public void scheduleFlush(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException;
public ILSMComponent flush(ILSMIOOperation operation) throws HyracksDataException, IndexException;
@@ -43,13 +43,18 @@
public void scheduleMerge(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException, IndexException;
- public ILSMComponent merge(List<ILSMComponent> mergedComponents, ILSMIOOperation operation)
- throws HyracksDataException, IndexException;
+ public ILSMComponent merge(ILSMIOOperation operation) throws HyracksDataException, IndexException;
public void addComponent(ILSMComponent index);
public void subsumeMergedComponents(ILSMComponent newComponent, List<ILSMComponent> mergedComponents);
+ public void changeMutableComponent();
+
+ public void changeFlushStatusForCurrentMutableCompoent(boolean needsFlush);
+
+ public boolean hasFlushRequestForCurrentMutableComponent();
+
/**
* Populates the context's component holder with a snapshot of the components involved in the operation.
*
@@ -58,12 +63,6 @@
*/
public void getOperationalComponents(ILSMIndexOperationContext ctx);
- public List<ILSMComponent> getImmutableComponents();
-
public void markAsValid(ILSMComponent lsmComponent) throws HyracksDataException;
- public void setFlushStatus(boolean needsFlush);
-
- public boolean isFull();
-
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexOperationContext.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexOperationContext.java
index bbd2ca8..fcd4037 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexOperationContext.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMIndexOperationContext.java
@@ -26,4 +26,6 @@
public ISearchOperationCallback getSearchOperationCallback();
public IModificationOperationCallback getModificationCallback();
+
+ public void setCurrentMutableComponentId(int currentMutableComponentId);
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMMergePolicy.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMMergePolicy.java
index 5eaa6f6..1473071 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMMergePolicy.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/ILSMMergePolicy.java
@@ -19,5 +19,5 @@
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
public interface ILSMMergePolicy {
- public void diskComponentAdded(ILSMIndex index, int totalNumDiskComponents) throws HyracksDataException, IndexException;
+ public void diskComponentAdded(ILSMIndex index) throws HyracksDataException, IndexException;
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/IVirtualBufferCacheProvider.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/IVirtualBufferCacheProvider.java
index cd75df0..eff390f 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/IVirtualBufferCacheProvider.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/api/IVirtualBufferCacheProvider.java
@@ -15,9 +15,10 @@
package edu.uci.ics.hyracks.storage.am.lsm.common.api;
import java.io.Serializable;
+import java.util.List;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
public interface IVirtualBufferCacheProvider extends Serializable {
- public IVirtualBufferCache getVirtualBufferCache(IHyracksTaskContext ctx);
+ public List<IVirtualBufferCache> getVirtualBufferCaches(IHyracksTaskContext ctx);
}
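A provider now hands back one virtual buffer cache per memory component. A hypothetical skeleton; the numComponents field and the abstract construction hook are assumptions made for illustration, only the interface method comes from this hunk:

    // Hypothetical provider returning N virtual buffer caches, one per memory
    // component of the indexes it serves.
    public abstract class MultiVirtualBufferCacheProvider implements IVirtualBufferCacheProvider {
        private static final long serialVersionUID = 1L;
        private final int numComponents; // assumed configuration

        public MultiVirtualBufferCacheProvider(int numComponents) {
            this.numComponents = numComponents;
        }

        @Override
        public List<IVirtualBufferCache> getVirtualBufferCaches(IHyracksTaskContext ctx) {
            List<IVirtualBufferCache> vbcs = new ArrayList<IVirtualBufferCache>(numComponents);
            for (int i = 0; i < numComponents; i++) {
                vbcs.add(newVirtualBufferCache(ctx, i));
            }
            return vbcs;
        }

        // Subclasses decide how each cache is sized and backed.
        protected abstract IVirtualBufferCache newVirtualBufferCache(IHyracksTaskContext ctx, int index);
    }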
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelper.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelper.java
index e79a394..e277edc 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelper.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelper.java
@@ -15,10 +15,12 @@
package edu.uci.ics.hyracks.storage.am.lsm.common.dataflow;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexDataflowHelper;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
@@ -30,30 +32,30 @@
protected final double bloomFilterFalsePositiveRate;
- protected final IVirtualBufferCache virtualBufferCache;
+ protected final List<IVirtualBufferCache> virtualBufferCaches;
protected final ILSMMergePolicy mergePolicy;
protected final ILSMIOOperationScheduler ioScheduler;
protected final ILSMOperationTrackerProvider opTrackerFactory;
- protected final ILSMIOOperationCallbackProvider ioOpCallbackProvider;
+ protected final ILSMIOOperationCallbackFactory ioOpCallbackFactory;
public AbstractLSMIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- IVirtualBufferCache virtualBufferCache, ILSMMergePolicy mergePolicy,
+ List<IVirtualBufferCache> virtualBufferCaches, ILSMMergePolicy mergePolicy,
ILSMOperationTrackerProvider opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- this(opDesc, ctx, partition, virtualBufferCache, DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE, mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ ILSMIOOperationCallbackFactory ioOpCallbackFactory) {
+ this(opDesc, ctx, partition, virtualBufferCaches, DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE, mergePolicy,
+ opTrackerFactory, ioScheduler, ioOpCallbackFactory);
}
public AbstractLSMIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- IVirtualBufferCache virtualBufferCache, double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy,
- ILSMOperationTrackerProvider opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
+ List<IVirtualBufferCache> virtualBufferCaches, double bloomFilterFalsePositiveRate,
+ ILSMMergePolicy mergePolicy, ILSMOperationTrackerProvider opTrackerFactory,
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackFactory ioOpCallbackFactory) {
super(opDesc, ctx, partition);
- this.virtualBufferCache = virtualBufferCache;
+ this.virtualBufferCaches = virtualBufferCaches;
this.bloomFilterFalsePositiveRate = bloomFilterFalsePositiveRate;
this.mergePolicy = mergePolicy;
this.opTrackerFactory = opTrackerFactory;
this.ioScheduler = ioScheduler;
- this.ioOpCallbackProvider = ioOpCallbackProvider;
+ this.ioOpCallbackFactory = ioOpCallbackFactory;
}
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelperFactory.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelperFactory.java
index 205bf27..2c082bb 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelperFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/AbstractLSMIndexDataflowHelperFactory.java
@@ -16,7 +16,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.common.dataflow;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicyProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
@@ -29,18 +29,18 @@
protected final ILSMMergePolicyProvider mergePolicyProvider;
protected final ILSMOperationTrackerProvider opTrackerFactory;
protected final ILSMIOOperationSchedulerProvider ioSchedulerProvider;
- protected final ILSMIOOperationCallbackProvider ioOpCallbackProvider;
+ protected final ILSMIOOperationCallbackFactory ioOpCallbackFactory;
protected final double bloomFilterFalsePositiveRate;
public AbstractLSMIndexDataflowHelperFactory(IVirtualBufferCacheProvider virtualBufferCacheProvider,
ILSMMergePolicyProvider mergePolicyProvider, ILSMOperationTrackerProvider opTrackerFactory,
- ILSMIOOperationSchedulerProvider ioSchedulerProvider, ILSMIOOperationCallbackProvider ioOpCallbackProvider,
+ ILSMIOOperationSchedulerProvider ioSchedulerProvider, ILSMIOOperationCallbackFactory ioOpCallbackFactory,
double bloomFilterFalsePositiveRate) {
this.virtualBufferCacheProvider = virtualBufferCacheProvider;
this.mergePolicyProvider = mergePolicyProvider;
this.opTrackerFactory = opTrackerFactory;
this.ioSchedulerProvider = ioSchedulerProvider;
- this.ioOpCallbackProvider = ioOpCallbackProvider;
+ this.ioOpCallbackFactory = ioOpCallbackFactory;
this.bloomFilterFalsePositiveRate = bloomFilterFalsePositiveRate;
}
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/LSMIndexInsertUpdateDeleteOperatorNodePushable.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/LSMIndexInsertUpdateDeleteOperatorNodePushable.java
index 4c69634..5bf52e4 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/LSMIndexInsertUpdateDeleteOperatorNodePushable.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/dataflow/LSMIndexInsertUpdateDeleteOperatorNodePushable.java
@@ -46,7 +46,6 @@
if (tupleFilter != null) {
frameTuple.reset(accessor, i);
if (!tupleFilter.accept(frameTuple)) {
- lsmAccessor.noOp();
continue;
}
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractDiskLSMComponent.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractDiskLSMComponent.java
new file mode 100644
index 0000000..bc6baeb
--- /dev/null
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractDiskLSMComponent.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.storage.am.lsm.common.impls;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+public abstract class AbstractDiskLSMComponent extends AbstractLSMComponent {
+
+ public AbstractDiskLSMComponent() {
+ super();
+ state = ComponentState.READABLE_UNWRITABLE;
+ }
+
+ @Override
+ public boolean threadEnter(LSMOperationType opType, boolean isMutableComponent) {
+ assert state != ComponentState.INACTIVE;
+
+ switch (opType) {
+ case FORCE_MODIFICATION:
+ case MODIFICATION:
+ case SEARCH:
+ readerCount++;
+ break;
+ case MERGE:
+ if (state == ComponentState.READABLE_MERGING) {
+ // This should never happen unless two merges were scheduled concurrently
+ // and they have interleaving sets of components to be merged.
+ // This should be handled properly by the merge policy, but we guard against that here anyway.
+ return false;
+ }
+ state = ComponentState.READABLE_MERGING;
+ readerCount++;
+ break;
+ default:
+ throw new UnsupportedOperationException("Unsupported operation " + opType);
+ }
+ return true;
+ }
+
+ @Override
+ public void threadExit(LSMOperationType opType, boolean failedOperation, boolean isMutableComponent)
+ throws HyracksDataException {
+ switch (opType) {
+ case MERGE:
+ // In case two merge operations were scheduled to merge an overlapping set of components, the second merge will fail and it must
+ // reset those components back to their previous state.
+ if (failedOperation) {
+ state = ComponentState.READABLE_UNWRITABLE;
+ }
+ case FORCE_MODIFICATION:
+ case MODIFICATION:
+ case SEARCH:
+ readerCount--;
+ if (readerCount == 0 && state == ComponentState.READABLE_MERGING) {
+ state = ComponentState.INACTIVE;
+ }
+ break;
+ default:
+ throw new UnsupportedOperationException("Unsupported operation " + opType);
+ }
+ assert readerCount > -1;
+ }
+
+ @Override
+ public LSMComponentType getType() {
+ return LSMComponentType.DISK;
+ }
+
+ @Override
+ public ComponentState getState() {
+ return state;
+ }
+
+ protected abstract void destroy() throws HyracksDataException;
+
+}
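
The new disk-component class above encodes a small concurrency protocol: readers and merges reference-count the component, and a second merge that tries to claim an already-merging component is rejected rather than blocked. A self-contained sketch of just that guard, with stand-in types; the sketch uses synchronized for brevity, whereas the real threadEnter/threadExit appear to rely on LSMHarness serializing entry under the operation tracker's monitor (see the synchronized (opTracker) blocks later in this patch):

public class DiskComponentGuardSketch {
    enum State { READABLE_UNWRITABLE, READABLE_MERGING, INACTIVE }

    private State state = State.READABLE_UNWRITABLE;
    private int readerCount;

    // A merge may enter only if no other merge currently holds the component.
    synchronized boolean enterMerge() {
        if (state == State.READABLE_MERGING) {
            return false; // an overlapping merge already claimed this component
        }
        state = State.READABLE_MERGING;
        readerCount++;
        return true;
    }

    synchronized void exitMerge(boolean failed) {
        if (failed) {
            state = State.READABLE_UNWRITABLE; // roll back so readers can continue
        }
        readerCount--;
        if (readerCount == 0 && state == State.READABLE_MERGING) {
            state = State.INACTIVE; // subsumed by the merge; safe to destroy
        }
    }

    public static void main(String[] args) {
        DiskComponentGuardSketch c = new DiskComponentGuardSketch();
        System.out.println(c.enterMerge()); // true
        System.out.println(c.enterMerge()); // false: overlapping merge rejected
        c.exitMerge(false);
        System.out.println(c.state);        // INACTIVE
    }
}
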
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractImmutableLSMComponent.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractImmutableLSMComponent.java
deleted file mode 100644
index 376c2e4..0000000
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractImmutableLSMComponent.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hyracks.storage.am.lsm.common.impls;
-
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
-
-public abstract class AbstractImmutableLSMComponent implements ILSMComponent {
-
- private ComponentState state;
- private int readerCount;
-
- private enum ComponentState {
- READABLE,
- READABLE_MERGING,
- KILLED
- }
-
- public AbstractImmutableLSMComponent() {
- state = ComponentState.READABLE;
- readerCount = 0;
- }
-
- @Override
- public synchronized boolean threadEnter(LSMOperationType opType) {
- if (state == ComponentState.KILLED) {
- return false;
- }
-
- switch (opType) {
- case FORCE_MODIFICATION:
- case MODIFICATION:
- case SEARCH:
- readerCount++;
- break;
- case MERGE:
- if (state == ComponentState.READABLE_MERGING) {
- return false;
- }
- state = ComponentState.READABLE_MERGING;
- readerCount++;
- break;
- default:
- throw new UnsupportedOperationException("Unsupported operation " + opType);
- }
- return true;
- }
-
- @Override
- public synchronized void threadExit(LSMOperationType opType, boolean failedOperation) throws HyracksDataException {
- switch (opType) {
- case MERGE:
- if (failedOperation) {
- state = ComponentState.READABLE;
- }
- case FORCE_MODIFICATION:
- case MODIFICATION:
- case SEARCH:
- readerCount--;
-
- if (readerCount == 0 && state == ComponentState.READABLE_MERGING) {
- destroy();
- state = ComponentState.KILLED;
- }
- break;
- default:
- throw new UnsupportedOperationException("Unsupported operation " + opType);
- }
- }
-
- protected abstract void destroy() throws HyracksDataException;
-
-}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMComponent.java
similarity index 61%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMComponent.java
index cde5022..5dab25f 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMComponent.java
@@ -12,17 +12,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package edu.uci.ics.hyracks.storage.am.lsm.common.impls;
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+public abstract class AbstractLSMComponent implements ILSMComponent {
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
+ protected ComponentState state;
+ protected int readerCount;
+
+ public AbstractLSMComponent() {
+ readerCount = 0;
}
- public BTreeDuplicateKeyException(String message) {
- super(message);
+ @Override
+ public ComponentState getState() {
+ return state;
}
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java
index d9da9e5..44bcfc2 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractLSMIndex.java
@@ -18,13 +18,15 @@
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
-import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMHarness;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexFileManager;
@@ -41,37 +43,45 @@
protected final ILSMHarness lsmHarness;
protected final ILSMIOOperationScheduler ioScheduler;
- protected final ILSMIOOperationCallbackProvider ioOpCallbackProvider;
+ protected final ILSMIOOperationCallback ioOpCallback;
// In-memory components.
- protected final IVirtualBufferCache virtualBufferCache;
+ protected final List<ILSMComponent> memoryComponents;
+ protected final List<IVirtualBufferCache> virtualBufferCaches;
+ protected AtomicInteger currentMutableComponentId;
// On-disk components.
protected final IBufferCache diskBufferCache;
protected final ILSMIndexFileManager fileManager;
protected final IFileMapProvider diskFileMapProvider;
- protected final AtomicReference<List<ILSMComponent>> componentsRef;
+ protected final List<ILSMComponent> diskComponents;
protected final double bloomFilterFalsePositiveRate;
protected boolean isActivated;
- private boolean needsFlush = false;
+ protected final AtomicBoolean[] flushRequests;
- public AbstractLSMIndex(IVirtualBufferCache virtualBufferCache, IBufferCache diskBufferCache,
+ public AbstractLSMIndex(List<IVirtualBufferCache> virtualBufferCaches, IBufferCache diskBufferCache,
ILSMIndexFileManager fileManager, IFileMapProvider diskFileMapProvider,
double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- this.virtualBufferCache = virtualBufferCache;
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback) {
+ this.virtualBufferCaches = virtualBufferCaches;
this.diskBufferCache = diskBufferCache;
this.diskFileMapProvider = diskFileMapProvider;
this.fileManager = fileManager;
this.bloomFilterFalsePositiveRate = bloomFilterFalsePositiveRate;
this.ioScheduler = ioScheduler;
- this.ioOpCallbackProvider = ioOpCallbackProvider;
+ this.ioOpCallback = ioOpCallback;
+ this.ioOpCallback.setNumOfMutableComponents(virtualBufferCaches.size());
lsmHarness = new LSMHarness(this, mergePolicy, opTracker);
isActivated = false;
- componentsRef = new AtomicReference<List<ILSMComponent>>();
- componentsRef.set(new LinkedList<ILSMComponent>());
+ diskComponents = new LinkedList<ILSMComponent>();
+ memoryComponents = new ArrayList<ILSMComponent>();
+ currentMutableComponentId = new AtomicInteger();
+ flushRequests = new AtomicBoolean[virtualBufferCaches.size()];
+ for (int i = 0; i < virtualBufferCaches.size(); i++) {
+ flushRequests[i] = new AtomicBoolean();
+ }
}
protected void forceFlushDirtyPages(ITreeIndex treeIndex) throws HyracksDataException {
@@ -129,44 +139,35 @@
@Override
public void addComponent(ILSMComponent c) {
- List<ILSMComponent> oldList = componentsRef.get();
- List<ILSMComponent> newList = new ArrayList<ILSMComponent>();
- newList.add(c);
- for (ILSMComponent oc : oldList) {
- newList.add(oc);
- }
- componentsRef.set(newList);
+ diskComponents.add(0, c);
}
@Override
public void subsumeMergedComponents(ILSMComponent newComponent, List<ILSMComponent> mergedComponents) {
- List<ILSMComponent> oldList = componentsRef.get();
- List<ILSMComponent> newList = new ArrayList<ILSMComponent>();
- int swapIndex = oldList.indexOf(mergedComponents.get(0));
- int swapSize = mergedComponents.size();
- for (int i = 0; i < oldList.size(); i++) {
- if (i < swapIndex || i >= swapIndex + swapSize) {
- newList.add(oldList.get(i));
- } else if (i == swapIndex) {
- newList.add(newComponent);
- }
- }
- componentsRef.set(newList);
+ int swapIndex = diskComponents.indexOf(mergedComponents.get(0));
+ diskComponents.removeAll(mergedComponents);
+ diskComponents.add(swapIndex, newComponent);
+ }
+
+ @Override
+ public void changeMutableComponent() {
+ currentMutableComponentId.set((currentMutableComponentId.get() + 1) % memoryComponents.size());
+ ((AbstractMemoryLSMComponent) memoryComponents.get(currentMutableComponentId.get())).setActive();
}
@Override
public List<ILSMComponent> getImmutableComponents() {
- return componentsRef.get();
+ return diskComponents;
}
@Override
- public void setFlushStatus(boolean needsFlush) {
- this.needsFlush = needsFlush;
+ public void changeFlushStatusForCurrentMutableComponent(boolean needsFlush) {
+ flushRequests[currentMutableComponentId.get()].set(needsFlush);
}
@Override
- public boolean getFlushStatus() {
- return needsFlush;
+ public boolean hasFlushRequestForCurrentMutableComponent() {
+ return flushRequests[currentMutableComponentId.get()].get();
}
@Override
@@ -180,8 +181,8 @@
}
@Override
- public boolean isFull() {
- return virtualBufferCache.isFull();
+ public ILSMIOOperationCallback getIOOperationCallback() {
+ return ioOpCallback;
}
@Override
@@ -189,6 +190,18 @@
return diskBufferCache;
}
+ public boolean isEmptyIndex() throws HyracksDataException {
+ boolean isModified = false;
+ for (ILSMComponent c : memoryComponents) {
+ AbstractMemoryLSMComponent mutableComponent = (AbstractMemoryLSMComponent) c;
+ if (mutableComponent.isModified()) {
+ isModified = true;
+ break;
+ }
+ }
+ return diskComponents.isEmpty() && !isModified;
+ }
+
@Override
public String toString() {
return "LSMIndex [" + fileManager.getBaseDir() + "]";
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractMemoryLSMComponent.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractMemoryLSMComponent.java
new file mode 100644
index 0000000..ce4817b
--- /dev/null
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractMemoryLSMComponent.java
@@ -0,0 +1,178 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.storage.am.lsm.common.impls;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
+
+public abstract class AbstractMemoryLSMComponent extends AbstractLSMComponent {
+
+ private int writerCount;
+ private final IVirtualBufferCache vbc;
+ private final AtomicBoolean isModified;
+ private boolean requestedToBeActive;
+
+ public AbstractMemoryLSMComponent(IVirtualBufferCache vbc, boolean isActive) {
+ super();
+ this.vbc = vbc;
+ writerCount = 0;
+ if (isActive) {
+ state = ComponentState.READABLE_WRITABLE;
+ } else {
+ state = ComponentState.INACTIVE;
+ }
+ isModified = new AtomicBoolean();
+ }
+
+ @Override
+ public boolean threadEnter(LSMOperationType opType, boolean isMutableComponent) throws HyracksDataException {
+ if (state == ComponentState.INACTIVE && requestedToBeActive) {
+ state = ComponentState.READABLE_WRITABLE;
+ requestedToBeActive = false;
+ }
+ switch (opType) {
+ case FORCE_MODIFICATION:
+ if (isMutableComponent) {
+ if (state == ComponentState.READABLE_WRITABLE || state == ComponentState.READABLE_UNWRITABLE) {
+ writerCount++;
+ } else {
+ return false;
+ }
+ } else {
+ if (state == ComponentState.READABLE_UNWRITABLE
+ || state == ComponentState.READABLE_UNWRITABLE_FLUSHING) {
+ readerCount++;
+ } else {
+ return false;
+ }
+ }
+ break;
+ case MODIFICATION:
+ if (isMutableComponent) {
+ if (state == ComponentState.READABLE_WRITABLE) {
+ writerCount++;
+ } else {
+ return false;
+ }
+ } else {
+ if (state == ComponentState.READABLE_UNWRITABLE
+ || state == ComponentState.READABLE_UNWRITABLE_FLUSHING) {
+ readerCount++;
+ } else {
+ return false;
+ }
+ }
+ break;
+ case SEARCH:
+ if (state == ComponentState.READABLE_WRITABLE || state == ComponentState.READABLE_UNWRITABLE
+ || state == ComponentState.READABLE_UNWRITABLE_FLUSHING) {
+ readerCount++;
+ } else {
+ return false;
+ }
+ break;
+ case FLUSH:
+ if (state == ComponentState.READABLE_WRITABLE || state == ComponentState.READABLE_UNWRITABLE) {
+ assert writerCount == 0;
+ state = ComponentState.READABLE_UNWRITABLE_FLUSHING;
+ readerCount++;
+ } else {
+ return false;
+ }
+ break;
+ default:
+ throw new UnsupportedOperationException("Unsupported operation " + opType);
+ }
+ return true;
+ }
+
+ @Override
+ public void threadExit(LSMOperationType opType, boolean failedOperation, boolean isMutableComponent)
+ throws HyracksDataException {
+ switch (opType) {
+ case FORCE_MODIFICATION:
+ case MODIFICATION:
+ if (isMutableComponent) {
+ writerCount--;
+ if (state == ComponentState.READABLE_WRITABLE && isFull()) {
+ state = ComponentState.READABLE_UNWRITABLE;
+ }
+ } else {
+ readerCount--;
+ if (state == ComponentState.UNREADABLE_UNWRITABLE && readerCount == 0) {
+ state = ComponentState.INACTIVE;
+ }
+ }
+ break;
+ case SEARCH:
+ readerCount--;
+ if (state == ComponentState.UNREADABLE_UNWRITABLE && readerCount == 0) {
+ state = ComponentState.INACTIVE;
+ }
+ break;
+ case FLUSH:
+ assert state == ComponentState.READABLE_UNWRITABLE_FLUSHING;
+ readerCount--;
+ if (readerCount == 0) {
+ state = ComponentState.INACTIVE;
+ } else {
+ state = ComponentState.UNREADABLE_UNWRITABLE;
+ }
+ break;
+ default:
+ throw new UnsupportedOperationException("Unsupported operation " + opType);
+ }
+ assert readerCount > -1 && writerCount > -1;
+ }
+
+ public boolean isReadable() {
+ if (state == ComponentState.INACTIVE || state == ComponentState.UNREADABLE_UNWRITABLE) {
+ return false;
+ }
+ return true;
+ }
+
+ @Override
+ public LSMComponentType getType() {
+ return LSMComponentType.MEMORY;
+ }
+
+ @Override
+ public ComponentState getState() {
+ return state;
+ }
+
+ public void setActive() {
+ requestedToBeActive = true;
+ }
+
+ public void setIsModified() {
+ isModified.set(true);
+ }
+
+ public boolean isModified() {
+ return isModified.get();
+ }
+
+ public boolean isFull() {
+ return vbc.isFull();
+ }
+
+ protected void reset() throws HyracksDataException {
+ isModified.set(false);
+ }
+}
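
The memory component above is a five-state machine (INACTIVE, READABLE_WRITABLE, READABLE_UNWRITABLE, READABLE_UNWRITABLE_FLUSHING, UNREADABLE_UNWRITABLE). The flush path is the subtle part, so here is a compact, runnable sketch of only those transitions; reader/writer handling for the other operation types is elided, and synchronized again stands in for the operation tracker's monitor:

public class FlushStateSketch {
    enum State { INACTIVE, READABLE_WRITABLE, READABLE_UNWRITABLE,
                 READABLE_UNWRITABLE_FLUSHING, UNREADABLE_UNWRITABLE }

    private State state = State.READABLE_WRITABLE;
    private int readerCount;

    synchronized boolean enterFlush() {
        if (state == State.READABLE_WRITABLE || state == State.READABLE_UNWRITABLE) {
            state = State.READABLE_UNWRITABLE_FLUSHING;
            readerCount++; // the flusher holds a reader reference while it runs
            return true;
        }
        return false; // already flushing, or not active
    }

    synchronized void exitFlush() {
        readerCount--;
        // Last one out deactivates the component; otherwise searches are still
        // draining, so block new readers until they finish.
        state = (readerCount == 0) ? State.INACTIVE : State.UNREADABLE_UNWRITABLE;
    }

    public static void main(String[] args) {
        FlushStateSketch c = new FlushStateSketch();
        System.out.println(c.enterFlush()); // true
        System.out.println(c.enterFlush()); // false: a flush is already in flight
        c.exitFlush();
        System.out.println(c.state);        // INACTIVE -> eligible for reset() and reuse
    }
}
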
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractMutableLSMComponent.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractMutableLSMComponent.java
deleted file mode 100644
index 6ca6581..0000000
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AbstractMutableLSMComponent.java
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hyracks.storage.am.lsm.common.impls;
-
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
-
-public abstract class AbstractMutableLSMComponent implements ILSMComponent {
-
- private int readerCount;
- private int writerCount;
- private ComponentState state;
-
- private boolean isModified;
-
- private enum ComponentState {
- READABLE_WRITABLE,
- READABLE_UNWRITABLE,
- READABLE_UNWRITABLE_FLUSHING,
- UNREADABLE_UNWRITABLE
- }
-
- public AbstractMutableLSMComponent() {
- readerCount = 0;
- writerCount = 0;
- state = ComponentState.READABLE_WRITABLE;
- isModified = false;
- }
-
- @Override
- public synchronized boolean threadEnter(LSMOperationType opType) throws InterruptedException {
- switch (opType) {
- case FORCE_MODIFICATION:
- if (state != ComponentState.READABLE_WRITABLE && state != ComponentState.READABLE_UNWRITABLE) {
- return false;
- }
- writerCount++;
- break;
- case MODIFICATION:
- if (state != ComponentState.READABLE_WRITABLE) {
- return false;
- }
- writerCount++;
- break;
- case SEARCH:
- if (state == ComponentState.UNREADABLE_UNWRITABLE) {
- return false;
- }
- readerCount++;
- break;
- case FLUSH:
- if (state == ComponentState.READABLE_UNWRITABLE_FLUSHING
- || state == ComponentState.UNREADABLE_UNWRITABLE) {
- return false;
- }
-
- state = ComponentState.READABLE_UNWRITABLE_FLUSHING;
- while (writerCount > 0) {
- wait();
- }
- readerCount++;
- break;
- default:
- throw new UnsupportedOperationException("Unsupported operation " + opType);
- }
- return true;
- }
-
- @Override
- public synchronized void threadExit(LSMOperationType opType, boolean failedOperation) throws HyracksDataException {
- switch (opType) {
- case FORCE_MODIFICATION:
- case MODIFICATION:
- writerCount--;
- if (state == ComponentState.READABLE_WRITABLE && isFull()) {
- state = ComponentState.READABLE_UNWRITABLE;
- }
- break;
- case SEARCH:
- readerCount--;
- if (state == ComponentState.UNREADABLE_UNWRITABLE && readerCount == 0) {
- reset();
- state = ComponentState.READABLE_WRITABLE;
- } else if (state == ComponentState.READABLE_WRITABLE && isFull()) {
- state = ComponentState.READABLE_UNWRITABLE;
- }
- break;
- case FLUSH:
- if (failedOperation) {
- state = isFull() ? ComponentState.READABLE_UNWRITABLE : ComponentState.READABLE_WRITABLE;
- }
- readerCount--;
- if (readerCount == 0) {
- reset();
- state = ComponentState.READABLE_WRITABLE;
- } else if (state == ComponentState.READABLE_UNWRITABLE_FLUSHING) {
- state = ComponentState.UNREADABLE_UNWRITABLE;
- }
- break;
- default:
- throw new UnsupportedOperationException("Unsupported operation " + opType);
- }
- notifyAll();
- }
-
- public void setIsModified() {
- isModified = true;
- }
-
- public boolean isModified() {
- return isModified;
- }
-
- protected abstract boolean isFull();
-
- protected void reset() throws HyracksDataException {
- isModified = false;
- }
-}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AsynchronousScheduler.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AsynchronousScheduler.java
new file mode 100644
index 0000000..25894f1
--- /dev/null
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/AsynchronousScheduler.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.storage.am.lsm.common.impls;
+
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ThreadFactory;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperation;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
+
+public class AsynchronousScheduler implements ILSMIOOperationScheduler {
+ public static final AsynchronousScheduler INSTANCE = new AsynchronousScheduler();
+ private ExecutorService executor;
+
+ public void init(ThreadFactory threadFactory) {
+ executor = Executors.newCachedThreadPool(threadFactory);
+ }
+
+ @Override
+ public void scheduleOperation(ILSMIOOperation operation) throws HyracksDataException {
+ executor.submit(operation);
+ }
+}
\ No newline at end of file
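
AsynchronousScheduler above is small but load-bearing: I/O operations are submitted to a cached thread pool, and the perform() -> call() change in SynchronousScheduler later in this patch indicates that ILSMIOOperation is now a Callable. A self-contained sketch of the same pattern with JDK types only:

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;

public class AsyncSchedulerSketch {
    private ExecutorService executor;

    // Mirrors init(): a cached pool grows on demand, so concurrent flushes and
    // merges never queue behind one another.
    public void init(ThreadFactory threadFactory) {
        executor = Executors.newCachedThreadPool(threadFactory);
    }

    // Mirrors scheduleOperation(): submit() accepts the operation because it is a Callable.
    public void scheduleOperation(Callable<Boolean> operation) {
        executor.submit(operation);
    }

    public static void main(String[] args) {
        AsyncSchedulerSketch s = new AsyncSchedulerSketch();
        s.init(Executors.defaultThreadFactory());
        s.scheduleOperation(new Callable<Boolean>() {
            public Boolean call() {
                System.out.println("flush running on " + Thread.currentThread().getName());
                return true;
            }
        });
        s.executor.shutdown(); // lets the submitted operation finish, then the pool exits
    }
}

One caveat worth noting: an exception thrown inside a submitted Callable is captured in the returned Future, which the scheduler discards, so failures must be reported through the I/O operation callbacks instead.
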
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/BlockingIOOperationCallbackWrapper.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/BlockingIOOperationCallbackWrapper.java
index 2510311..fefe812 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/BlockingIOOperationCallbackWrapper.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/BlockingIOOperationCallbackWrapper.java
@@ -32,26 +32,31 @@
public synchronized void waitForIO() throws InterruptedException {
if (!notified) {
- this.wait();
+ wait();
}
notified = false;
}
@Override
- public void beforeOperation() throws HyracksDataException {
- wrappedCallback.beforeOperation();
+ public void beforeOperation(LSMOperationType opType) throws HyracksDataException {
+ wrappedCallback.beforeOperation(opType);
}
@Override
- public void afterOperation(List<ILSMComponent> oldComponents, ILSMComponent newComponent)
+ public void afterOperation(LSMOperationType opType, List<ILSMComponent> oldComponents, ILSMComponent newComponent)
throws HyracksDataException {
- wrappedCallback.afterOperation(oldComponents, newComponent);
+ wrappedCallback.afterOperation(opType, oldComponents, newComponent);
}
@Override
- public synchronized void afterFinalize(ILSMComponent newComponent) throws HyracksDataException {
- wrappedCallback.afterFinalize(newComponent);
- this.notifyAll();
+ public synchronized void afterFinalize(LSMOperationType opType, ILSMComponent newComponent) throws HyracksDataException {
+ wrappedCallback.afterFinalize(opType, newComponent);
+ notifyAll();
notified = true;
}
+
+ @Override
+ public void setNumOfMutableComponents(int count) {
+ wrappedCallback.setNumOfMutableComponents(count);
+ }
}
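
The wrapper above converts the asynchronous callback protocol into a blocking wait: afterFinalize() flips a flag and wakes the waiter, and waitForIO() skips the wait if the notification already happened. A self-contained sketch of that handshake (stand-ins for the Hyracks types); note the original's if (!notified) is mirrored here, though a while loop would additionally guard against spurious wakeups:

public class BlockingWaitSketch {
    private boolean notified = false;

    public synchronized void waitForIO() throws InterruptedException {
        if (!notified) {
            wait(); // a while loop would be the defensive choice here
        }
        notified = false; // re-arm for the next operation
    }

    public synchronized void afterFinalize() {
        notifyAll();
        notified = true; // set inside the monitor, so waiters observe it on wakeup
    }

    public static void main(String[] args) throws InterruptedException {
        final BlockingWaitSketch s = new BlockingWaitSketch();
        new Thread(new Runnable() {
            public void run() {
                s.afterFinalize();
            }
        }).start();
        s.waitForIO(); // returns once afterFinalize has run, in either arrival order
        System.out.println("I/O finished");
    }
}
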
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/ConstantMergePolicy.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/ConstantMergePolicy.java
index 0ecfeb5..b6f5657 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/ConstantMergePolicy.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/ConstantMergePolicy.java
@@ -31,9 +31,8 @@
}
@Override
- public void diskComponentAdded(final ILSMIndex index, int totalNumDiskComponents) throws HyracksDataException,
- IndexException {
- if (totalNumDiskComponents >= threshold) {
+ public void diskComponentAdded(final ILSMIndex index) throws HyracksDataException, IndexException {
+ if (index.getImmutableComponents().size() >= threshold) {
ILSMIndexAccessor accessor = (ILSMIndexAccessor) index.createAccessor(NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
accessor.scheduleMerge(NoOpIOOperationCallback.INSTANCE);
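
With the signature change above, the merge policy derives the component count from the index itself rather than trusting a caller-supplied total. A runnable sketch of the threshold logic, with strings standing in for disk components and a print standing in for the scheduled merge:

import java.util.LinkedList;
import java.util.List;

public class ConstantMergeSketch {
    private final int threshold;
    private final List<String> diskComponents = new LinkedList<String>();

    ConstantMergeSketch(int threshold) {
        this.threshold = threshold;
    }

    void diskComponentAdded(String component) {
        diskComponents.add(0, component); // newest first, as in AbstractLSMIndex.addComponent()
        if (diskComponents.size() >= threshold) {
            System.out.println("scheduling merge of " + diskComponents);
            diskComponents.clear();       // pretend the merge subsumed them...
            diskComponents.add("merged"); // ...into one new component
        }
    }

    public static void main(String[] args) {
        ConstantMergeSketch policy = new ConstantMergeSketch(3);
        policy.diskComponentAdded("c1");
        policy.diskComponentAdded("c2");
        policy.diskComponentAdded("c3"); // crossing the threshold triggers the merge
    }
}
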
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMHarness.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMHarness.java
index 1632725..ca775b7 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMHarness.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMHarness.java
@@ -15,7 +15,6 @@
package edu.uci.ics.hyracks.storage.am.lsm.common.impls;
-import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -27,6 +26,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMHarness;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperation;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
@@ -48,63 +48,154 @@
this.mergePolicy = mergePolicy;
}
- private void threadExit(ILSMIndexOperationContext opCtx, LSMOperationType opType) throws HyracksDataException {
- if (!lsmIndex.getFlushStatus() && lsmIndex.isFull()) {
- lsmIndex.setFlushStatus(true);
- }
- opTracker.afterOperation(lsmIndex, opType, opCtx.getSearchOperationCallback(), opCtx.getModificationCallback());
- }
-
- private boolean getAndEnterComponents(ILSMIndexOperationContext ctx, LSMOperationType opType, boolean tryOperation)
+ private boolean getAndEnterComponents(ILSMIndexOperationContext ctx, LSMOperationType opType, boolean isTryOperation)
throws HyracksDataException {
- int numEntered = 0;
- boolean entranceSuccessful = false;
- List<ILSMComponent> entered = new ArrayList<ILSMComponent>();
-
- while (!entranceSuccessful) {
- entered.clear();
- lsmIndex.getOperationalComponents(ctx);
- List<ILSMComponent> components = ctx.getComponentHolder();
- try {
- for (ILSMComponent c : components) {
- if (!c.threadEnter(opType)) {
- break;
- }
- numEntered++;
- entered.add(c);
- }
- entranceSuccessful = numEntered == components.size();
- } catch (InterruptedException e) {
- entranceSuccessful = false;
- throw new HyracksDataException(e);
- } finally {
- if (!entranceSuccessful) {
- for (ILSMComponent c : components) {
- if (numEntered <= 0) {
- break;
+ synchronized (opTracker) {
+ while (true) {
+ lsmIndex.getOperationalComponents(ctx);
+ // Before entering the components, screen out the corner cases that should not proceed at all.
+ switch (opType) {
+ case FLUSH:
+ ILSMComponent flushingComponent = ctx.getComponentHolder().get(0);
+ if (!((AbstractMemoryLSMComponent) flushingComponent).isModified()) {
+ // The mutable component has not been modified by any writer. There is nothing to flush.
+ return false;
}
- c.threadExit(opType, true);
- numEntered--;
- }
+ break;
+ case MERGE:
+ if (ctx.getComponentHolder().size() < 2) {
+ // There is only a single component. There is nothing to merge.
+ return false;
+ }
+ default:
+ break;
+ }
+ if (enterComponents(ctx, opType)) {
+ return true;
+ } else if (isTryOperation) {
+ return false;
+ }
+ try {
+ // Flush and merge operations should never reach this wait call, because they are always try operations.
+ // If they fail to enter the components, it means that there is an ongoing flush/merge operation on
+ // the same components, so they should not proceed.
+ opTracker.wait();
+ } catch (InterruptedException e) {
+ throw new HyracksDataException(e);
}
}
- if (tryOperation && !entranceSuccessful) {
+ }
+ }
+
+ private boolean enterComponents(ILSMIndexOperationContext ctx, LSMOperationType opType) throws HyracksDataException {
+ List<ILSMComponent> components = ctx.getComponentHolder();
+ int numEntered = 0;
+ boolean entranceSuccessful = false;
+ try {
+ for (ILSMComponent c : components) {
+ boolean isMutableComponent = numEntered == 0 && c.getType() == LSMComponentType.MEMORY;
+ if (!c.threadEnter(opType, isMutableComponent)) {
+ break;
+ }
+ numEntered++;
+ }
+ entranceSuccessful = numEntered == components.size();
+ } finally {
+ if (!entranceSuccessful) {
+ int i = 0;
+ for (ILSMComponent c : components) {
+ if (numEntered == 0) {
+ break;
+ }
+ boolean isMutableComponent = i == 0 && c.getType() == LSMComponentType.MEMORY;
+ c.threadExit(opType, true, isMutableComponent);
+ i++;
+ numEntered--;
+ }
return false;
}
}
-
+ // Check whether any action needs to be taken based on the operation type.
+ switch (opType) {
+ case FLUSH:
+ lsmIndex.getIOOperationCallback().beforeOperation(LSMOperationType.FLUSH);
+ // Changing the flush status should *always* precede changing the mutable component.
+ lsmIndex.changeFlushStatusForCurrentMutableComponent(false);
+ lsmIndex.changeMutableComponent();
+ // Notify all waiting threads whenever a flush has been scheduled, since they will then
+ // recheck whether they can enter the new mutable component.
+ opTracker.notifyAll();
+ break;
+ case MERGE:
+ lsmIndex.getIOOperationCallback().beforeOperation(LSMOperationType.MERGE);
+ default:
+ break;
+ }
opTracker.beforeOperation(lsmIndex, opType, ctx.getSearchOperationCallback(), ctx.getModificationCallback());
return true;
}
- private void exitComponents(ILSMIndexOperationContext ctx, LSMOperationType opType, boolean failedOperation)
- throws HyracksDataException {
- try {
- for (ILSMComponent c : ctx.getComponentHolder()) {
- c.threadExit(opType, failedOperation);
+ private void exitComponents(ILSMIndexOperationContext ctx, LSMOperationType opType, ILSMComponent newComponent,
+ boolean failedOperation) throws HyracksDataException, IndexException {
+ synchronized (opTracker) {
+ try {
+ int i = 0;
+ // First, check whether any action needs to be taken based on the state of each component.
+ for (ILSMComponent c : ctx.getComponentHolder()) {
+ boolean isMutableComponent = i == 0 && c.getType() == LSMComponentType.MEMORY;
+ c.threadExit(opType, failedOperation, isMutableComponent);
+ if (c.getType() == LSMComponentType.MEMORY) {
+ switch (c.getState()) {
+ case READABLE_UNWRITABLE:
+ if (isMutableComponent
+ && (opType == LSMOperationType.MODIFICATION || opType == LSMOperationType.FORCE_MODIFICATION)) {
+ lsmIndex.changeFlushStatusForCurrentMutableComponent(true);
+ }
+ break;
+ case INACTIVE:
+ ((AbstractMemoryLSMComponent) c).reset();
+ // Notify all waiting threads whenever the mutable component's state changes to inactive. This is important because
+ // even though we switched the mutable components, it is possible that the component that we just switched
+ // to is still busy flushing its data to disk. Thus, the notification that was issued upon scheduling the flush
+ // is not enough.
+ opTracker.notifyAll();
+ break;
+ default:
+ break;
+ }
+ } else {
+ switch (c.getState()) {
+ case INACTIVE:
+ ((AbstractDiskLSMComponent) c).destroy();
+ break;
+ default:
+ break;
+ }
+ }
+ i++;
+ }
+ // Then, perform any action required by the operation type.
+ switch (opType) {
+ case FLUSH:
+ // newComponent is null if the flush op. was not performed.
+ if (newComponent != null) {
+ lsmIndex.addComponent(newComponent);
+ mergePolicy.diskComponentAdded(lsmIndex);
+ }
+ break;
+ case MERGE:
+ // newComponent is null if the merge op. was not performed.
+ if (newComponent != null) {
+ lsmIndex.subsumeMergedComponents(newComponent, ctx.getComponentHolder());
+ }
+ break;
+ default:
+ break;
+ }
+ } finally {
+ opTracker.afterOperation(lsmIndex, opType, ctx.getSearchOperationCallback(),
+ ctx.getModificationCallback());
}
- } finally {
- threadExit(ctx, opType);
}
}
@@ -129,10 +220,12 @@
}
try {
lsmIndex.modify(ctx, tuple);
+ // The mutable component is always at index 0 of the component holder.
+ AbstractMemoryLSMComponent mutableComponent = (AbstractMemoryLSMComponent) ctx.getComponentHolder().get(0);
+ mutableComponent.setIsModified();
} finally {
- exitComponents(ctx, opType, false);
+ exitComponents(ctx, opType, null, false);
}
-
return true;
}
@@ -143,11 +236,8 @@
getAndEnterComponents(ctx, opType, false);
try {
lsmIndex.search(ctx, cursor, pred);
- } catch (HyracksDataException e) {
- exitComponents(ctx, opType, true);
- throw e;
- } catch (IndexException e) {
- exitComponents(ctx, opType, true);
+ } catch (HyracksDataException | IndexException e) {
+ exitComponents(ctx, opType, null, true);
throw e;
}
}
@@ -155,95 +245,86 @@
@Override
public void endSearch(ILSMIndexOperationContext ctx) throws HyracksDataException {
if (ctx.getOperation() == IndexOperation.SEARCH) {
- exitComponents(ctx, LSMOperationType.SEARCH, false);
+ try {
+ exitComponents(ctx, LSMOperationType.SEARCH, null, false);
+ } catch (IndexException e) {
+ throw new HyracksDataException(e);
+ }
}
}
@Override
- public void noOp(ILSMIndexOperationContext ctx) throws HyracksDataException {
- LSMOperationType opType = LSMOperationType.NOOP;
- opTracker.beforeOperation(lsmIndex, opType, ctx.getSearchOperationCallback(), ctx.getModificationCallback());
- threadExit(ctx, opType);
- }
-
- @Override
public void scheduleFlush(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException {
if (!getAndEnterComponents(ctx, LSMOperationType.FLUSH, true)) {
+ callback.afterFinalize(LSMOperationType.FLUSH, null);
return;
}
-
- lsmIndex.setFlushStatus(false);
-
- if (!lsmIndex.scheduleFlush(ctx, callback)) {
- callback.beforeOperation();
- callback.afterOperation(null, null);
- callback.afterFinalize(null);
- exitComponents(ctx, LSMOperationType.FLUSH, false);
- }
+ lsmIndex.scheduleFlush(ctx, callback);
}
@Override
public void flush(ILSMIndexOperationContext ctx, ILSMIOOperation operation) throws HyracksDataException,
IndexException {
- operation.getCallback().beforeOperation();
if (LOGGER.isLoggable(Level.INFO)) {
- LOGGER.info(lsmIndex + ": flushing");
+ LOGGER.info("Started a flush operation for index: " + lsmIndex + " ...");
}
- ILSMComponent newComponent = lsmIndex.flush(operation);
- operation.getCallback().afterOperation(null, newComponent);
- lsmIndex.markAsValid(newComponent);
- operation.getCallback().afterFinalize(newComponent);
-
- lsmIndex.addComponent(newComponent);
- int numComponents = lsmIndex.getImmutableComponents().size();
-
- mergePolicy.diskComponentAdded(lsmIndex, numComponents);
- exitComponents(ctx, LSMOperationType.FLUSH, false);
+ ILSMComponent newComponent = null;
+ try {
+ newComponent = lsmIndex.flush(operation);
+ operation.getCallback().afterOperation(LSMOperationType.FLUSH, null, newComponent);
+ lsmIndex.markAsValid(newComponent);
+ } finally {
+ exitComponents(ctx, LSMOperationType.FLUSH, newComponent, false);
+ operation.getCallback().afterFinalize(LSMOperationType.FLUSH, newComponent);
+ }
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("Finished the flush operation for index: " + lsmIndex);
+ }
}
@Override
public void scheduleMerge(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException, IndexException {
- LSMOperationType opType = LSMOperationType.MERGE;
- if (!getAndEnterComponents(ctx, opType, false)) {
+ // Merge should always be a try operation, because it should never fail to enter the components unless the merge policy is erroneous.
+ if (!getAndEnterComponents(ctx, LSMOperationType.MERGE, true)) {
+ callback.afterFinalize(LSMOperationType.MERGE, null);
return;
}
- if (ctx.getComponentHolder().size() > 1) {
- lsmIndex.scheduleMerge(ctx, callback);
- } else {
- exitComponents(ctx, opType, true);
- }
+ lsmIndex.scheduleMerge(ctx, callback);
}
@Override
public void merge(ILSMIndexOperationContext ctx, ILSMIOOperation operation) throws HyracksDataException,
IndexException {
- List<ILSMComponent> mergedComponents = new ArrayList<ILSMComponent>();
- operation.getCallback().beforeOperation();
if (LOGGER.isLoggable(Level.INFO)) {
- LOGGER.info(lsmIndex + ": merging");
+ LOGGER.info("Started a merge operation for index: " + lsmIndex + " ...");
}
- ILSMComponent newComponent = lsmIndex.merge(mergedComponents, operation);
- ctx.getComponentHolder().addAll(mergedComponents);
- operation.getCallback().afterOperation(mergedComponents, newComponent);
- lsmIndex.markAsValid(newComponent);
- operation.getCallback().afterFinalize(newComponent);
- lsmIndex.subsumeMergedComponents(newComponent, mergedComponents);
- exitComponents(ctx, LSMOperationType.MERGE, false);
+
+ ILSMComponent newComponent = null;
+ try {
+ newComponent = lsmIndex.merge(operation);
+ operation.getCallback().afterOperation(LSMOperationType.MERGE, ctx.getComponentHolder(), newComponent);
+ lsmIndex.markAsValid(newComponent);
+ } finally {
+ exitComponents(ctx, LSMOperationType.MERGE, newComponent, false);
+ operation.getCallback().afterFinalize(LSMOperationType.MERGE, newComponent);
+ }
+ if (LOGGER.isLoggable(Level.INFO)) {
+ LOGGER.info("Finished the merge operation for index: " + lsmIndex);
+ }
}
@Override
public void addBulkLoadedComponent(ILSMComponent c) throws HyracksDataException, IndexException {
lsmIndex.markAsValid(c);
lsmIndex.addComponent(c);
- int numComponents = lsmIndex.getImmutableComponents().size();
- mergePolicy.diskComponentAdded(lsmIndex, numComponents);
+ mergePolicy.diskComponentAdded(lsmIndex);
}
@Override
public ILSMOperationTracker getOperationTracker() {
return opTracker;
}
-}
+}
\ No newline at end of file
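
The rewritten harness above concentrates its correctness argument in enterComponents()/exitComponents(): an operation either enters every component it needs or rolls back the ones it already entered. A self-contained sketch of that all-or-nothing discipline (stand-in types):

import java.util.Arrays;
import java.util.List;

public class EnterAllSketch {
    interface Component {
        boolean tryEnter();
        void exit();
    }

    static boolean enterAll(List<Component> components) {
        int entered = 0;
        boolean ok = false;
        try {
            for (Component c : components) {
                if (!c.tryEnter()) {
                    break;
                }
                entered++;
            }
            ok = entered == components.size();
            return ok;
        } finally {
            if (!ok) {
                for (Component c : components) {
                    if (entered == 0) {
                        break;
                    }
                    c.exit(); // roll back the partial entry, in order
                    entered--;
                }
            }
        }
    }

    public static void main(String[] args) {
        Component free = new Component() {
            public boolean tryEnter() { return true; }
            public void exit() { System.out.println("rolled back"); }
        };
        Component busy = new Component() {
            public boolean tryEnter() { return false; } // e.g. already being merged
            public void exit() { }
        };
        System.out.println(enterAll(Arrays.asList(free, busy))); // "rolled back", then false
    }
}

In the real code the caller then either returns (try operations such as flush and merge) or parks on opTracker.wait() until an exiting operation calls opTracker.notifyAll().
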
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMIndexSearchCursor.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMIndexSearchCursor.java
index b6ca21b..45cc69b 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMIndexSearchCursor.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMIndexSearchCursor.java
@@ -39,7 +39,7 @@
protected PriorityQueueComparator pqCmp;
protected MultiComparator cmp;
protected boolean needPush;
- protected boolean includeMemComponent;
+ protected boolean includeMutableComponent;
protected ILSMHarness lsmHarness;
protected final ILSMIndexOperationContext opCtx;
@@ -51,6 +51,10 @@
needPush = false;
}
+ public ILSMIndexOperationContext getOpCtx() {
+ return opCtx;
+ }
+
public void initPriorityQueue() throws HyracksDataException, IndexException {
int pqInitSize = (rangeCursors.length > 0) ? rangeCursors.length : 1;
outputPriorityQueue = new PriorityQueue<PriorityQueueElement>(pqInitSize, pqCmp);
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMOperationType.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMOperationType.java
index d932e6d..853b6d0 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMOperationType.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMOperationType.java
@@ -19,6 +19,5 @@
MODIFICATION,
FORCE_MODIFICATION,
FLUSH,
- MERGE,
- NOOP
+ MERGE
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMTreeIndexAccessor.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMTreeIndexAccessor.java
index 6328b1d..f11a061 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMTreeIndexAccessor.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/LSMTreeIndexAccessor.java
@@ -122,11 +122,6 @@
}
@Override
- public void noOp() throws HyracksDataException {
- lsmHarness.noOp(ctx);
- }
-
- @Override
public void forcePhysicalDelete(ITupleReference tuple) throws HyracksDataException, IndexException {
ctx.setOperation(IndexOperation.PHYSICALDELETE);
lsmHarness.forceModify(ctx, tuple);
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/NoMergePolicy.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/NoMergePolicy.java
index 80ac6d8..17d1b17 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/NoMergePolicy.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/NoMergePolicy.java
@@ -21,7 +21,7 @@
INSTANCE;
@Override
- public void diskComponentAdded(ILSMIndex index, int totalNumDiskComponents) {
+ public void diskComponentAdded(ILSMIndex index) {
// Do nothing
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/NoOpIOOperationCallback.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/NoOpIOOperationCallback.java
index a43b44b..2c8369e 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/NoOpIOOperationCallback.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/NoOpIOOperationCallback.java
@@ -19,25 +19,26 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndex;
-public enum NoOpIOOperationCallback implements ILSMIOOperationCallback, ILSMIOOperationCallbackProvider {
+public enum NoOpIOOperationCallback implements ILSMIOOperationCallback, ILSMIOOperationCallbackProvider, ILSMIOOperationCallbackFactory {
INSTANCE;
@Override
- public void beforeOperation() throws HyracksDataException {
+ public void beforeOperation(LSMOperationType opType) throws HyracksDataException {
// Do nothing.
}
@Override
- public void afterOperation(List<ILSMComponent> oldComponents, ILSMComponent newComponent)
+ public void afterOperation(LSMOperationType opType, List<ILSMComponent> oldComponents, ILSMComponent newComponent)
throws HyracksDataException {
// Do nothing.
}
@Override
- public void afterFinalize(ILSMComponent newComponent) throws HyracksDataException {
+ public void afterFinalize(LSMOperationType opType, ILSMComponent newComponent) throws HyracksDataException {
// Do nothing.
}
@@ -45,4 +46,14 @@
public ILSMIOOperationCallback getIOOperationCallback(ILSMIndex index) {
return INSTANCE;
}
+
+ @Override
+ public ILSMIOOperationCallback createIOOperationCallback() {
+ return INSTANCE;
+ }
+
+ @Override
+ public void setNumOfMutableComponents(int count) {
+ // Do nothing.
+ }
}
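
The no-op callback above also shows the new shape of the I/O callback lifecycle: every phase now receives the LSMOperationType, and per scheduleFlush()/scheduleMerge() in the harness above, afterFinalize(opType, null) is invoked with a null component when the operation is skipped. A runnable sketch of that contract with stand-in types:

public class CallbackLifecycleSketch {
    interface IoCallback {
        void beforeOperation(String opType);
        void afterOperation(String opType, String newComponent);
        void afterFinalize(String opType, String newComponent); // newComponent may be null
    }

    static void scheduleFlush(boolean hasWork, IoCallback cb) {
        if (!hasWork) {
            cb.afterFinalize("FLUSH", null); // skipped: finalize with a null component
            return;
        }
        cb.beforeOperation("FLUSH");             // fired when the flush is scheduled
        String newComponent = "disk-component";  // stand-in for lsmIndex.flush(operation)
        cb.afterOperation("FLUSH", newComponent);
        cb.afterFinalize("FLUSH", newComponent);
    }

    public static void main(String[] args) {
        IoCallback printing = new IoCallback() {
            public void beforeOperation(String op) { System.out.println("before " + op); }
            public void afterOperation(String op, String c) { System.out.println("after " + op + " -> " + c); }
            public void afterFinalize(String op, String c) { System.out.println("finalize " + op + " -> " + c); }
        };
        scheduleFlush(true, printing);
        scheduleFlush(false, printing);
    }
}
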
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/SynchronousScheduler.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/SynchronousScheduler.java
index 5eb63a9..44c20f5 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/SynchronousScheduler.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/SynchronousScheduler.java
@@ -25,7 +25,7 @@
@Override
public void scheduleOperation(ILSMIOOperation operation) throws HyracksDataException {
try {
- operation.perform();
+ operation.call();
} catch (IndexException e) {
throw new HyracksDataException(e);
}
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/ThreadCountingTracker.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/ThreadCountingTracker.java
index 19ee4b8..db75d7b 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/ThreadCountingTracker.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/ThreadCountingTracker.java
@@ -22,6 +22,7 @@
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndex;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexInternal;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
public class ThreadCountingTracker implements ILSMOperationTracker {
@@ -51,7 +52,8 @@
IModificationOperationCallback modificationCallback) throws HyracksDataException {
// Flush will only be handled by last exiting thread.
if (opType == LSMOperationType.MODIFICATION) {
- if (threadRefCount.decrementAndGet() == 0 && index.getFlushStatus()) {
+ if (threadRefCount.decrementAndGet() == 0
+ && ((ILSMIndexInternal) index).hasFlushRequestForCurrentMutableComponent()) {
ILSMIndexAccessor accessor = (ILSMIndexAccessor) index.createAccessor(NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
accessor.scheduleFlush(NoOpIOOperationCallback.INSTANCE);
diff --git a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/VirtualBufferCache.java b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/VirtualBufferCache.java
index 18c87a1..bd3b278 100644
--- a/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/VirtualBufferCache.java
+++ b/hyracks/hyracks-storage-am-lsm-common/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/common/impls/VirtualBufferCache.java
@@ -41,7 +41,7 @@
private final CacheBucket[] buckets;
private final ArrayList<VirtualPage> pages;
- private int nextFree;
+ private volatile int nextFree;
private boolean open;
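
Making nextFree volatile is a one-word change with real semantics: writes to the field become visible to threads that read it without taking the cache's locks. A minimal illustration of the pattern, not the VirtualBufferCache code itself; volatile provides visibility only, so the increment still needs mutual exclusion:

public class VolatileVisibilitySketch {
    private volatile int nextFree; // volatile: readers always see the latest write

    synchronized int allocate() {
        return nextFree++; // compound read-modify-write still requires the lock
    }

    int highWaterMark() {
        return nextFree; // lock-free read, safe because the field is volatile
    }

    public static void main(String[] args) throws InterruptedException {
        final VolatileVisibilitySketch c = new VolatileVisibilitySketch();
        Thread writer = new Thread(new Runnable() {
            public void run() {
                for (int i = 0; i < 1000; i++) {
                    c.allocate();
                }
            }
        });
        writer.start();
        writer.join();
        System.out.println(c.highWaterMark()); // 1000
    }
}
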
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml b/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
index 6fd62c1..9e5e91c 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/pom.xml
@@ -19,7 +19,7 @@
<parent>
<artifactId>hyracks</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<relativePath>..</relativePath>
</parent>
@@ -40,14 +40,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexDataflowHelper.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexDataflowHelper.java
index 1afd9d5..ed2f8cf 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexDataflowHelper.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexDataflowHelper.java
@@ -14,12 +14,14 @@
*/
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.dataflow;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
@@ -34,19 +36,19 @@
public final class LSMInvertedIndexDataflowHelper extends AbstractLSMIndexDataflowHelper {
public LSMInvertedIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- IVirtualBufferCache virtualBufferCache, ILSMMergePolicy mergePolicy,
+ List<IVirtualBufferCache> virtualBufferCaches, ILSMMergePolicy mergePolicy,
ILSMOperationTrackerProvider opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- this(opDesc, ctx, partition, virtualBufferCache, DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE, mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ ILSMIOOperationCallbackFactory ioOpCallbackFactory) {
+ this(opDesc, ctx, partition, virtualBufferCaches, DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE, mergePolicy,
+ opTrackerFactory, ioScheduler, ioOpCallbackFactory);
}
public LSMInvertedIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- IVirtualBufferCache virtualBufferCache, double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy,
- ILSMOperationTrackerProvider opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- super(opDesc, ctx, partition, virtualBufferCache, bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory,
- ioScheduler, ioOpCallbackProvider);
+ List<IVirtualBufferCache> virtualBufferCaches, double bloomFilterFalsePositiveRate,
+ ILSMMergePolicy mergePolicy, ILSMOperationTrackerProvider opTrackerFactory,
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackFactory ioOpCallbackFactory) {
+ super(opDesc, ctx, partition, virtualBufferCaches, bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory,
+ ioScheduler, ioOpCallbackFactory);
}
@Override
@@ -55,12 +57,13 @@
try {
IBufferCache diskBufferCache = opDesc.getStorageManager().getBufferCache(ctx);
IFileMapProvider diskFileMapProvider = opDesc.getStorageManager().getFileMapProvider(ctx);
- LSMInvertedIndex invIndex = InvertedIndexUtils.createLSMInvertedIndex(virtualBufferCache,
+ LSMInvertedIndex invIndex = InvertedIndexUtils.createLSMInvertedIndex(virtualBufferCaches,
diskFileMapProvider, invIndexOpDesc.getInvListsTypeTraits(),
invIndexOpDesc.getInvListsComparatorFactories(), invIndexOpDesc.getTokenTypeTraits(),
invIndexOpDesc.getTokenComparatorFactories(), invIndexOpDesc.getTokenizerFactory(),
diskBufferCache, file.getFile().getPath(), bloomFilterFalsePositiveRate, mergePolicy,
- opTrackerFactory.getOperationTracker(ctx), ioScheduler, ioOpCallbackProvider);
+ opTrackerFactory.getOperationTracker(ctx), ioScheduler,
+ ioOpCallbackFactory.createIOOperationCallback());
return invIndex;
} catch (IndexException e) {
throw new HyracksDataException(e);
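The change above swaps the shared ILSMIOOperationCallbackProvider for an ILSMIOOperationCallbackFactory: rather than looking a callback up through a provider at use time, the helper now builds a dedicated callback via createIOOperationCallback() when it instantiates the index. A minimal self-contained sketch of that idiom, using illustrative stand-in names rather than the patch's own types:

    interface IOOperationCallback { void afterFinalize(); }
    interface IOOperationCallbackFactory { IOOperationCallback createIOOperationCallback(); }

    // One callback instance per index, so per-index flush/merge state may live in the callback.
    class CountingCallbackFactory implements IOOperationCallbackFactory {
        @Override
        public IOOperationCallback createIOOperationCallback() {
            return new IOOperationCallback() {
                private int completed; // owned by exactly one index, no sharing
                @Override
                public void afterFinalize() {
                    completed++;
                    System.out.println("I/O operations completed: " + completed);
                }
            };
        }
    }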
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexDataflowHelperFactory.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexDataflowHelperFactory.java
index c9f81fc..84c7150 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexDataflowHelperFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/LSMInvertedIndexDataflowHelperFactory.java
@@ -18,7 +18,7 @@
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexDataflowHelper;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicyProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
@@ -31,19 +31,19 @@
public LSMInvertedIndexDataflowHelperFactory(IVirtualBufferCacheProvider virtualBufferCacheProvider,
ILSMMergePolicyProvider mergePolicyProvider, ILSMOperationTrackerProvider opTrackerProvider,
- ILSMIOOperationSchedulerProvider ioSchedulerProvider, ILSMIOOperationCallbackProvider ioOpCallbackProvider,
+ ILSMIOOperationSchedulerProvider ioSchedulerProvider, ILSMIOOperationCallbackFactory ioOpCallbackFactory,
double bloomFilterFalsePositiveRate) {
super(virtualBufferCacheProvider, mergePolicyProvider, opTrackerProvider, ioSchedulerProvider,
- ioOpCallbackProvider, bloomFilterFalsePositiveRate);
+ ioOpCallbackFactory, bloomFilterFalsePositiveRate);
}
@Override
public IndexDataflowHelper createIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
int partition) {
return new LSMInvertedIndexDataflowHelper(opDesc, ctx, partition,
- virtualBufferCacheProvider.getVirtualBufferCache(ctx), bloomFilterFalsePositiveRate,
+ virtualBufferCacheProvider.getVirtualBufferCaches(ctx), bloomFilterFalsePositiveRate,
mergePolicyProvider.getMergePolicy(ctx), opTrackerFactory, ioSchedulerProvider.getIOScheduler(ctx),
- ioOpCallbackProvider);
+ ioOpCallbackFactory);
}
}
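Here getVirtualBufferCache(ctx) becomes getVirtualBufferCaches(ctx): the provider now returns one virtual buffer cache per in-memory component, which is what lets the LSM index keep several mutable components and rotate among them. A self-contained sketch of the shape such a provider might take; the Cache class and the even split of the budget are assumptions for illustration, not Hyracks APIs:

    import java.util.ArrayList;
    import java.util.List;

    class Cache {
        final int pageSize, numPages;
        Cache(int pageSize, int numPages) { this.pageSize = pageSize; this.numPages = numPages; }
    }

    class MultiCacheProvider {
        // Divide a fixed memory budget evenly across the desired number of mutable components.
        static List<Cache> getVirtualBufferCaches(long budgetBytes, int pageSize, int numComponents) {
            List<Cache> caches = new ArrayList<>();
            int pagesPerComponent = (int) (budgetBytes / pageSize / numComponents);
            for (int i = 0; i < numComponents; i++) {
                caches.add(new Cache(pageSize, pagesPerComponent));
            }
            return caches;
        }

        public static void main(String[] args) {
            // 32 MB budget, 4 KB pages, 2 components -> 2 caches of 4096 pages each
            System.out.println(getVirtualBufferCaches(32L << 20, 4096, 2).size());
        }
    }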
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/PartitionedLSMInvertedIndexDataflowHelper.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/PartitionedLSMInvertedIndexDataflowHelper.java
index 0dbd06b..57a41ca 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/PartitionedLSMInvertedIndexDataflowHelper.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/PartitionedLSMInvertedIndexDataflowHelper.java
@@ -14,12 +14,14 @@
*/
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.dataflow;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
@@ -34,19 +36,19 @@
public final class PartitionedLSMInvertedIndexDataflowHelper extends AbstractLSMIndexDataflowHelper {
public PartitionedLSMInvertedIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
- int partition, IVirtualBufferCache virtualBufferCache, ILSMMergePolicy mergePolicy,
+ int partition, List<IVirtualBufferCache> virtualBufferCaches, ILSMMergePolicy mergePolicy,
ILSMOperationTrackerProvider opTrackerFactory, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
+ ILSMIOOperationCallbackFactory ioOpCallbackFactory) {
this(opDesc, ctx, partition, virtualBufferCaches, DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE, mergePolicy,
- opTrackerFactory, ioScheduler, ioOpCallbackProvider);
+ opTrackerFactory, ioScheduler, ioOpCallbackFactory);
}
public PartitionedLSMInvertedIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
- int partition, IVirtualBufferCache virtualBufferCache, double bloomFilterFalsePositiveRate,
+ int partition, List<IVirtualBufferCache> virtualBufferCaches, double bloomFilterFalsePositiveRate,
ILSMMergePolicy mergePolicy, ILSMOperationTrackerProvider opTrackerFactory,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- super(opDesc, ctx, partition, virtualBufferCache, bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory,
- ioScheduler, ioOpCallbackProvider);
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackFactory ioOpCallbackFactory) {
+ super(opDesc, ctx, partition, virtualBufferCaches, bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory,
+ ioScheduler, ioOpCallbackFactory);
}
@Override
@@ -56,11 +58,12 @@
IBufferCache diskBufferCache = opDesc.getStorageManager().getBufferCache(ctx);
IFileMapProvider diskFileMapProvider = opDesc.getStorageManager().getFileMapProvider(ctx);
PartitionedLSMInvertedIndex invIndex = InvertedIndexUtils.createPartitionedLSMInvertedIndex(
- virtualBufferCache, diskFileMapProvider, invIndexOpDesc.getInvListsTypeTraits(),
+ virtualBufferCaches, diskFileMapProvider, invIndexOpDesc.getInvListsTypeTraits(),
invIndexOpDesc.getInvListsComparatorFactories(), invIndexOpDesc.getTokenTypeTraits(),
invIndexOpDesc.getTokenComparatorFactories(), invIndexOpDesc.getTokenizerFactory(),
diskBufferCache, file.getFile().getPath(), bloomFilterFalsePositiveRate, mergePolicy,
- opTrackerFactory.getOperationTracker(ctx), ioScheduler, ioOpCallbackProvider);
+ opTrackerFactory.getOperationTracker(ctx), ioScheduler,
+ ioOpCallbackFactory.createIOOperationCallback());
return invIndex;
} catch (IndexException e) {
throw new HyracksDataException(e);
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/PartitionedLSMInvertedIndexDataflowHelperFactory.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/PartitionedLSMInvertedIndexDataflowHelperFactory.java
index 67cab6d..d78ae7e 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/PartitionedLSMInvertedIndexDataflowHelperFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/dataflow/PartitionedLSMInvertedIndexDataflowHelperFactory.java
@@ -18,7 +18,7 @@
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexDataflowHelper;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicyProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
@@ -31,18 +31,18 @@
public PartitionedLSMInvertedIndexDataflowHelperFactory(IVirtualBufferCacheProvider virtualBufferCacheProvider,
ILSMMergePolicyProvider mergePolicyProvider, ILSMOperationTrackerProvider opTrackerProvider,
- ILSMIOOperationSchedulerProvider ioSchedulerProvider, ILSMIOOperationCallbackProvider ioOpCallbackProvider,
+ ILSMIOOperationSchedulerProvider ioSchedulerProvider, ILSMIOOperationCallbackFactory ioOpCallbackFactory,
double bloomFilterFalsePositiveRate) {
super(virtualBufferCacheProvider, mergePolicyProvider, opTrackerProvider, ioSchedulerProvider,
- ioOpCallbackProvider, bloomFilterFalsePositiveRate);
+ ioOpCallbackFactory, bloomFilterFalsePositiveRate);
}
@Override
public IndexDataflowHelper createIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
int partition) {
return new PartitionedLSMInvertedIndexDataflowHelper(opDesc, ctx, partition,
- virtualBufferCacheProvider.getVirtualBufferCache(ctx), bloomFilterFalsePositiveRate,
+ virtualBufferCacheProvider.getVirtualBufferCaches(ctx), bloomFilterFalsePositiveRate,
mergePolicyProvider.getMergePolicy(ctx), opTrackerFactory, ioSchedulerProvider.getIOScheduler(ctx),
- ioOpCallbackProvider);
+ ioOpCallbackFactory);
}
}
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
index 3eae5a7..cef6aee 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndex.java
@@ -27,7 +27,6 @@
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomCalculations;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterSpecification;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
@@ -43,15 +42,16 @@
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
import edu.uci.ics.hyracks.storage.am.common.api.IVirtualFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponentFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperation;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessorInternal;
@@ -65,6 +65,7 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BTreeFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BlockingIOOperationCallbackWrapper;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory.InMemoryInvertedIndex;
@@ -79,9 +80,6 @@
public class LSMInvertedIndex extends AbstractLSMIndex implements IInvertedIndex {
- // In-memory components.
- protected final LSMInvertedIndexMutableComponent mutableComponent;
- protected final IVirtualFreePageManager virtualFreePageManager;
protected final IBinaryTokenizerFactory tokenizerFactory;
// On-disk components.
@@ -94,31 +92,39 @@
protected final ITypeTraits[] tokenTypeTraits;
protected final IBinaryComparatorFactory[] tokenCmpFactories;
- public LSMInvertedIndex(IVirtualBufferCache virtualBufferCache, OnDiskInvertedIndexFactory diskInvIndexFactory,
- BTreeFactory deletedKeysBTreeFactory, BloomFilterFactory bloomFilterFactory,
- double bloomFilterFalsePositiveRate, ILSMIndexFileManager fileManager,
- IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
+ public LSMInvertedIndex(List<IVirtualBufferCache> virtualBufferCaches,
+ OnDiskInvertedIndexFactory diskInvIndexFactory, BTreeFactory deletedKeysBTreeFactory,
+ BloomFilterFactory bloomFilterFactory, double bloomFilterFalsePositiveRate,
+ ILSMIndexFileManager fileManager, IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
IBinaryComparatorFactory[] tokenCmpFactories, IBinaryTokenizerFactory tokenizerFactory,
ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) throws IndexException {
- super(virtualBufferCache, diskInvIndexFactory.getBufferCache(), fileManager, diskFileMapProvider,
- bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler, ioOpCallbackProvider);
- this.virtualFreePageManager = new VirtualFreePageManager(virtualBufferCache.getNumPages());
+ ILSMIOOperationCallback ioOpCallback) throws IndexException {
+ super(virtualBufferCaches, diskInvIndexFactory.getBufferCache(), fileManager, diskFileMapProvider,
+ bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler, ioOpCallback);
+
this.tokenizerFactory = tokenizerFactory;
this.invListTypeTraits = invListTypeTraits;
this.invListCmpFactories = invListCmpFactories;
this.tokenTypeTraits = tokenTypeTraits;
this.tokenCmpFactories = tokenCmpFactories;
- // Create in-memory component.
- InMemoryInvertedIndex memInvIndex = createInMemoryInvertedIndex(virtualBufferCache);
- BTree deleteKeysBTree = BTreeUtils.createBTree(virtualBufferCache, new VirtualFreePageManager(
- virtualBufferCache.getNumPages()), ((IVirtualBufferCache) virtualBufferCache).getFileMapProvider(),
- invListTypeTraits, invListCmpFactories, BTreeLeafFrameType.REGULAR_NSM, new FileReference(new File(
- fileManager.getBaseDir() + "_virtual_del")));
- mutableComponent = new LSMInvertedIndexMutableComponent(memInvIndex, deleteKeysBTree, virtualBufferCache);
- componentFactory = new LSMInvertedIndexComponentFactory(diskInvIndexFactory, deletedKeysBTreeFactory,
+
+ componentFactory = new LSMInvertedIndexDiskComponentFactory(diskInvIndexFactory, deletedKeysBTreeFactory,
bloomFilterFactory);
+
+ int i = 0;
+ for (IVirtualBufferCache virtualBufferCache : virtualBufferCaches) {
+ InMemoryInvertedIndex memInvIndex = createInMemoryInvertedIndex(virtualBufferCache,
+ new VirtualFreePageManager(virtualBufferCache.getNumPages()), i);
+ BTree deleteKeysBTree = BTreeUtils.createBTree(virtualBufferCache, new VirtualFreePageManager(
+ virtualBufferCache.getNumPages()), virtualBufferCache.getFileMapProvider(),
+ invListTypeTraits, invListCmpFactories, BTreeLeafFrameType.REGULAR_NSM, new FileReference(new File(
+ fileManager.getBaseDir() + "_virtual_del_" + i)));
+ LSMInvertedIndexMemoryComponent mutableComponent = new LSMInvertedIndexMemoryComponent(memInvIndex,
+ deleteKeysBTree, virtualBufferCache, i == 0);
+ memoryComponents.add(mutableComponent);
+ ++i;
+ }
}
@Override
@@ -129,7 +135,7 @@
fileManager.deleteDirs();
fileManager.createDirs();
- componentsRef.get().clear();
+ diskComponents.clear();
}
@Override
@@ -139,16 +145,20 @@
}
try {
- List<ILSMComponent> immutableComponents = componentsRef.get();
- ((IVirtualBufferCache) mutableComponent.getInvIndex().getBufferCache()).open();
- mutableComponent.getInvIndex().create();
- mutableComponent.getInvIndex().activate();
- mutableComponent.getDeletedKeysBTree().create();
- mutableComponent.getDeletedKeysBTree().activate();
+ List<ILSMComponent> immutableComponents = diskComponents;
+ for (ILSMComponent c : memoryComponents) {
+ LSMInvertedIndexMemoryComponent mutableComponent = (LSMInvertedIndexMemoryComponent) c;
+ ((IVirtualBufferCache) mutableComponent.getInvIndex().getBufferCache()).open();
+ mutableComponent.getInvIndex().create();
+ mutableComponent.getInvIndex().activate();
+ mutableComponent.getDeletedKeysBTree().create();
+ mutableComponent.getDeletedKeysBTree().activate();
+ }
+
immutableComponents.clear();
List<LSMComponentFileReferences> validFileReferences = fileManager.cleanupAndGetValidFiles();
for (LSMComponentFileReferences lsmComponentFileReference : validFileReferences) {
- LSMInvertedIndexImmutableComponent component;
+ LSMInvertedIndexDiskComponent component;
try {
component = createDiskInvIndexComponent(componentFactory,
lsmComponentFileReference.getInsertIndexFileReference(),
@@ -171,12 +181,16 @@
if (!isActivated) {
throw new HyracksDataException("Failed to clear the index since it is not activated.");
}
- List<ILSMComponent> immutableComponents = componentsRef.get();
- mutableComponent.getInvIndex().clear();
- mutableComponent.getDeletedKeysBTree().clear();
- mutableComponent.reset();
+
+ for (ILSMComponent c : memoryComponents) {
+ LSMInvertedIndexMemoryComponent mutableComponent = (LSMInvertedIndexMemoryComponent) c;
+ mutableComponent.getInvIndex().clear();
+ mutableComponent.getDeletedKeysBTree().clear();
+ mutableComponent.reset();
+ }
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) c;
+ LSMInvertedIndexDiskComponent component = (LSMInvertedIndexDiskComponent) c;
component.getBloomFilter().deactivate();
component.getInvIndex().deactivate();
component.getDeletedKeysBTree().deactivate();
@@ -195,30 +209,32 @@
isActivated = false;
if (flushOnExit) {
- BlockingIOOperationCallbackWrapper blockingCallBack = new BlockingIOOperationCallbackWrapper(
- ioOpCallbackProvider.getIOOperationCallback(this));
- ILSMIndexAccessor accessor = (ILSMIndexAccessor) createAccessor(NoOpOperationCallback.INSTANCE,
- NoOpOperationCallback.INSTANCE);
- accessor.scheduleFlush(blockingCallBack);
+ BlockingIOOperationCallbackWrapper cb = new BlockingIOOperationCallbackWrapper(ioOpCallback);
+ ILSMIndexAccessor accessor = createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ accessor.scheduleFlush(cb);
try {
- blockingCallBack.waitForIO();
+ cb.waitForIO();
} catch (InterruptedException e) {
throw new HyracksDataException(e);
}
}
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) c;
+ LSMInvertedIndexDiskComponent component = (LSMInvertedIndexDiskComponent) c;
component.getBloomFilter().deactivate();
component.getInvIndex().deactivate();
component.getDeletedKeysBTree().deactivate();
}
- mutableComponent.getInvIndex().deactivate();
- mutableComponent.getDeletedKeysBTree().deactivate();
- mutableComponent.getInvIndex().destroy();
- mutableComponent.getDeletedKeysBTree().destroy();
- ((IVirtualBufferCache) mutableComponent.getInvIndex().getBufferCache()).close();
+ for (ILSMComponent c : memoryComponents) {
+ LSMInvertedIndexMemoryComponent mutableComponent = (LSMInvertedIndexMemoryComponent) c;
+ mutableComponent.getInvIndex().deactivate();
+ mutableComponent.getDeletedKeysBTree().deactivate();
+ mutableComponent.getInvIndex().destroy();
+ mutableComponent.getDeletedKeysBTree().destroy();
+ ((IVirtualBufferCache) mutableComponent.getInvIndex().getBufferCache()).close();
+ }
}
@Override
@@ -232,11 +248,14 @@
throw new HyracksDataException("Failed to destroy the index since it is activated.");
}
- mutableComponent.getInvIndex().destroy();
- mutableComponent.getDeletedKeysBTree().destroy();
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ for (ILSMComponent c : memoryComponents) {
+ LSMInvertedIndexMemoryComponent mutableComponent = (LSMInvertedIndexMemoryComponent) c;
+ mutableComponent.getInvIndex().destroy();
+ mutableComponent.getDeletedKeysBTree().destroy();
+ }
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) c;
+ LSMInvertedIndexDiskComponent component = (LSMInvertedIndexDiskComponent) c;
component.getInvIndex().destroy();
component.getDeletedKeysBTree().destroy();
component.getBloomFilter().destroy();
@@ -246,17 +265,29 @@
@Override
public void getOperationalComponents(ILSMIndexOperationContext ctx) {
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
List<ILSMComponent> operationalComponents = ctx.getComponentHolder();
operationalComponents.clear();
+ int cmc = currentMutableComponentId.get();
+ ctx.setCurrentMutableComponentId(cmc);
+ int numMutableComponents = memoryComponents.size();
switch (ctx.getOperation()) {
case FLUSH:
case DELETE:
case INSERT:
- operationalComponents.add(mutableComponent);
+ operationalComponents.add(memoryComponents.get(cmc));
break;
case SEARCH:
- operationalComponents.add(mutableComponent);
+ for (int i = 0; i < numMutableComponents - 1; i++) {
+ ILSMComponent c = memoryComponents.get((cmc + i + 1) % numMutableComponents);
+ LSMInvertedIndexMemoryComponent mutableComponent = (LSMInvertedIndexMemoryComponent) c;
+ if (mutableComponent.isReadable()) {
+ // Make sure newest components are added first
+ operationalComponents.add(0, mutableComponent);
+ }
+ }
+ // The current mutable component is always added
+ operationalComponents.add(0, memoryComponents.get(cmc));
operationalComponents.addAll(immutableComponents);
break;
case MERGE:
@@ -295,17 +326,17 @@
switch (ctx.getOperation()) {
case INSERT: {
// Insert into the in-memory inverted index.
- ctx.memInvIndexAccessor.insert(tuple);
+ ctx.currentMutableInvIndexAccessors.insert(tuple);
break;
}
case DELETE: {
// First remove all entries in the in-memory inverted index (if any).
- ctx.memInvIndexAccessor.delete(tuple);
+ ctx.currentMutableInvIndexAccessors.delete(tuple);
// Insert key into the deleted-keys BTree.
ctx.keysOnlyTuple.reset(tuple);
try {
- ctx.deletedKeysBTreeAccessor.insert(ctx.keysOnlyTuple);
- } catch (BTreeDuplicateKeyException e) {
+ ctx.currentDeletedKeysBTreeAccessors.insert(ctx.keysOnlyTuple);
+ } catch (TreeIndexDuplicateKeyException e) {
// Key has already been deleted.
}
break;
@@ -314,7 +345,6 @@
throw new UnsupportedOperationException("Operation " + ctx.getOperation() + " not supported.");
}
}
- mutableComponent.setIsModified();
}
@Override
@@ -323,58 +353,59 @@
List<ILSMComponent> operationalComponents = ictx.getComponentHolder();
int numComponents = operationalComponents.size();
assert numComponents > 0;
- boolean includeMutableComponent = operationalComponents.get(0) == mutableComponent;
+ boolean includeMutableComponent = false;
ArrayList<IIndexAccessor> indexAccessors = new ArrayList<IIndexAccessor>(numComponents);
ArrayList<IIndexAccessor> deletedKeysBTreeAccessors = new ArrayList<IIndexAccessor>(numComponents);
- if (includeMutableComponent) {
- IIndexAccessor invIndexAccessor = mutableComponent.getInvIndex().createAccessor(
- NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- indexAccessors.add(invIndexAccessor);
- IIndexAccessor deletedKeysAccessor = mutableComponent.getDeletedKeysBTree().createAccessor(
- NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- deletedKeysBTreeAccessors.add(deletedKeysAccessor);
- }
- for (int i = includeMutableComponent ? 1 : 0; i < operationalComponents.size(); i++) {
- LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) operationalComponents
- .get(i);
- IIndexAccessor invIndexAccessor = component.getInvIndex().createAccessor(NoOpOperationCallback.INSTANCE,
- NoOpOperationCallback.INSTANCE);
- indexAccessors.add(invIndexAccessor);
- IIndexAccessor deletedKeysAccessor = component.getDeletedKeysBTree().createAccessor(
- NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- deletedKeysBTreeAccessors.add(deletedKeysAccessor);
+ for (int i = 0; i < operationalComponents.size(); i++) {
+ ILSMComponent component = operationalComponents.get(i);
+ if (component.getType() == LSMComponentType.MEMORY) {
+ includeMutableComponent = true;
+ IIndexAccessor invIndexAccessor = ((LSMInvertedIndexMemoryComponent) component).getInvIndex()
+ .createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ indexAccessors.add(invIndexAccessor);
+ IIndexAccessor deletedKeysAccessor = ((LSMInvertedIndexMemoryComponent) component)
+ .getDeletedKeysBTree().createAccessor(NoOpOperationCallback.INSTANCE,
+ NoOpOperationCallback.INSTANCE);
+ deletedKeysBTreeAccessors.add(deletedKeysAccessor);
+ } else {
+ IIndexAccessor invIndexAccessor = ((LSMInvertedIndexDiskComponent) component).getInvIndex()
+ .createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ indexAccessors.add(invIndexAccessor);
+ IIndexAccessor deletedKeysAccessor = ((LSMInvertedIndexDiskComponent) component).getDeletedKeysBTree()
+ .createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ deletedKeysBTreeAccessors.add(deletedKeysAccessor);
+ }
}
ICursorInitialState initState = createCursorInitialState(pred, ictx, includeMutableComponent, indexAccessors,
- deletedKeysBTreeAccessors);
+ deletedKeysBTreeAccessors, operationalComponents);
cursor.open(initState, pred);
}
private ICursorInitialState createCursorInitialState(ISearchPredicate pred, IIndexOperationContext ictx,
boolean includeMutableComponent, ArrayList<IIndexAccessor> indexAccessors,
- ArrayList<IIndexAccessor> deletedKeysBTreeAccessors) {
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ ArrayList<IIndexAccessor> deletedKeysBTreeAccessors, List<ILSMComponent> operationalComponents) {
ICursorInitialState initState = null;
PermutingTupleReference keysOnlyTuple = createKeysOnlyTupleReference();
MultiComparator keyCmp = MultiComparator.createIgnoreFieldLength(invListCmpFactories);
- List<ILSMComponent> operationalComponents = new ArrayList<ILSMComponent>();
- if (includeMutableComponent) {
- operationalComponents.add(mutableComponent);
- }
- operationalComponents.addAll(immutableComponents);
// TODO: This check is not pretty, but it does the job. Come up with something more OO in the future.
// Distinguish between regular searches and range searches (mostly used in merges).
if (pred instanceof InvertedIndexSearchPredicate) {
initState = new LSMInvertedIndexSearchCursorInitialState(keyCmp, keysOnlyTuple, indexAccessors,
- deletedKeysBTreeAccessors, mutableComponent.getDeletedKeysBTree().getLeafFrameFactory(), ictx,
- includeMutableComponent, lsmHarness, operationalComponents);
+ deletedKeysBTreeAccessors,
+ ((LSMInvertedIndexMemoryComponent) memoryComponents.get(currentMutableComponentId.get()))
+ .getDeletedKeysBTree().getLeafFrameFactory(), ictx, includeMutableComponent, lsmHarness,
+ operationalComponents);
} else {
+ LSMInvertedIndexMemoryComponent mutableComponent = (LSMInvertedIndexMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
InMemoryInvertedIndex memInvIndex = (InMemoryInvertedIndex) mutableComponent.getInvIndex();
MultiComparator tokensAndKeysCmp = MultiComparator.create(memInvIndex.getBTree().getComparatorFactories());
initState = new LSMInvertedIndexRangeSearchCursorInitialState(tokensAndKeysCmp, keyCmp, keysOnlyTuple,
- mutableComponent.getDeletedKeysBTree().getLeafFrameFactory(), includeMutableComponent, lsmHarness,
+ ((LSMInvertedIndexMemoryComponent) memoryComponents.get(currentMutableComponentId.get()))
+ .getDeletedKeysBTree().getLeafFrameFactory(), includeMutableComponent, lsmHarness,
indexAccessors, deletedKeysBTreeAccessors, pred, operationalComponents);
}
return initState;
@@ -394,22 +425,18 @@
}
@Override
- public boolean scheduleFlush(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
+ public void scheduleFlush(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException {
- if (!mutableComponent.isModified()) {
- return false;
- }
+ ILSMComponent flushingComponent = ctx.getComponentHolder().get(0);
LSMComponentFileReferences componentFileRefs = fileManager.getRelFlushFileReference();
LSMInvertedIndexOpContext opCtx = createOpContext(NoOpOperationCallback.INSTANCE,
NoOpOperationCallback.INSTANCE);
- ILSMComponent flushingComponent = ctx.getComponentHolder().get(0);
opCtx.setOperation(IndexOperation.FLUSH);
opCtx.getComponentHolder().add(flushingComponent);
- ioScheduler.scheduleOperation(new LSMInvertedIndexFlushOperation(new LSMInvertedIndexAccessor(this, lsmHarness,
- fileManager, opCtx), mutableComponent, componentFileRefs.getInsertIndexFileReference(),
- componentFileRefs.getDeleteIndexFileReference(), componentFileRefs.getBloomFilterFileReference(),
- callback));
- return true;
+ ioScheduler.scheduleOperation(new LSMInvertedIndexFlushOperation(
+ new LSMInvertedIndexAccessor(lsmHarness, opCtx), flushingComponent, componentFileRefs
+ .getInsertIndexFileReference(), componentFileRefs.getDeleteIndexFileReference(),
+ componentFileRefs.getBloomFilterFileReference(), callback));
}
@Override
@@ -417,13 +444,14 @@
LSMInvertedIndexFlushOperation flushOp = (LSMInvertedIndexFlushOperation) operation;
// Create an inverted index instance to be bulk loaded.
- LSMInvertedIndexImmutableComponent component = createDiskInvIndexComponent(componentFactory,
+ LSMInvertedIndexDiskComponent component = createDiskInvIndexComponent(componentFactory,
flushOp.getDictBTreeFlushTarget(), flushOp.getDeletedKeysBTreeFlushTarget(),
flushOp.getBloomFilterFlushTarget(), true);
IInvertedIndex diskInvertedIndex = component.getInvIndex();
// Create a scan cursor on the BTree underlying the in-memory inverted index.
- LSMInvertedIndexMutableComponent flushingComponent = flushOp.getFlushingComponent();
+ LSMInvertedIndexMemoryComponent flushingComponent = (LSMInvertedIndexMemoryComponent) flushOp
+ .getFlushingComponent();
InMemoryInvertedIndexAccessor memInvIndexAccessor = (InMemoryInvertedIndexAccessor) flushingComponent
.getInvIndex().createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
BTreeAccessor memBTreeAccessor = memInvIndexAccessor.getBTreeAccessor();
@@ -496,44 +524,43 @@
public void scheduleMerge(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException, IndexException {
LSMInvertedIndexOpContext ictx = createOpContext(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- List<ILSMComponent> mergingComponents = ctx.getComponentHolder();
- ictx.getComponentHolder().addAll(mergingComponents);
- IIndexCursor cursor = new LSMInvertedIndexRangeSearchCursor(ictx);
- RangePredicate mergePred = new RangePredicate(null, null, true, true, null, null);
-
- // Scan diskInvertedIndexes ignoring the memoryInvertedIndex.
- search(ictx, cursor, mergePred);
-
ictx.setOperation(IndexOperation.MERGE);
- LSMInvertedIndexImmutableComponent firstComponent = (LSMInvertedIndexImmutableComponent) mergingComponents
- .get(0);
+ List<ILSMComponent> mergingComponents = ctx.getComponentHolder();
+ IIndexCursor cursor = new LSMInvertedIndexRangeSearchCursor(ictx);
+
+ LSMInvertedIndexDiskComponent firstComponent = (LSMInvertedIndexDiskComponent) mergingComponents.get(0);
OnDiskInvertedIndex firstInvIndex = (OnDiskInvertedIndex) firstComponent.getInvIndex();
String firstFileName = firstInvIndex.getBTree().getFileReference().getFile().getName();
- LSMInvertedIndexImmutableComponent lastComponent = (LSMInvertedIndexImmutableComponent) mergingComponents
+ LSMInvertedIndexDiskComponent lastComponent = (LSMInvertedIndexDiskComponent) mergingComponents
.get(mergingComponents.size() - 1);
OnDiskInvertedIndex lastInvIndex = (OnDiskInvertedIndex) lastComponent.getInvIndex();
String lastFileName = lastInvIndex.getBTree().getFileReference().getFile().getName();
LSMComponentFileReferences relMergeFileRefs = fileManager.getRelMergeFileReference(firstFileName, lastFileName);
- ILSMIndexAccessorInternal accessor = new LSMInvertedIndexAccessor(this, lsmHarness, fileManager, ictx);
+ ILSMIndexAccessorInternal accessor = new LSMInvertedIndexAccessor(lsmHarness, ctx);
ioScheduler.scheduleOperation(new LSMInvertedIndexMergeOperation(accessor, mergingComponents, cursor,
relMergeFileRefs.getInsertIndexFileReference(), relMergeFileRefs.getDeleteIndexFileReference(),
relMergeFileRefs.getBloomFilterFileReference(), callback));
}
@Override
- public ILSMComponent merge(List<ILSMComponent> mergedComponents, ILSMIOOperation operation)
- throws HyracksDataException, IndexException {
+ public ILSMComponent merge(ILSMIOOperation operation) throws HyracksDataException, IndexException {
LSMInvertedIndexMergeOperation mergeOp = (LSMInvertedIndexMergeOperation) operation;
+ IIndexCursor cursor = mergeOp.getCursor();
+
+ RangePredicate mergePred = new RangePredicate(null, null, true, true, null, null);
+ ILSMIndexOperationContext opCtx = ((LSMIndexSearchCursor) cursor).getOpCtx();
+ opCtx.getComponentHolder().addAll(mergeOp.getMergingComponents());
+ // Scan diskInvertedIndexes ignoring the memoryInvertedIndex.
+ search(opCtx, cursor, mergePred);
// Create an inverted index instance.
- LSMInvertedIndexImmutableComponent component = createDiskInvIndexComponent(componentFactory,
+ LSMInvertedIndexDiskComponent component = createDiskInvIndexComponent(componentFactory,
mergeOp.getDictBTreeMergeTarget(), mergeOp.getDeletedKeysBTreeMergeTarget(),
mergeOp.getBloomFilterMergeTarget(), true);
IInvertedIndex mergedDiskInvertedIndex = component.getInvIndex();
- IIndexCursor cursor = mergeOp.getCursor();
IIndexBulkLoader invIndexBulkLoader = mergedDiskInvertedIndex.createBulkLoader(1.0f, true, 0L, false);
try {
while (cursor.hasNext()) {
@@ -545,10 +572,6 @@
cursor.close();
}
invIndexBulkLoader.end();
-
- // Add the merged components for cleanup.
- mergedComponents.addAll(mergeOp.getMergingComponents());
-
return component;
}
@@ -568,10 +591,6 @@
}
}
- public boolean isEmptyIndex() throws HyracksDataException {
- return componentsRef.get().isEmpty() && !mutableComponent.isModified();
- }
-
public class LSMInvertedIndexBulkLoader implements IIndexBulkLoader {
private final ILSMComponent component;
private final IIndexBulkLoader invIndexBulkLoader;
@@ -590,8 +609,8 @@
} catch (HyracksDataException | IndexException e) {
throw new IndexException(e);
}
- invIndexBulkLoader = ((LSMInvertedIndexImmutableComponent) component).getInvIndex().createBulkLoader(
- fillFactor, verifyInput, numElementsHint, false);
+ invIndexBulkLoader = ((LSMInvertedIndexDiskComponent) component).getInvIndex().createBulkLoader(fillFactor,
+ verifyInput, numElementsHint, false);
}
@Override
@@ -610,12 +629,12 @@
protected void cleanupArtifacts() throws HyracksDataException {
if (!cleanedUpArtifacts) {
cleanedUpArtifacts = true;
- ((LSMInvertedIndexImmutableComponent) component).getInvIndex().deactivate();
- ((LSMInvertedIndexImmutableComponent) component).getInvIndex().destroy();
- ((LSMInvertedIndexImmutableComponent) component).getDeletedKeysBTree().deactivate();
- ((LSMInvertedIndexImmutableComponent) component).getDeletedKeysBTree().destroy();
- ((LSMInvertedIndexImmutableComponent) component).getBloomFilter().deactivate();
- ((LSMInvertedIndexImmutableComponent) component).getBloomFilter().destroy();
+ ((LSMInvertedIndexDiskComponent) component).getInvIndex().deactivate();
+ ((LSMInvertedIndexDiskComponent) component).getInvIndex().destroy();
+ ((LSMInvertedIndexDiskComponent) component).getDeletedKeysBTree().deactivate();
+ ((LSMInvertedIndexDiskComponent) component).getDeletedKeysBTree().destroy();
+ ((LSMInvertedIndexDiskComponent) component).getBloomFilter().deactivate();
+ ((LSMInvertedIndexDiskComponent) component).getBloomFilter().destroy();
}
}
@@ -632,17 +651,17 @@
}
}
- protected InMemoryInvertedIndex createInMemoryInvertedIndex(IVirtualBufferCache virtualBufferCache)
- throws IndexException {
+ protected InMemoryInvertedIndex createInMemoryInvertedIndex(IVirtualBufferCache virtualBufferCache,
+ IVirtualFreePageManager virtualFreePageManager, int id) throws IndexException {
return InvertedIndexUtils.createInMemoryBTreeInvertedindex(virtualBufferCache, virtualFreePageManager,
invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
- new FileReference(new File(fileManager.getBaseDir() + "_virtual_vocab")));
+ new FileReference(new File(fileManager.getBaseDir() + "_virtual_vocab_" + id)));
}
- protected LSMInvertedIndexImmutableComponent createDiskInvIndexComponent(ILSMComponentFactory factory,
+ protected LSMInvertedIndexDiskComponent createDiskInvIndexComponent(ILSMComponentFactory factory,
FileReference dictBTreeFileRef, FileReference btreeFileRef, FileReference bloomFilterFileRef, boolean create)
throws HyracksDataException, IndexException {
- LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) factory
+ LSMInvertedIndexDiskComponent component = (LSMInvertedIndexDiskComponent) factory
.createLSMComponentInstance(new LSMComponentFileReferences(dictBTreeFileRef, btreeFileRef,
bloomFilterFileRef));
if (create) {
@@ -659,15 +678,13 @@
@Override
public ILSMIndexAccessorInternal createAccessor(IModificationOperationCallback modificationCallback,
- ISearchOperationCallback searchCallback) {
- return new LSMInvertedIndexAccessor(this, lsmHarness, fileManager, createOpContext(modificationCallback,
- searchCallback));
+ ISearchOperationCallback searchCallback) throws HyracksDataException {
+ return new LSMInvertedIndexAccessor(lsmHarness, createOpContext(modificationCallback, searchCallback));
}
private LSMInvertedIndexOpContext createOpContext(IModificationOperationCallback modificationCallback,
- ISearchOperationCallback searchCallback) {
- return new LSMInvertedIndexOpContext(mutableComponent.getInvIndex(), mutableComponent.getDeletedKeysBTree(),
- modificationCallback, searchCallback);
+ ISearchOperationCallback searchCallback) throws HyracksDataException {
+ return new LSMInvertedIndexOpContext(memoryComponents, modificationCallback, searchCallback);
}
@Override
@@ -693,8 +710,13 @@
@Override
public long getMemoryAllocationSize() {
- IBufferCache virtualBufferCache = mutableComponent.getInvIndex().getBufferCache();
- return virtualBufferCache.getNumPages() * virtualBufferCache.getPageSize();
+ long size = 0;
+ for (ILSMComponent c : memoryComponents) {
+ LSMInvertedIndexMemoryComponent mutableComponent = (LSMInvertedIndexMemoryComponent) c;
+ IBufferCache virtualBufferCache = mutableComponent.getInvIndex().getBufferCache();
+ size += virtualBufferCache.getNumPages() * virtualBufferCache.getPageSize();
+ }
+ return size;
}
@Override
@@ -721,7 +743,7 @@
@Override
public void markAsValid(ILSMComponent lsmComponent) throws HyracksDataException {
- LSMInvertedIndexImmutableComponent invIndexComponent = (LSMInvertedIndexImmutableComponent) lsmComponent;
+ LSMInvertedIndexDiskComponent invIndexComponent = (LSMInvertedIndexDiskComponent) lsmComponent;
OnDiskInvertedIndex invIndex = (OnDiskInvertedIndex) invIndexComponent.getInvIndex();
// Flush the bloom filter first.
int fileId = invIndexComponent.getBloomFilter().getFileId();
@@ -742,11 +764,14 @@
@Override
public void validate() throws HyracksDataException {
- mutableComponent.getInvIndex().validate();
- mutableComponent.getDeletedKeysBTree().validate();
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ for (ILSMComponent c : memoryComponents) {
+ LSMInvertedIndexMemoryComponent mutableComponent = (LSMInvertedIndexMemoryComponent) c;
+ mutableComponent.getInvIndex().validate();
+ mutableComponent.getDeletedKeysBTree().validate();
+ }
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) c;
+ LSMInvertedIndexDiskComponent component = (LSMInvertedIndexDiskComponent) c;
component.getInvIndex().validate();
component.getDeletedKeysBTree().validate();
}
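The getOperationalComponents rewrite is the heart of this file's refactor: modifications go only to the component at currentMutableComponentId, while searches visit every readable memory component, newest first, before falling through to the disk components. The ordering produced by the add(0, ...) loop can be reproduced with plain indices; a self-contained illustration, not the patch's code:

    import java.util.ArrayList;
    import java.util.List;

    class ComponentOrder {
        // Walk the other memory components in rotation order, prepending each, then
        // prepend the current mutable component so the newest data is searched first.
        static List<Integer> searchOrder(int current, int numMemoryComponents) {
            List<Integer> order = new ArrayList<>();
            for (int i = 0; i < numMemoryComponents - 1; i++) {
                order.add(0, (current + i + 1) % numMemoryComponents);
            }
            order.add(0, current);
            return order;
        }

        public static void main(String[] args) {
            // With 3 memory components and component 1 currently mutable:
            System.out.println(searchOrder(1, 3)); // [1, 0, 2], i.e. current, then newest to oldest
        }
    }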
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
index 16f1fa1..e31af9a 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexAccessor.java
@@ -25,7 +25,6 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperation;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessorInternal;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
@@ -33,16 +32,11 @@
public class LSMInvertedIndexAccessor implements ILSMIndexAccessorInternal, IInvertedIndexAccessor {
protected final ILSMHarness lsmHarness;
- protected final ILSMIndexFileManager fileManager;
protected final ILSMIndexOperationContext ctx;
- protected final LSMInvertedIndex invIndex;
- public LSMInvertedIndexAccessor(LSMInvertedIndex invIndex, ILSMHarness lsmHarness,
- ILSMIndexFileManager fileManager, ILSMIndexOperationContext ctx) {
+ public LSMInvertedIndexAccessor(ILSMHarness lsmHarness, ILSMIndexOperationContext ctx) {
this.lsmHarness = lsmHarness;
- this.fileManager = fileManager;
this.ctx = ctx;
- this.invIndex = invIndex;
}
@Override
@@ -112,11 +106,6 @@
}
@Override
- public void noOp() throws HyracksDataException {
- lsmHarness.noOp(ctx);
- }
-
- @Override
public void forcePhysicalDelete(ITupleReference tuple) throws HyracksDataException, IndexException {
throw new UnsupportedOperationException("Physical delete not supported by lsm inverted index.");
}
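The accessor is now a thin pair of (lsmHarness, ctx), with the file manager and the back-pointer to the index dropped. Its scheduleFlush path is what deactivate() in LSMInvertedIndex blocks on: the index wraps its single ioOpCallback in a BlockingIOOperationCallbackWrapper, schedules a flush, and parks until the I/O completes. The wait/notify contract of that wrapper can be sketched with a latch; the types below are assumed stand-ins, not the Hyracks classes:

    import java.util.concurrent.CountDownLatch;

    class BlockingCallbackWrapper {
        private final CountDownLatch done = new CountDownLatch(1);
        private final Runnable wrapped;

        BlockingCallbackWrapper(Runnable wrapped) { this.wrapped = wrapped; }

        // Invoked on the I/O scheduler thread once the flush finishes.
        void afterFinalize() {
            wrapped.run();
            done.countDown();
        }

        // Invoked by the thread that scheduled the flush, e.g. deactivate().
        void waitForIO() throws InterruptedException {
            done.await();
        }

        public static void main(String[] args) throws InterruptedException {
            BlockingCallbackWrapper cb = new BlockingCallbackWrapper(() -> System.out.println("flushed"));
            new Thread(cb::afterFinalize).start(); // stands in for the I/O scheduler
            cb.waitForIO();
            System.out.println("deactivate can proceed");
        }
    }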
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexImmutableComponent.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexDiskComponent.java
similarity index 85%
rename from hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexImmutableComponent.java
rename to hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexDiskComponent.java
index 829d5d4..323edd1 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexImmutableComponent.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexDiskComponent.java
@@ -17,16 +17,16 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractImmutableLSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractDiskLSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
-public class LSMInvertedIndexImmutableComponent extends AbstractImmutableLSMComponent {
+public class LSMInvertedIndexDiskComponent extends AbstractDiskLSMComponent {
private final IInvertedIndex invIndex;
private final BTree deletedKeysBTree;
private final BloomFilter bloomFilter;
- public LSMInvertedIndexImmutableComponent(IInvertedIndex invIndex, BTree deletedKeysBTree, BloomFilter bloomFilter) {
+ public LSMInvertedIndexDiskComponent(IInvertedIndex invIndex, BTree deletedKeysBTree, BloomFilter bloomFilter) {
this.invIndex = invIndex;
this.deletedKeysBTree = deletedKeysBTree;
this.bloomFilter = bloomFilter;
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexComponentFactory.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexDiskComponentFactory.java
similarity index 89%
rename from hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexComponentFactory.java
rename to hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexDiskComponentFactory.java
index 1455647..f83f529 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexComponentFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexDiskComponentFactory.java
@@ -26,12 +26,12 @@
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.OnDiskInvertedIndexFactory;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
-public class LSMInvertedIndexComponentFactory implements ILSMComponentFactory {
+public class LSMInvertedIndexDiskComponentFactory implements ILSMComponentFactory {
private final OnDiskInvertedIndexFactory diskInvIndexFactory;
private final TreeIndexFactory<BTree> btreeFactory;
private final BloomFilterFactory bloomFilterFactory;
- public LSMInvertedIndexComponentFactory(OnDiskInvertedIndexFactory diskInvIndexFactory,
+ public LSMInvertedIndexDiskComponentFactory(OnDiskInvertedIndexFactory diskInvIndexFactory,
TreeIndexFactory<BTree> btreeFactory, BloomFilterFactory bloomFilterFactory) {
this.diskInvIndexFactory = diskInvIndexFactory;
this.btreeFactory = btreeFactory;
@@ -41,7 +41,7 @@
@Override
public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException,
HyracksDataException {
- return new LSMInvertedIndexImmutableComponent(diskInvIndexFactory.createIndexInstance(cfr
+ return new LSMInvertedIndexDiskComponent(diskInvIndexFactory.createIndexInstance(cfr
.getInsertIndexFileReference()), btreeFactory.createIndexInstance(cfr.getDeleteIndexFileReference()),
bloomFilterFactory.createBloomFiltertInstance(cfr.getBloomFilterFileReference()));
}
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java
index a8973d1..45433e7 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexFlushOperation.java
@@ -23,22 +23,22 @@
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.io.IODeviceHandle;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperation;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessorInternal;
public class LSMInvertedIndexFlushOperation implements ILSMIOOperation {
private final ILSMIndexAccessorInternal accessor;
- private final LSMInvertedIndexMutableComponent flushingComponent;
+ private final ILSMComponent flushingComponent;
private final FileReference dictBTreeFlushTarget;
private final FileReference deletedKeysBTreeFlushTarget;
private final FileReference bloomFilterFlushTarget;
private final ILSMIOOperationCallback callback;
- public LSMInvertedIndexFlushOperation(ILSMIndexAccessorInternal accessor,
- LSMInvertedIndexMutableComponent flushingComponent, FileReference dictBTreeFlushTarget,
- FileReference deletedKeysBTreeFlushTarget, FileReference bloomFilterFlushTarget,
- ILSMIOOperationCallback callback) {
+ public LSMInvertedIndexFlushOperation(ILSMIndexAccessorInternal accessor, ILSMComponent flushingComponent,
+ FileReference dictBTreeFlushTarget, FileReference deletedKeysBTreeFlushTarget,
+ FileReference bloomFilterFlushTarget, ILSMIOOperationCallback callback) {
this.accessor = accessor;
this.flushingComponent = flushingComponent;
this.dictBTreeFlushTarget = dictBTreeFlushTarget;
@@ -63,8 +63,9 @@
}
@Override
- public void perform() throws HyracksDataException, IndexException {
+ public Boolean call() throws HyracksDataException, IndexException {
accessor.flush(this);
+ return true;
}
@Override
@@ -84,7 +85,7 @@
return bloomFilterFlushTarget;
}
- public LSMInvertedIndexMutableComponent getFlushingComponent() {
+ public ILSMComponent getFlushingComponent() {
return flushingComponent;
}
}
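perform() turning into Boolean call() suggests the I/O operations now implement java.util.concurrent.Callable, so a scheduler can submit them straight to an executor and hold a Future for completion. A runnable sketch of that pattern; the FlushOp body is a placeholder, not the real operation:

    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.Future;

    class FlushOp implements Callable<Boolean> {
        @Override
        public Boolean call() {
            // A real operation would delegate to accessor.flush(this) here.
            return true;
        }

        public static void main(String[] args) throws Exception {
            ExecutorService ioScheduler = Executors.newSingleThreadExecutor();
            Future<Boolean> result = ioScheduler.submit(new FlushOp());
            System.out.println("flush completed: " + result.get());
            ioScheduler.shutdown();
        }
    }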
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMutableComponent.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMemoryComponent.java
similarity index 82%
rename from hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMutableComponent.java
rename to hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMemoryComponent.java
index 7084803..eb1f915 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMutableComponent.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMemoryComponent.java
@@ -18,19 +18,19 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractMutableLSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractMemoryLSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
-public class LSMInvertedIndexMutableComponent extends AbstractMutableLSMComponent {
+public class LSMInvertedIndexMemoryComponent extends AbstractMemoryLSMComponent {
private final IInvertedIndex invIndex;
private final BTree deletedKeysBTree;
- private final IVirtualBufferCache vbc;
- public LSMInvertedIndexMutableComponent(IInvertedIndex invIndex, BTree deletedKeysBTree, IVirtualBufferCache vbc) {
+ public LSMInvertedIndexMemoryComponent(IInvertedIndex invIndex, BTree deletedKeysBTree, IVirtualBufferCache vbc,
+ boolean isActive) {
+ super(vbc, isActive);
this.invIndex = invIndex;
this.deletedKeysBTree = deletedKeysBTree;
- this.vbc = vbc;
}
public IInvertedIndex getInvIndex() {
@@ -42,11 +42,6 @@
}
@Override
- protected boolean isFull() {
- return vbc.isFull();
- }
-
- @Override
protected void reset() throws HyracksDataException {
super.reset();
invIndex.deactivate();
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java
index 7714808..7cd921a 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexMergeOperation.java
@@ -55,7 +55,7 @@
public Set<IODeviceHandle> getReadDevices() {
Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>();
for (Object o : mergingComponents) {
- LSMInvertedIndexImmutableComponent component = (LSMInvertedIndexImmutableComponent) o;
+ LSMInvertedIndexDiskComponent component = (LSMInvertedIndexDiskComponent) o;
OnDiskInvertedIndex invIndex = (OnDiskInvertedIndex) component.getInvIndex();
devs.add(invIndex.getBTree().getFileReference().getDeviceHandle());
devs.add(component.getDeletedKeysBTree().getFileReference().getDeviceHandle());
@@ -74,8 +74,9 @@
}
@Override
- public void perform() throws HyracksDataException, IndexException {
+ public Boolean call() throws HyracksDataException, IndexException {
accessor.merge(this);
+ return true;
}
@Override
@@ -102,5 +103,4 @@
public List<ILSMComponent> getMergingComponents() {
return mergingComponents;
}
-
}
\ No newline at end of file
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexOpContext.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexOpContext.java
index fd83bfb..1a9303f 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexOpContext.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexOpContext.java
@@ -19,7 +19,6 @@
import java.util.List;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.IModificationOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
@@ -28,7 +27,6 @@
import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
-import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
public class LSMInvertedIndexOpContext implements ILSMIndexOperationContext {
@@ -36,8 +34,6 @@
private static final int NUM_DOCUMENT_FIELDS = 1;
private IndexOperation op;
- private final IInvertedIndex memInvIndex;
- private final IIndex memDeletedKeysBTree;
private final List<ILSMComponent> componentHolder;
public final IModificationOperationCallback modificationCallback;
@@ -46,18 +42,43 @@
// Tuple that only has the inverted-index elements (aka keys), projecting away the document fields.
public PermutingTupleReference keysOnlyTuple;
- // Accessor to the in-memory inverted index.
- public IInvertedIndexAccessor memInvIndexAccessor;
- // Accessor to the deleted-keys BTree.
- public IIndexAccessor deletedKeysBTreeAccessor;
+ // Accessor to the in-memory inverted indexes.
+ public IInvertedIndexAccessor[] mutableInvIndexAccessors;
+ // Accessor to the deleted-keys BTrees.
+ public IIndexAccessor[] deletedKeysBTreeAccessors;
- public LSMInvertedIndexOpContext(IInvertedIndex memInvIndex, IIndex memDeletedKeysBTree,
- IModificationOperationCallback modificationCallback, ISearchOperationCallback searchCallback) {
- this.memInvIndex = memInvIndex;
- this.memDeletedKeysBTree = memDeletedKeysBTree;
+ public IInvertedIndexAccessor currentMutableInvIndexAccessors;
+ public IIndexAccessor currentDeletedKeysBTreeAccessors;
+
+ public LSMInvertedIndexOpContext(List<ILSMComponent> mutableComponents,
+ IModificationOperationCallback modificationCallback, ISearchOperationCallback searchCallback)
+ throws HyracksDataException {
this.componentHolder = new LinkedList<ILSMComponent>();
this.modificationCallback = modificationCallback;
this.searchCallback = searchCallback;
+
+ mutableInvIndexAccessors = new IInvertedIndexAccessor[mutableComponents.size()];
+ deletedKeysBTreeAccessors = new IIndexAccessor[mutableComponents.size()];
+
+ for (int i = 0; i < mutableComponents.size(); i++) {
+ LSMInvertedIndexMemoryComponent mutableComponent = (LSMInvertedIndexMemoryComponent) mutableComponents
+ .get(i);
+ mutableInvIndexAccessors[i] = (IInvertedIndexAccessor) mutableComponent.getInvIndex().createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ deletedKeysBTreeAccessors[i] = mutableComponent.getDeletedKeysBTree().createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ }
+
+ assert mutableComponents.size() > 0;
+
+ // Project away the document fields, leaving only the key fields.
+ LSMInvertedIndexMemoryComponent c = (LSMInvertedIndexMemoryComponent) mutableComponents.get(0);
+ int numKeyFields = c.getInvIndex().getInvListTypeTraits().length;
+ int[] keyFieldPermutation = new int[numKeyFields];
+ for (int i = 0; i < numKeyFields; i++) {
+ keyFieldPermutation[i] = NUM_DOCUMENT_FIELDS + i;
+ }
+ keysOnlyTuple = new PermutingTupleReference(keyFieldPermutation);
}
@Override
@@ -69,26 +90,6 @@
// TODO: Ignore opcallback for now.
public void setOperation(IndexOperation newOp) throws HyracksDataException {
reset();
- switch (newOp) {
- case INSERT:
- case DELETE:
- case PHYSICALDELETE: {
- if (deletedKeysBTreeAccessor == null) {
- memInvIndexAccessor = (IInvertedIndexAccessor) memInvIndex.createAccessor(
- NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- deletedKeysBTreeAccessor = memDeletedKeysBTree.createAccessor(NoOpOperationCallback.INSTANCE,
- NoOpOperationCallback.INSTANCE);
- // Project away the document fields, leaving only the key fields.
- int numKeyFields = memInvIndex.getInvListTypeTraits().length;
- int[] keyFieldPermutation = new int[numKeyFields];
- for (int i = 0; i < numKeyFields; i++) {
- keyFieldPermutation[i] = NUM_DOCUMENT_FIELDS + i;
- }
- keysOnlyTuple = new PermutingTupleReference(keyFieldPermutation);
- }
- break;
- }
- }
op = newOp;
}
@@ -111,4 +112,10 @@
public IModificationOperationCallback getModificationCallback() {
return modificationCallback;
}
+
+ @Override
+ public void setCurrentMutableComponentId(int currentMutableComponentId) {
+ currentMutableInvIndexAccessors = mutableInvIndexAccessors[currentMutableComponentId];
+ currentDeletedKeysBTreeAccessors = deletedKeysBTreeAccessors[currentMutableComponentId];
+ }
}
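The rewritten op context builds one accessor pair per mutable component up front and makes setCurrentMutableComponentId() an O(1) pointer swap, where the old code lazily created a single accessor pair inside setOperation(). A sketch of the pattern under simplified types (Accessor is a placeholder for the real IInvertedIndexAccessor/IIndexAccessor pair):

    // Accessor is a placeholder; the real context holds IInvertedIndexAccessor
    // and IIndexAccessor arrays created from the mutable components.
    class Accessor {
        final int componentId;

        Accessor(int componentId) {
            this.componentId = componentId;
        }
    }

    class OpContextSketch {
        private final Accessor[] invIndexAccessors;
        private final Accessor[] deletedKeysAccessors;
        private Accessor currentInvIndexAccessor;
        private Accessor currentDeletedKeysAccessor;

        OpContextSketch(int numMutableComponents) {
            invIndexAccessors = new Accessor[numMutableComponents];
            deletedKeysAccessors = new Accessor[numMutableComponents];
            for (int i = 0; i < numMutableComponents; i++) {
                // Created eagerly in the constructor, mirroring the new code.
                invIndexAccessors[i] = new Accessor(i);
                deletedKeysAccessors[i] = new Accessor(i);
            }
        }

        // Called when the active in-memory component rotates.
        void setCurrentMutableComponentId(int id) {
            currentInvIndexAccessor = invIndexAccessors[id];
            currentDeletedKeysAccessor = deletedKeysAccessors[id];
        }
    }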
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
index 820b516..cd0dde3 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexRangeSearchCursor.java
@@ -27,7 +27,9 @@
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
@@ -63,7 +65,7 @@
}
lsmHarness = lsmInitState.getLSMHarness();
operationalComponents = lsmInitState.getOperationalComponents();
- includeMemComponent = lsmInitState.getIncludeMemComponent();
+ includeMutableComponent = lsmInitState.getIncludeMemComponent();
// For searching the deleted-keys BTrees.
this.keysOnlyTuple = lsmInitState.getKeysOnlyTuple();
@@ -71,18 +73,17 @@
if (!deletedKeysBTreeAccessors.isEmpty()) {
deletedKeysBTreeCursors = new IIndexCursor[deletedKeysBTreeAccessors.size()];
- int i = 0;
- if (includeMemComponent) {
- // No need for a bloom filter for the in-memory BTree.
- deletedKeysBTreeCursors[i] = deletedKeysBTreeAccessors.get(i).createSearchCursor();
- ++i;
+ for (int i = 0; i < operationalComponents.size(); i++) {
+ ILSMComponent component = operationalComponents.get(i);
+ if (component.getType() == LSMComponentType.MEMORY) {
+ // No need for a bloom filter for the in-memory BTree.
+ deletedKeysBTreeCursors[i] = deletedKeysBTreeAccessors.get(i).createSearchCursor();
+ } else {
+ deletedKeysBTreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitState
+ .getgetDeletedKeysBTreeLeafFrameFactory().createFrame(), false,
+ ((LSMInvertedIndexDiskComponent) operationalComponents.get(i)).getBloomFilter());
+ }
}
- for (; i < deletedKeysBTreeCursors.length; i++) {
- deletedKeysBTreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitState
- .getgetDeletedKeysBTreeLeafFrameFactory().createFrame(), false,
- ((LSMInvertedIndexImmutableComponent) operationalComponents.get(i)).getBloomFilter());
- }
-
}
MultiComparator keyCmp = lsmInitState.getKeyComparator();
keySearchPred = new RangePredicate(keysOnlyTuple, keysOnlyTuple, true, true, keyCmp, keyCmp);
@@ -113,7 +114,7 @@
}
return false;
}
-
+
@Override
protected void checkPriorityQueue() throws HyracksDataException, IndexException {
while (!outputPriorityQueue.isEmpty() || needPush == true) {
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
index 882d1a1..19acbfb 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/LSMInvertedIndexSearchCursor.java
@@ -28,6 +28,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMHarness;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
@@ -44,7 +45,6 @@
private int accessorIndex = -1;
private boolean tupleConsumed = true;
private ILSMHarness harness;
- private boolean includeMemComponent;
private List<IIndexAccessor> indexAccessors;
private ISearchPredicate searchPred;
private ISearchOperationCallback searchCallback;
@@ -61,7 +61,6 @@
public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
LSMInvertedIndexSearchCursorInitialState lsmInitState = (LSMInvertedIndexSearchCursorInitialState) initialState;
harness = lsmInitState.getLSMHarness();
- includeMemComponent = lsmInitState.getIncludeMemComponent();
operationalComponents = lsmInitState.getOperationalComponents();
indexAccessors = lsmInitState.getIndexAccessors();
opCtx = lsmInitState.getOpContext();
@@ -72,16 +71,17 @@
// For searching the deleted-keys BTrees.
deletedKeysBTreeAccessors = lsmInitState.getDeletedKeysBTreeAccessors();
deletedKeysBTreeCursors = new IIndexCursor[deletedKeysBTreeAccessors.size()];
- int i = 0;
- if (includeMemComponent) {
- // No need for a bloom filter for the in-memory BTree.
- deletedKeysBTreeCursors[i] = deletedKeysBTreeAccessors.get(i).createSearchCursor();
- ++i;
- }
- for (; i < deletedKeysBTreeCursors.length; i++) {
- deletedKeysBTreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitState
- .getgetDeletedKeysBTreeLeafFrameFactory().createFrame(), false,
- ((LSMInvertedIndexImmutableComponent) operationalComponents.get(i)).getBloomFilter());
+
+ for (int i = 0; i < operationalComponents.size(); i++) {
+ ILSMComponent component = operationalComponents.get(i);
+ if (component.getType() == LSMComponentType.MEMORY) {
+ // No need for a bloom filter for the in-memory BTree.
+ deletedKeysBTreeCursors[i] = deletedKeysBTreeAccessors.get(i).createSearchCursor();
+ } else {
+ deletedKeysBTreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitState
+ .getgetDeletedKeysBTreeLeafFrameFactory().createFrame(), false,
+ ((LSMInvertedIndexDiskComponent) operationalComponents.get(i)).getBloomFilter());
+ }
}
MultiComparator keyCmp = lsmInitState.getKeyComparator();
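Both search cursors now choose the deleted-keys cursor per component by type rather than by position: a MEMORY component gets a plain search cursor (in-memory BTrees carry no bloom filter), while a DISK component gets a bloom-filter-aware point-search cursor. A sketch of that dispatch, with placeholder types standing in for ILSMComponent and the two cursor classes:

    import java.util.List;

    enum ComponentType { MEMORY, DISK } // mirrors LSMComponentType

    interface Cursor {}

    class PlainSearchCursor implements Cursor {}

    class BloomFilterAwareCursor implements Cursor {}

    class CursorSelectionSketch {
        static Cursor[] createDeletedKeysCursors(List<ComponentType> components) {
            Cursor[] cursors = new Cursor[components.size()];
            for (int i = 0; i < components.size(); i++) {
                if (components.get(i) == ComponentType.MEMORY) {
                    // No bloom filter for the in-memory BTree.
                    cursors[i] = new PlainSearchCursor();
                } else {
                    // Disk components are probed through their bloom filter first.
                    cursors[i] = new BloomFilterAwareCursor();
                }
            }
            return cursors;
        }
    }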
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java
index 185321b..ddb6060 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/impls/PartitionedLSMInvertedIndex.java
@@ -16,13 +16,15 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls;
import java.io.File;
+import java.util.List;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilterFactory;
+import edu.uci.ics.hyracks.storage.am.common.api.IVirtualFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
@@ -37,25 +39,25 @@
public class PartitionedLSMInvertedIndex extends LSMInvertedIndex {
- public PartitionedLSMInvertedIndex(IVirtualBufferCache virtualBufferCache,
+ public PartitionedLSMInvertedIndex(List<IVirtualBufferCache> virtualBufferCaches,
OnDiskInvertedIndexFactory diskInvIndexFactory, BTreeFactory deletedKeysBTreeFactory,
BloomFilterFactory bloomFilterFactory, double bloomFilterFalsePositiveRate,
ILSMIndexFileManager fileManager, IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
IBinaryComparatorFactory[] tokenCmpFactories, IBinaryTokenizerFactory tokenizerFactory,
ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) throws IndexException {
- super(virtualBufferCache, diskInvIndexFactory, deletedKeysBTreeFactory, bloomFilterFactory,
+ ILSMIOOperationCallback ioOpCallback) throws IndexException {
+ super(virtualBufferCaches, diskInvIndexFactory, deletedKeysBTreeFactory, bloomFilterFactory,
bloomFilterFalsePositiveRate, fileManager, diskFileMapProvider, invListTypeTraits, invListCmpFactories,
- tokenTypeTraits, tokenCmpFactories, tokenizerFactory, mergePolicy, opTracker, ioScheduler,
- ioOpCallbackProvider);
+ tokenTypeTraits, tokenCmpFactories, tokenizerFactory, mergePolicy, opTracker, ioScheduler, ioOpCallback);
}
- protected InMemoryInvertedIndex createInMemoryInvertedIndex(IVirtualBufferCache virtualBufferCache)
- throws IndexException {
+ @Override
+ protected InMemoryInvertedIndex createInMemoryInvertedIndex(IVirtualBufferCache virtualBufferCache,
+ IVirtualFreePageManager virtualFreePageManager, int id) throws IndexException {
return InvertedIndexUtils.createPartitionedInMemoryBTreeInvertedindex(virtualBufferCache,
virtualFreePageManager, invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories,
- tokenizerFactory, new FileReference(new File(fileManager.getBaseDir() + "_virtual_vocab")));
+ tokenizerFactory, new FileReference(new File(fileManager.getBaseDir() + "_virtual_vocab_" + id)));
}
}
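Because the index can now own several in-memory components, each component's virtual vocabulary file gets the component id as a suffix; the old fixed "_virtual_vocab" name would collide. A tiny sketch of the naming scheme (the base directory is an assumed example value, not taken from the patch):

    import java.io.File;

    class VirtualFileNaming {
        public static void main(String[] args) {
            String baseDir = "/tmp/invindex"; // assumed; the real value comes from fileManager.getBaseDir()
            for (int id = 0; id < 2; id++) {
                // One distinct virtual vocabulary file per mutable component.
                File vocab = new File(baseDir + "_virtual_vocab_" + id);
                System.out.println(vocab.getPath());
            }
        }
    }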
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
index 222f4de..c70c2d5 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndex.java
@@ -19,9 +19,7 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeException;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
@@ -33,6 +31,8 @@
import edu.uci.ics.hyracks.storage.am.common.api.IModificationOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
@@ -117,7 +117,7 @@
ITupleReference insertTuple = ctx.tupleIter.getTuple();
try {
btreeAccessor.insert(insertTuple);
- } catch (BTreeDuplicateKeyException e) {
+ } catch (TreeIndexDuplicateKeyException e) {
// This exception may be caused by duplicate tokens in the same insert "document".
// We ignore such duplicate tokens in all inverted-index implementations, hence
// we can safely ignore this exception.
@@ -134,7 +134,7 @@
ITupleReference deleteTuple = ctx.tupleIter.getTuple();
try {
btreeAccessor.delete(deleteTuple);
- } catch (BTreeNonExistentKeyException e) {
+ } catch (TreeIndexNonExistentKeyException e) {
// Ignore this exception, since a document may have duplicate tokens.
}
}
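The insert and delete paths above now catch the tree-index-generic duplicate-key and non-existent-key exceptions instead of the BTree-specific ones; either way, the point is that duplicate tokens within a single document are expected and must not fail the operation. A sketch of the tolerant pattern, with hypothetical exception and accessor stand-ins:

    // Hypothetical stand-ins for TreeIndexDuplicateKeyException and
    // TreeIndexNonExistentKeyException.
    class DuplicateKeyException extends Exception {}

    class NonExistentKeyException extends Exception {}

    interface TokenBTreeAccessor {
        void insert(String token) throws DuplicateKeyException;
        void delete(String token) throws NonExistentKeyException;
    }

    class TolerantTokenOps {
        // A document may contain the same token twice; swallow the duplicate
        // rather than aborting the whole document insert.
        static void insertToken(TokenBTreeAccessor acc, String token) {
            try {
                acc.insert(token);
            } catch (DuplicateKeyException e) {
                // Ignored: duplicate token in the same document.
            }
        }

        static void deleteToken(TokenBTreeAccessor acc, String token) {
            try {
                acc.delete(token);
            } catch (NonExistentKeyException e) {
                // Ignored: the duplicate token was already deleted.
            }
        }
    }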
diff --git a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
index 9d85f56..372221e 100644
--- a/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
+++ b/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/InvertedIndexUtils.java
@@ -16,6 +16,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util;
import java.io.File;
+import java.util.List;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
@@ -32,7 +33,7 @@
import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManagerFactory;
import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
@@ -115,13 +116,13 @@
return deletedKeysBTreeFactory;
}
- public static LSMInvertedIndex createLSMInvertedIndex(IVirtualBufferCache virtualBufferCache,
+ public static LSMInvertedIndex createLSMInvertedIndex(List<IVirtualBufferCache> virtualBufferCaches,
IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
IBinaryComparatorFactory[] tokenCmpFactories, IBinaryTokenizerFactory tokenizerFactory,
IBufferCache diskBufferCache, String onDiskDir, double bloomFilterFalsePositiveRate,
ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) throws IndexException {
+ ILSMIOOperationCallback ioOpCallback) throws IndexException {
BTreeFactory deletedKeysBTreeFactory = createDeletedKeysBTreeFactory(diskFileMapProvider, invListTypeTraits,
invListCmpFactories, diskBufferCache);
@@ -143,20 +144,20 @@
diskFileMapProvider, invListBuilderFactory, invListTypeTraits, invListCmpFactories, tokenTypeTraits,
tokenCmpFactories, fileManager);
- LSMInvertedIndex invIndex = new LSMInvertedIndex(virtualBufferCache, invIndexFactory, deletedKeysBTreeFactory,
+ LSMInvertedIndex invIndex = new LSMInvertedIndex(virtualBufferCaches, invIndexFactory, deletedKeysBTreeFactory,
bloomFilterFactory, bloomFilterFalsePositiveRate, fileManager, diskFileMapProvider, invListTypeTraits,
invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory, mergePolicy, opTracker,
- ioScheduler, ioOpCallbackProvider);
+ ioScheduler, ioOpCallback);
return invIndex;
}
- public static PartitionedLSMInvertedIndex createPartitionedLSMInvertedIndex(IVirtualBufferCache virtualBufferCache,
- IFileMapProvider diskFileMapProvider, ITypeTraits[] invListTypeTraits,
- IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
- IBinaryComparatorFactory[] tokenCmpFactories, IBinaryTokenizerFactory tokenizerFactory,
- IBufferCache diskBufferCache, String onDiskDir, double bloomFilterFalsePositiveRate,
- ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) throws IndexException {
+ public static PartitionedLSMInvertedIndex createPartitionedLSMInvertedIndex(
+ List<IVirtualBufferCache> virtualBufferCaches, IFileMapProvider diskFileMapProvider,
+ ITypeTraits[] invListTypeTraits, IBinaryComparatorFactory[] invListCmpFactories,
+ ITypeTraits[] tokenTypeTraits, IBinaryComparatorFactory[] tokenCmpFactories,
+ IBinaryTokenizerFactory tokenizerFactory, IBufferCache diskBufferCache, String onDiskDir,
+ double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker,
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback) throws IndexException {
BTreeFactory deletedKeysBTreeFactory = createDeletedKeysBTreeFactory(diskFileMapProvider, invListTypeTraits,
invListCmpFactories, diskBufferCache);
@@ -178,10 +179,10 @@
diskBufferCache, diskFileMapProvider, invListBuilderFactory, invListTypeTraits, invListCmpFactories,
tokenTypeTraits, tokenCmpFactories, fileManager);
- PartitionedLSMInvertedIndex invIndex = new PartitionedLSMInvertedIndex(virtualBufferCache, invIndexFactory,
+ PartitionedLSMInvertedIndex invIndex = new PartitionedLSMInvertedIndex(virtualBufferCaches, invIndexFactory,
deletedKeysBTreeFactory, bloomFilterFactory, bloomFilterFalsePositiveRate, fileManager,
diskFileMapProvider, invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories,
- tokenizerFactory, mergePolicy, opTracker, ioScheduler, ioOpCallbackProvider);
+ tokenizerFactory, mergePolicy, opTracker, ioScheduler, ioOpCallback);
return invIndex;
}
}
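The utility factories now take a List<IVirtualBufferCache>, one cache per mutable component, so a caller splits its in-memory budget across the list before constructing the index. A hedged sketch of preparing such a list (VirtualBufferCacheLike is a placeholder interface, not the real API):

    import java.util.ArrayList;
    import java.util.List;

    class MultiComponentSetup {
        // Placeholder for IVirtualBufferCache.
        interface VirtualBufferCacheLike {
            int getNumPages();
        }

        static List<VirtualBufferCacheLike> createCaches(int numComponents, int totalPages) {
            List<VirtualBufferCacheLike> caches = new ArrayList<>();
            int pagesEach = totalPages / numComponents; // split the memory budget evenly
            for (int i = 0; i < numComponents; i++) {
                caches.add(() -> pagesEach); // one cache per mutable component
            }
            return caches;
        }
    }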
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/pom.xml b/hyracks/hyracks-storage-am-lsm-rtree/pom.xml
index 080ba68..09a7fbf 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/pom.xml
+++ b/hyracks/hyracks-storage-am-lsm-rtree/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -39,21 +39,21 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-rtree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/AbstractLSMRTreeDataflowHelper.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/AbstractLSMRTreeDataflowHelper.java
index 9fc8cea..636f0cb 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/AbstractLSMRTreeDataflowHelper.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/AbstractLSMRTreeDataflowHelper.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.dataflow;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ILinearizeComparatorFactory;
@@ -25,7 +27,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
import edu.uci.ics.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
@@ -44,25 +46,25 @@
protected final ILinearizeComparatorFactory linearizeCmpFactory;
public AbstractLSMRTreeDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- IVirtualBufferCache virtualBufferCache, IBinaryComparatorFactory[] btreeComparatorFactories,
+ List<IVirtualBufferCache> virtualBufferCaches, IBinaryComparatorFactory[] btreeComparatorFactories,
IPrimitiveValueProviderFactory[] valueProviderFactories, RTreePolicyType rtreePolicyType,
ILSMMergePolicy mergePolicy, ILSMOperationTrackerProvider opTrackerFactory,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider,
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackFactory ioOpCallbackFactory,
ILinearizeComparatorFactory linearizeCmpFactory) {
- this(opDesc, ctx, partition, virtualBufferCache, DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE,
+ this(opDesc, ctx, partition, virtualBufferCaches, DEFAULT_BLOOM_FILTER_FALSE_POSITIVE_RATE,
btreeComparatorFactories, valueProviderFactories, rtreePolicyType, mergePolicy, opTrackerFactory,
- ioScheduler, ioOpCallbackProvider, linearizeCmpFactory);
+ ioScheduler, ioOpCallbackFactory, linearizeCmpFactory);
}
public AbstractLSMRTreeDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- IVirtualBufferCache virtualBufferCache, double bloomFilterFalsePositiveRate,
+ List<IVirtualBufferCache> virtualBufferCaches, double bloomFilterFalsePositiveRate,
IBinaryComparatorFactory[] btreeComparatorFactories,
IPrimitiveValueProviderFactory[] valueProviderFactories, RTreePolicyType rtreePolicyType,
ILSMMergePolicy mergePolicy, ILSMOperationTrackerProvider opTrackerFactory,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider,
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackFactory ioOpCallbackFactory,
ILinearizeComparatorFactory linearizeCmpFactory) {
- super(opDesc, ctx, partition, virtualBufferCache, bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory,
- ioScheduler, ioOpCallbackProvider);
+ super(opDesc, ctx, partition, virtualBufferCaches, bloomFilterFalsePositiveRate, mergePolicy, opTrackerFactory,
+ ioScheduler, ioOpCallbackFactory);
this.btreeComparatorFactories = btreeComparatorFactories;
this.valueProviderFactories = valueProviderFactories;
this.rtreePolicyType = rtreePolicyType;
@@ -72,14 +74,14 @@
@Override
public ITreeIndex createIndexInstance() throws HyracksDataException {
AbstractTreeIndexOperatorDescriptor treeOpDesc = (AbstractTreeIndexOperatorDescriptor) opDesc;
- return createLSMTree(virtualBufferCache, file, opDesc.getStorageManager().getBufferCache(ctx), opDesc
+ return createLSMTree(virtualBufferCaches, file, opDesc.getStorageManager().getBufferCache(ctx), opDesc
.getStorageManager().getFileMapProvider(ctx), treeOpDesc.getTreeIndexTypeTraits(),
treeOpDesc.getTreeIndexComparatorFactories(), btreeComparatorFactories,
opTrackerFactory.getOperationTracker(ctx), valueProviderFactories, rtreePolicyType, linearizeCmpFactory);
}
- protected abstract ITreeIndex createLSMTree(IVirtualBufferCache virtualBufferCache, FileReference file,
+ protected abstract ITreeIndex createLSMTree(List<IVirtualBufferCache> virtualBufferCaches, FileReference file,
IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits,
IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeCmpFactories,
ILSMOperationTracker opTracker, IPrimitiveValueProviderFactory[] valueProviderFactories,
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeDataflowHelper.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeDataflowHelper.java
index b47bc02..92364fa 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeDataflowHelper.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeDataflowHelper.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.dataflow;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ILinearizeComparatorFactory;
@@ -25,7 +27,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
@@ -39,39 +41,39 @@
public class LSMRTreeDataflowHelper extends AbstractLSMRTreeDataflowHelper {
public LSMRTreeDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- IVirtualBufferCache virtualBufferCache, IBinaryComparatorFactory[] btreeComparatorFactories,
+ List<IVirtualBufferCache> virtualBufferCaches, IBinaryComparatorFactory[] btreeComparatorFactories,
IPrimitiveValueProviderFactory[] valueProviderFactories, RTreePolicyType rtreePolicyType,
ILSMMergePolicy mergePolicy, ILSMOperationTrackerProvider opTrackerFactory,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider,
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackFactory ioOpCallbackFactory,
ILinearizeComparatorFactory linearizeCmpFactory) {
- super(opDesc, ctx, partition, virtualBufferCache, btreeComparatorFactories, valueProviderFactories,
- rtreePolicyType, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider, linearizeCmpFactory);
+ super(opDesc, ctx, partition, virtualBufferCaches, btreeComparatorFactories, valueProviderFactories,
+ rtreePolicyType, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackFactory, linearizeCmpFactory);
}
public LSMRTreeDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx, int partition,
- IVirtualBufferCache virtualBufferCache, double bloomFilterFalsePositiveRate,
+ List<IVirtualBufferCache> virtualBufferCaches, double bloomFilterFalsePositiveRate,
IBinaryComparatorFactory[] btreeComparatorFactories,
IPrimitiveValueProviderFactory[] valueProviderFactories, RTreePolicyType rtreePolicyType,
ILSMMergePolicy mergePolicy, ILSMOperationTrackerProvider opTrackerFactory,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider,
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackFactory ioOpCallbackFactory,
ILinearizeComparatorFactory linearizeCmpFactory) {
- super(opDesc, ctx, partition, virtualBufferCache, bloomFilterFalsePositiveRate, btreeComparatorFactories,
+ super(opDesc, ctx, partition, virtualBufferCaches, bloomFilterFalsePositiveRate, btreeComparatorFactories,
valueProviderFactories, rtreePolicyType, mergePolicy, opTrackerFactory, ioScheduler,
- ioOpCallbackProvider, linearizeCmpFactory);
+ ioOpCallbackFactory, linearizeCmpFactory);
}
@Override
- protected ITreeIndex createLSMTree(IVirtualBufferCache virtualBufferCache, FileReference file,
+ protected ITreeIndex createLSMTree(List<IVirtualBufferCache> virtualBufferCaches, FileReference file,
IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits,
IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeCmpFactories,
ILSMOperationTracker opTracker, IPrimitiveValueProviderFactory[] valueProviderFactories,
RTreePolicyType rtreePolicyType, ILinearizeComparatorFactory linearizeCmpFactory)
throws HyracksDataException {
try {
- return LSMRTreeUtils.createLSMTree(virtualBufferCache, file, diskBufferCache, diskFileMapProvider,
+ return LSMRTreeUtils.createLSMTree(virtualBufferCaches, file, diskBufferCache, diskFileMapProvider,
typeTraits, rtreeCmpFactories, btreeCmpFactories, valueProviderFactories, rtreePolicyType,
- bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler, ioOpCallbackProvider,
- linearizeCmpFactory);
+ bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler,
+ ioOpCallbackFactory.createIOOperationCallback(), linearizeCmpFactory);
} catch (TreeIndexException e) {
throw new HyracksDataException(e);
}
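The helper now keeps an ILSMIOOperationCallbackFactory and materializes the callback once, at index-creation time, via createIOOperationCallback(), instead of passing a provider down for the index to query later. A sketch of the factory-at-construction pattern with placeholder types:

    // Placeholder pair mirroring ILSMIOOperationCallbackFactory /
    // ILSMIOOperationCallback; method names here are illustrative.
    interface IoOpCallback {
        void afterFinalize();
    }

    interface IoOpCallbackFactory {
        IoOpCallback createIOOperationCallback();
    }

    class LsmIndexSketch {
        private final IoOpCallback ioOpCallback;

        // The callback is created exactly once, when the index instance is
        // built, and is owned by the index from then on.
        LsmIndexSketch(IoOpCallbackFactory factory) {
            this.ioOpCallback = factory.createIOOperationCallback();
        }

        void onFlushFinished() {
            ioOpCallback.afterFinalize();
        }
    }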
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeDataflowHelperFactory.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeDataflowHelperFactory.java
index 5d35bc5..8254689 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeDataflowHelperFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeDataflowHelperFactory.java
@@ -21,7 +21,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexDataflowHelper;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicyProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
@@ -42,10 +42,10 @@
RTreePolicyType rtreePolicyType, IBinaryComparatorFactory[] btreeComparatorFactories,
IVirtualBufferCacheProvider virtualBufferCacheProvider, ILSMMergePolicyProvider mergePolicyProvider,
ILSMOperationTrackerProvider opTrackerFactory, ILSMIOOperationSchedulerProvider ioSchedulerProvider,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider, ILinearizeComparatorFactory linearizeCmpFactory,
+ ILSMIOOperationCallbackFactory ioOpCallbackFactory, ILinearizeComparatorFactory linearizeCmpFactory,
double bloomFilterFalsePositiveRate) {
super(virtualBufferCacheProvider, mergePolicyProvider, opTrackerFactory, ioSchedulerProvider,
- ioOpCallbackProvider, bloomFilterFalsePositiveRate);
+ ioOpCallbackFactory, bloomFilterFalsePositiveRate);
this.btreeComparatorFactories = btreeComparatorFactories;
this.valueProviderFactories = valueProviderFactories;
this.rtreePolicyType = rtreePolicyType;
@@ -56,9 +56,9 @@
public IndexDataflowHelper createIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
int partition) {
return new LSMRTreeDataflowHelper(opDesc, ctx, partition,
- virtualBufferCacheProvider.getVirtualBufferCache(ctx), bloomFilterFalsePositiveRate,
+ virtualBufferCacheProvider.getVirtualBufferCaches(ctx), bloomFilterFalsePositiveRate,
btreeComparatorFactories, valueProviderFactories, rtreePolicyType,
mergePolicyProvider.getMergePolicy(ctx), opTrackerFactory, ioSchedulerProvider.getIOScheduler(ctx),
- ioOpCallbackProvider, linearizeCmpFactory);
+ ioOpCallbackFactory, linearizeCmpFactory);
}
}
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeWithAntiMatterTuplesDataflowHelper.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeWithAntiMatterTuplesDataflowHelper.java
index ea1b1e2..f430456 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeWithAntiMatterTuplesDataflowHelper.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeWithAntiMatterTuplesDataflowHelper.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.dataflow;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ILinearizeComparatorFactory;
@@ -25,7 +27,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
@@ -38,26 +40,28 @@
public class LSMRTreeWithAntiMatterTuplesDataflowHelper extends AbstractLSMRTreeDataflowHelper {
public LSMRTreeWithAntiMatterTuplesDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
- int partition, IVirtualBufferCache virtualBufferCache, IBinaryComparatorFactory[] btreeComparatorFactories,
+ int partition, List<IVirtualBufferCache> virtualBufferCaches,
+ IBinaryComparatorFactory[] btreeComparatorFactories,
IPrimitiveValueProviderFactory[] valueProviderFactories, RTreePolicyType rtreePolicyType,
ILSMMergePolicy mergePolicy, ILSMOperationTrackerProvider opTrackerFactory,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider,
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackFactory ioOpCallbackFactory,
ILinearizeComparatorFactory linearizeCmpFactory) {
- super(opDesc, ctx, partition, virtualBufferCache, btreeComparatorFactories, valueProviderFactories,
- rtreePolicyType, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackProvider, linearizeCmpFactory);
+ super(opDesc, ctx, partition, virtualBufferCaches, btreeComparatorFactories, valueProviderFactories,
+ rtreePolicyType, mergePolicy, opTrackerFactory, ioScheduler, ioOpCallbackFactory, linearizeCmpFactory);
}
@Override
- protected ITreeIndex createLSMTree(IVirtualBufferCache virtualBufferCache, FileReference file,
+ protected ITreeIndex createLSMTree(List<IVirtualBufferCache> virtualBufferCaches, FileReference file,
IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits,
IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeCmpFactories,
ILSMOperationTracker opTracker, IPrimitiveValueProviderFactory[] valueProviderFactories,
RTreePolicyType rtreePolicyType, ILinearizeComparatorFactory linearizeCmpFactory)
throws HyracksDataException {
try {
- return LSMRTreeUtils.createLSMTreeWithAntiMatterTuples(virtualBufferCache, file, diskBufferCache,
+ return LSMRTreeUtils.createLSMTreeWithAntiMatterTuples(virtualBufferCaches, file, diskBufferCache,
diskFileMapProvider, typeTraits, rtreeCmpFactories, btreeCmpFactories, valueProviderFactories,
- rtreePolicyType, mergePolicy, opTracker, ioScheduler, ioOpCallbackProvider, linearizeCmpFactory);
+ rtreePolicyType, mergePolicy, opTracker, ioScheduler,
+ ioOpCallbackFactory.createIOOperationCallback(), linearizeCmpFactory);
} catch (TreeIndexException e) {
throw new HyracksDataException(e);
}
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeWithAntiMatterTuplesDataflowHelperFactory.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeWithAntiMatterTuplesDataflowHelperFactory.java
index 5fca08f..ef34876 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeWithAntiMatterTuplesDataflowHelperFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/dataflow/LSMRTreeWithAntiMatterTuplesDataflowHelperFactory.java
@@ -22,7 +22,7 @@
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexDataflowHelper;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationSchedulerProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicyProvider;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerProvider;
@@ -40,14 +40,14 @@
private final ILSMMergePolicyProvider mergePolicyProvider;
private final ILSMOperationTrackerProvider opTrackerProvider;
private final ILSMIOOperationSchedulerProvider ioSchedulerProvider;
- private final ILSMIOOperationCallbackProvider ioOpCallbackProvider;
+ private final ILSMIOOperationCallbackFactory ioOpCallbackFactory;
private final ILinearizeComparatorFactory linearizeCmpFactory;
public LSMRTreeWithAntiMatterTuplesDataflowHelperFactory(IPrimitiveValueProviderFactory[] valueProviderFactories,
RTreePolicyType rtreePolicyType, IBinaryComparatorFactory[] btreeComparatorFactories,
IVirtualBufferCacheProvider virtualBufferCacheProvider, ILSMMergePolicyProvider mergePolicyProvider,
ILSMOperationTrackerProvider opTrackerProvider, ILSMIOOperationSchedulerProvider ioSchedulerProvider,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider, ILinearizeComparatorFactory linearizeCmpFactory) {
+ ILSMIOOperationCallbackFactory ioOpCallbackFactory, ILinearizeComparatorFactory linearizeCmpFactory) {
this.virtualBufferCacheProvider = virtualBufferCacheProvider;
this.btreeComparatorFactories = btreeComparatorFactories;
this.valueProviderFactories = valueProviderFactories;
@@ -55,7 +55,7 @@
this.mergePolicyProvider = mergePolicyProvider;
this.ioSchedulerProvider = ioSchedulerProvider;
this.opTrackerProvider = opTrackerProvider;
- this.ioOpCallbackProvider = ioOpCallbackProvider;
+ this.ioOpCallbackFactory = ioOpCallbackFactory;
this.linearizeCmpFactory = linearizeCmpFactory;
}
@@ -63,8 +63,8 @@
public IndexDataflowHelper createIndexDataflowHelper(IIndexOperatorDescriptor opDesc, IHyracksTaskContext ctx,
int partition) {
return new LSMRTreeWithAntiMatterTuplesDataflowHelper(opDesc, ctx, partition,
- virtualBufferCacheProvider.getVirtualBufferCache(ctx), btreeComparatorFactories,
+ virtualBufferCacheProvider.getVirtualBufferCaches(ctx), btreeComparatorFactories,
valueProviderFactories, rtreePolicyType, mergePolicyProvider.getMergePolicy(ctx), opTrackerProvider,
- ioSchedulerProvider.getIOScheduler(ctx), ioOpCallbackProvider, linearizeCmpFactory);
+ ioSchedulerProvider.getIOScheduler(ctx), ioOpCallbackFactory, linearizeCmpFactory);
}
}
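The AbstractLSMRTree diff that follows also reworks component selection for searches: starting from the current mutable component, it visits the other memory components and prepends each readable one, so the list ends up newest-first, with disk components appended after. A sketch of that ordering under a simplified component model (the readable flags are illustrative):

    import java.util.ArrayList;
    import java.util.List;

    class SearchOrderSketch {
        // Returns memory component indexes newest-first, starting from the
        // current mutable component and skipping unreadable ones; disk
        // components would be appended after this list.
        static List<Integer> searchOrder(int current, boolean[] readable) {
            int n = readable.length;
            List<Integer> order = new ArrayList<>();
            for (int i = 0; i < n - 1; i++) {
                int idx = (current + i + 1) % n; // walks older components, oldest first
                if (readable[idx]) {
                    order.add(0, idx); // prepend so newer components land earlier
                }
            }
            order.add(0, current); // the current mutable component is always first
            return order;
        }

        public static void main(String[] args) {
            // Three memory components; component 1 is current, all readable.
            System.out.println(searchOrder(1, new boolean[] { true, true, true }));
            // Prints [1, 0, 2]: current first, then newest-to-oldest among the rest.
        }
    }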
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
index e3e690b..b09e115 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/AbstractLSMRTree.java
@@ -23,24 +23,24 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
-import edu.uci.ics.hyracks.storage.am.btree.impls.BTree.BTreeAccessor;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.api.IFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.common.api.IModificationOperationCallback;
+import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
-import edu.uci.ics.hyracks.storage.am.common.api.IVirtualFreePageManager;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponentFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexFileManager;
@@ -52,11 +52,9 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractLSMIndex;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BlockingIOOperationCallbackWrapper;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeInteriorFrame;
import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeLeafFrame;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
-import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree.RTreeAccessor;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
@@ -66,11 +64,6 @@
protected final int[] comparatorFields;
protected final IBinaryComparatorFactory[] linearizerArray;
- // In-memory components.
- protected final LSMRTreeMutableComponent mutableComponent;
- protected final IVirtualBufferCache virtualBufferCache;
- protected final IVirtualFreePageManager virtualFreePageManager;
-
protected TreeTupleSorter rTreeTupleSorter;
// On-disk components.
@@ -86,27 +79,35 @@
protected final ITreeIndexFrameFactory rtreeLeafFrameFactory;
protected final ITreeIndexFrameFactory btreeLeafFrameFactory;
- public AbstractLSMRTree(IVirtualBufferCache virtualBufferCache, ITreeIndexFrameFactory rtreeInteriorFrameFactory,
- ITreeIndexFrameFactory rtreeLeafFrameFactory, ITreeIndexFrameFactory btreeInteriorFrameFactory,
- ITreeIndexFrameFactory btreeLeafFrameFactory, ILSMIndexFileManager fileManager,
- TreeIndexFactory<RTree> diskRTreeFactory, ILSMComponentFactory componentFactory,
+ public AbstractLSMRTree(List<IVirtualBufferCache> virtualBufferCaches,
+ ITreeIndexFrameFactory rtreeInteriorFrameFactory, ITreeIndexFrameFactory rtreeLeafFrameFactory,
+ ITreeIndexFrameFactory btreeInteriorFrameFactory, ITreeIndexFrameFactory btreeLeafFrameFactory,
+ ILSMIndexFileManager fileManager, ILSMComponentFactory componentFactory,
IFileMapProvider diskFileMapProvider, int fieldCount, IBinaryComparatorFactory[] rtreeCmpFactories,
IBinaryComparatorFactory[] btreeCmpFactories, ILinearizeComparatorFactory linearizer,
int[] comparatorFields, IBinaryComparatorFactory[] linearizerArray, double bloomFilterFalsePositiveRate,
ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- super(virtualBufferCache, diskRTreeFactory.getBufferCache(), fileManager, diskFileMapProvider,
- bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler, ioOpCallbackProvider);
- virtualFreePageManager = new VirtualFreePageManager(virtualBufferCache.getNumPages());
- RTree memRTree = new RTree(virtualBufferCache, ((IVirtualBufferCache) virtualBufferCache).getFileMapProvider(),
- virtualFreePageManager, rtreeInteriorFrameFactory, rtreeLeafFrameFactory, rtreeCmpFactories,
- fieldCount, new FileReference(new File(fileManager.getBaseDir() + "_virtual_r")));
- BTree memBTree = new BTree(virtualBufferCache, ((IVirtualBufferCache) virtualBufferCache).getFileMapProvider(),
- new VirtualFreePageManager(virtualBufferCache.getNumPages()), btreeInteriorFrameFactory,
- btreeLeafFrameFactory, btreeCmpFactories, fieldCount, new FileReference(new File(
- fileManager.getBaseDir() + "_virtual_b")));
- mutableComponent = new LSMRTreeMutableComponent(memRTree, memBTree, virtualBufferCache);
- this.virtualBufferCache = virtualBufferCache;
+ ILSMIOOperationCallback ioOpCallback) {
+ super(virtualBufferCaches, componentFactory.getBufferCache(), fileManager, diskFileMapProvider,
+ bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler, ioOpCallback);
+ int i = 0;
+ for (IVirtualBufferCache virtualBufferCache : virtualBufferCaches) {
+ RTree memRTree = new RTree(virtualBufferCache,
+ ((IVirtualBufferCache) virtualBufferCache).getFileMapProvider(), new VirtualFreePageManager(
+ virtualBufferCache.getNumPages()), rtreeInteriorFrameFactory, rtreeLeafFrameFactory,
+ rtreeCmpFactories, fieldCount, new FileReference(new File(fileManager.getBaseDir() + "_virtual_r_"
+ + i)));
+ BTree memBTree = new BTree(virtualBufferCache,
+ ((IVirtualBufferCache) virtualBufferCache).getFileMapProvider(), new VirtualFreePageManager(
+ virtualBufferCache.getNumPages()), btreeInteriorFrameFactory, btreeLeafFrameFactory,
+ btreeCmpFactories, fieldCount, new FileReference(new File(fileManager.getBaseDir() + "_virtual_b_"
+ + i)));
+ LSMRTreeMemoryComponent mutableComponent = new LSMRTreeMemoryComponent(memRTree, memBTree,
+ virtualBufferCache, i == 0 ? true : false);
+ memoryComponents.add(mutableComponent);
+ ++i;
+ }
+
this.rtreeInteriorFrameFactory = rtreeInteriorFrameFactory;
this.rtreeLeafFrameFactory = rtreeLeafFrameFactory;
this.btreeInteriorFrameFactory = btreeInteriorFrameFactory;
@@ -128,7 +129,7 @@
fileManager.deleteDirs();
fileManager.createDirs();
- componentsRef.get().clear();
+ diskComponents.clear();
}
@Override
@@ -137,11 +138,14 @@
throw new HyracksDataException("Failed to activate the index since it is already activated.");
}
- ((IVirtualBufferCache) mutableComponent.getRTree().getBufferCache()).open();
- mutableComponent.getRTree().create();
- mutableComponent.getBTree().create();
- mutableComponent.getRTree().activate();
- mutableComponent.getBTree().activate();
+ for (ILSMComponent c : memoryComponents) {
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) c;
+ ((IVirtualBufferCache) mutableComponent.getRTree().getBufferCache()).open();
+ mutableComponent.getRTree().create();
+ mutableComponent.getBTree().create();
+ mutableComponent.getRTree().activate();
+ mutableComponent.getBTree().activate();
+ }
}
@Override
@@ -152,9 +156,8 @@
if (flushOnExit) {
BlockingIOOperationCallbackWrapper cb = new BlockingIOOperationCallbackWrapper(
- ioOpCallbackProvider.getIOOperationCallback(this));
- ILSMIndexAccessor accessor = (ILSMIndexAccessor) createAccessor(NoOpOperationCallback.INSTANCE,
- NoOpOperationCallback.INSTANCE);
+ ioOpCallback);
+ ILSMIndexAccessor accessor = createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
accessor.scheduleFlush(cb);
try {
cb.waitForIO();
@@ -163,11 +166,14 @@
}
}
- mutableComponent.getRTree().deactivate();
- mutableComponent.getBTree().deactivate();
- mutableComponent.getRTree().destroy();
- mutableComponent.getBTree().destroy();
- ((IVirtualBufferCache) mutableComponent.getRTree().getBufferCache()).close();
+ for (ILSMComponent c : memoryComponents) {
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) c;
+ mutableComponent.getRTree().deactivate();
+ mutableComponent.getBTree().deactivate();
+ mutableComponent.getRTree().destroy();
+ mutableComponent.getBTree().destroy();
+ ((IVirtualBufferCache) mutableComponent.getRTree().getBufferCache()).close();
+ }
}
@Override
@@ -183,24 +189,39 @@
throw new HyracksDataException("Failed to clear the index since it is not activated.");
}
- mutableComponent.getRTree().clear();
- mutableComponent.getBTree().clear();
- mutableComponent.reset();
+ for (ILSMComponent c : memoryComponents) {
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) c;
+ mutableComponent.getRTree().clear();
+ mutableComponent.getBTree().clear();
+ mutableComponent.reset();
+ }
}
@Override
public void getOperationalComponents(ILSMIndexOperationContext ctx) {
List<ILSMComponent> operationalComponents = ctx.getComponentHolder();
operationalComponents.clear();
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
+ int cmc = currentMutableComponentId.get();
+ ctx.setCurrentMutableComponentId(cmc);
+ int numMutableComponents = memoryComponents.size();
switch (ctx.getOperation()) {
case INSERT:
case DELETE:
case FLUSH:
- operationalComponents.add(mutableComponent);
+ operationalComponents.add(memoryComponents.get(cmc));
break;
case SEARCH:
- operationalComponents.add(mutableComponent);
+ for (int i = 0; i < numMutableComponents - 1; i++) {
+ ILSMComponent c = memoryComponents.get((cmc + i + 1) % numMutableComponents);
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) c;
+ if (mutableComponent.isReadable()) {
+ // Make sure newest components are added first
+ operationalComponents.add(0, mutableComponent);
+ }
+ }
+ // The current mutable component is always added
+ operationalComponents.add(0, memoryComponents.get(cmc));
operationalComponents.addAll(immutableComponents);
break;
case MERGE:
@@ -211,10 +232,23 @@
}
}
+ @Override
+ public void search(ILSMIndexOperationContext ictx, IIndexCursor cursor, ISearchPredicate pred)
+ throws HyracksDataException, IndexException {
+ LSMRTreeOpContext ctx = (LSMRTreeOpContext) ictx;
+ List<ILSMComponent> operationalComponents = ictx.getComponentHolder();
+
+ LSMRTreeCursorInitialState initialState = new LSMRTreeCursorInitialState(rtreeLeafFrameFactory,
+ rtreeInteriorFrameFactory, btreeLeafFrameFactory, ctx.getBTreeMultiComparator(), lsmHarness,
+ comparatorFields, linearizerArray, ctx.searchCallback, operationalComponents);
+
+ cursor.open(initialState, pred);
+ }
+
protected LSMComponentFileReferences getMergeTargetFileName(List<ILSMComponent> mergingDiskComponents)
throws HyracksDataException {
- RTree firstTree = ((LSMRTreeImmutableComponent) mergingDiskComponents.get(0)).getRTree();
- RTree lastTree = ((LSMRTreeImmutableComponent) mergingDiskComponents.get(mergingDiskComponents.size() - 1))
+ RTree firstTree = ((LSMRTreeDiskComponent) mergingDiskComponents.get(0)).getRTree();
+ RTree lastTree = ((LSMRTreeDiskComponent) mergingDiskComponents.get(mergingDiskComponents.size() - 1))
.getRTree();
FileReference firstFile = diskFileMapProvider.lookupFileName(firstTree.getFileId());
FileReference lastFile = diskFileMapProvider.lookupFileName(lastTree.getFileId());
@@ -223,11 +257,11 @@
return fileRefs;
}
- protected LSMRTreeImmutableComponent createDiskComponent(ILSMComponentFactory factory, FileReference insertFileRef,
+ protected LSMRTreeDiskComponent createDiskComponent(ILSMComponentFactory factory, FileReference insertFileRef,
FileReference deleteFileRef, FileReference bloomFilterFileRef, boolean createComponent)
throws HyracksDataException, IndexException {
// Create new tree instance.
- LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) factory
+ LSMRTreeDiskComponent component = (LSMRTreeDiskComponent) factory
.createLSMComponentInstance(new LSMComponentFileReferences(insertFileRef, deleteFileRef,
bloomFilterFileRef));
if (createComponent) {
@@ -248,31 +282,43 @@
@Override
public ITreeIndexFrameFactory getLeafFrameFactory() {
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getRTree().getLeafFrameFactory();
}
@Override
public ITreeIndexFrameFactory getInteriorFrameFactory() {
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getRTree().getInteriorFrameFactory();
}
@Override
public IFreePageManager getFreePageManager() {
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getRTree().getFreePageManager();
}
@Override
public int getFieldCount() {
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getRTree().getFieldCount();
}
@Override
public int getRootPageId() {
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getRTree().getRootPageId();
}
@Override
public int getFileId() {
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) memoryComponents
+ .get(currentMutableComponentId.get());
return mutableComponent.getRTree().getFileId();
}
@@ -293,8 +339,8 @@
// added RTree tuple.
RangePredicate btreeRangePredicate = new RangePredicate(tuple, tuple, true, true,
ctx.getBTreeMultiComparator(), ctx.getBTreeMultiComparator());
- ITreeIndexCursor cursor = ctx.memBTreeAccessor.createSearchCursor();
- ctx.memBTreeAccessor.search(cursor, btreeRangePredicate);
+ ITreeIndexCursor cursor = ctx.currentMutableBTreeAccessor.createSearchCursor();
+ ctx.currentMutableBTreeAccessor.search(cursor, btreeRangePredicate);
boolean foundTupleInMemoryBTree = false;
try {
if (cursor.hasNext()) {
@@ -305,8 +351,8 @@
}
if (foundTupleInMemoryBTree) {
try {
- ctx.memBTreeAccessor.delete(tuple);
- } catch (BTreeNonExistentKeyException e) {
+ ctx.currentMutableBTreeAccessor.delete(tuple);
+ } catch (TreeIndexNonExistentKeyException e) {
// Tuple has been deleted in the meantime. Do nothing.
// This normally shouldn't happen if we are dealing with
// good citizens since LSMRTree is used as a secondary
@@ -314,31 +360,24 @@
// insert between them.
}
} else {
- ctx.memRTreeAccessor.insert(tuple);
+ ctx.currentMutableRTreeAccessor.insert(tuple);
}
} else {
try {
- ctx.memBTreeAccessor.insert(tuple);
- } catch (BTreeDuplicateKeyException e) {
+ ctx.currentMutableBTreeAccessor.insert(tuple);
+ } catch (TreeIndexDuplicateKeyException e) {
// Do nothing, because one delete tuple is enough to indicate
// that all the corresponding insert tuples are deleted
}
}
- mutableComponent.setIsModified();
}
protected LSMRTreeOpContext createOpContext(IModificationOperationCallback modCallback) {
- RTreeAccessor rtreeAccessor = (RTree.RTreeAccessor) mutableComponent.getRTree().createAccessor(
- NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
- BTreeAccessor btreeAccessor = (BTree.BTreeAccessor) mutableComponent.getBTree().createAccessor(
- NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
-
- return new LSMRTreeOpContext(rtreeAccessor, (IRTreeLeafFrame) rtreeLeafFrameFactory.createFrame(),
- (IRTreeInteriorFrame) rtreeInteriorFrameFactory.createFrame(), virtualFreePageManager
- .getMetaDataFrameFactory().createFrame(), 4, btreeAccessor, btreeLeafFrameFactory,
- btreeInteriorFrameFactory, virtualFreePageManager.getMetaDataFrameFactory().createFrame(),
- rtreeCmpFactories, btreeCmpFactories, modCallback, NoOpOperationCallback.INSTANCE);
+ return new LSMRTreeOpContext(memoryComponents, (IRTreeLeafFrame) rtreeLeafFrameFactory.createFrame(),
+ (IRTreeInteriorFrame) rtreeInteriorFrameFactory.createFrame(), btreeLeafFrameFactory,
+ btreeInteriorFrameFactory, rtreeCmpFactories, btreeCmpFactories, modCallback,
+ NoOpOperationCallback.INSTANCE);
}
@Override
@@ -346,10 +385,6 @@
return rtreeCmpFactories;
}
- public boolean isEmptyIndex() throws HyracksDataException {
- return componentsRef.get().isEmpty() && !mutableComponent.isModified();
- }
-
@Override
public void validate() throws HyracksDataException {
throw new UnsupportedOperationException("Validation not implemented for LSM R-Trees.");
@@ -357,8 +392,13 @@
@Override
public long getMemoryAllocationSize() {
- IBufferCache virtualBufferCache = mutableComponent.getRTree().getBufferCache();
- return virtualBufferCache.getNumPages() * virtualBufferCache.getPageSize();
+ long size = 0;
+ for (ILSMComponent c : memoryComponents) {
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) c;
+ IBufferCache virtualBufferCache = mutableComponent.getRTree().getBufferCache();
+ size += virtualBufferCache.getNumPages() * virtualBufferCache.getPageSize();
+ }
+ return size;
}
@Override
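
The AbstractLSMRTree hunks above are the core of this refactoring: the single mutable component becomes a rotating list of memory components, modifications always target the component at currentMutableComponentId, and searches visit the remaining readable memory components newest-first before any disk component. A minimal sketch of that selection logic, using hypothetical simplified types rather than the real ILSMComponent API:

import java.util.ArrayList;
import java.util.List;

final class OperationalComponentSelection {

    interface Component {
        boolean isReadable();
    }

    // Mirrors the SEARCH case of getOperationalComponents() above:
    // readable memory components are inserted newest-first, the current
    // mutable component always leads, and disk components come last.
    static List<Component> forSearch(List<Component> memoryComponents,
            int currentMutableId, List<Component> diskComponents) {
        List<Component> operational = new ArrayList<Component>();
        int n = memoryComponents.size();
        for (int i = 0; i < n - 1; i++) {
            Component c = memoryComponents.get((currentMutableId + i + 1) % n);
            if (c.isReadable()) {
                operational.add(0, c); // newer components must precede older ones
            }
        }
        operational.add(0, memoryComponents.get(currentMutableId)); // always searched
        operational.addAll(diskComponents); // disk components hold the oldest data
        return operational;
    }
}
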
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java
index 0b71373..a4b6139 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTree.java
@@ -16,7 +16,6 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.impls;
import java.util.List;
-import java.util.ListIterator;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ILinearizeComparatorFactory;
@@ -66,7 +65,7 @@
public class LSMRTree extends AbstractLSMRTree {
- public LSMRTree(IVirtualBufferCache virtualBufferCache, ITreeIndexFrameFactory rtreeInteriorFrameFactory,
+ public LSMRTree(List<IVirtualBufferCache> virtualBufferCaches, ITreeIndexFrameFactory rtreeInteriorFrameFactory,
ITreeIndexFrameFactory rtreeLeafFrameFactory, ITreeIndexFrameFactory btreeInteriorFrameFactory,
ITreeIndexFrameFactory btreeLeafFrameFactory, ILSMIndexFileManager fileNameManager,
TreeIndexFactory<RTree> diskRTreeFactory, TreeIndexFactory<BTree> diskBTreeFactory,
@@ -75,12 +74,12 @@
IBinaryComparatorFactory[] btreeCmpFactories, ILinearizeComparatorFactory linearizer,
int[] comparatorFields, IBinaryComparatorFactory[] linearizerArray, ILSMMergePolicy mergePolicy,
ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- super(virtualBufferCache, rtreeInteriorFrameFactory, rtreeLeafFrameFactory, btreeInteriorFrameFactory,
- btreeLeafFrameFactory, fileNameManager, diskRTreeFactory, new LSMRTreeComponentFactory(
- diskRTreeFactory, diskBTreeFactory, bloomFilterFactory), diskFileMapProvider, fieldCount,
- rtreeCmpFactories, btreeCmpFactories, linearizer, comparatorFields, linearizerArray,
- bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler, ioOpCallbackProvider);
+ ILSMIOOperationCallback ioOpCallback) {
+ super(virtualBufferCaches, rtreeInteriorFrameFactory, rtreeLeafFrameFactory, btreeInteriorFrameFactory,
+ btreeLeafFrameFactory, fileNameManager, new LSMRTreeDiskComponentFactory(diskRTreeFactory,
+ diskBTreeFactory, bloomFilterFactory), diskFileMapProvider, fieldCount, rtreeCmpFactories,
+ btreeCmpFactories, linearizer, comparatorFields, linearizerArray, bloomFilterFalsePositiveRate,
+ mergePolicy, opTracker, ioScheduler, ioOpCallback);
}
/**
@@ -94,7 +93,7 @@
@Override
public synchronized void activate() throws HyracksDataException {
super.activate();
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
List<LSMComponentFileReferences> validFileReferences;
try {
validFileReferences = fileManager.cleanupAndGetValidFiles();
@@ -103,7 +102,7 @@
}
immutableComponents.clear();
for (LSMComponentFileReferences lsmComonentFileReference : validFileReferences) {
- LSMRTreeImmutableComponent component;
+ LSMRTreeDiskComponent component;
try {
component = createDiskComponent(componentFactory,
lsmComonentFileReference.getInsertIndexFileReference(),
@@ -120,9 +119,9 @@
@Override
public synchronized void deactivate(boolean flushOnExit) throws HyracksDataException {
super.deactivate(flushOnExit);
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) c;
+ LSMRTreeDiskComponent component = (LSMRTreeDiskComponent) c;
RTree rtree = component.getRTree();
BTree btree = component.getBTree();
BloomFilter bloomFilter = component.getBloomFilter();
@@ -141,9 +140,9 @@
@Override
public synchronized void destroy() throws HyracksDataException {
super.destroy();
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) c;
+ LSMRTreeDiskComponent component = (LSMRTreeDiskComponent) c;
component.getBTree().destroy();
component.getBloomFilter().destroy();
component.getRTree().destroy();
@@ -154,9 +153,9 @@
@Override
public synchronized void clear() throws HyracksDataException {
super.clear();
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) c;
+ LSMRTreeDiskComponent component = (LSMRTreeDiskComponent) c;
component.getBTree().deactivate();
component.getBloomFilter().deactivate();
component.getRTree().deactivate();
@@ -168,64 +167,23 @@
}
@Override
- public void search(ILSMIndexOperationContext ictx, IIndexCursor cursor, ISearchPredicate pred)
- throws HyracksDataException, IndexException {
- LSMRTreeOpContext ctx = (LSMRTreeOpContext) ictx;
- List<ILSMComponent> operationalComponents = ctx.getComponentHolder();
- boolean includeMutableComponent = operationalComponents.get(0) == mutableComponent;
- int numTrees = operationalComponents.size();
-
- ListIterator<ILSMComponent> diskComponentIter = operationalComponents.listIterator();
- ITreeIndexAccessor[] rTreeAccessors = new ITreeIndexAccessor[numTrees];
- ITreeIndexAccessor[] bTreeAccessors = new ITreeIndexAccessor[numTrees];
- int diskComponentIx = 0;
- if (includeMutableComponent) {
- rTreeAccessors[0] = ctx.memRTreeAccessor;
- bTreeAccessors[0] = ctx.memBTreeAccessor;
- diskComponentIx++;
- diskComponentIter.next();
- }
-
- while (diskComponentIter.hasNext()) {
- LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) diskComponentIter.next();
- RTree diskRTree = component.getRTree();
- BTree diskBTree = component.getBTree();
- rTreeAccessors[diskComponentIx] = diskRTree.createAccessor(NoOpOperationCallback.INSTANCE,
- NoOpOperationCallback.INSTANCE);
- bTreeAccessors[diskComponentIx] = diskBTree.createAccessor(NoOpOperationCallback.INSTANCE,
- NoOpOperationCallback.INSTANCE);
- diskComponentIx++;
- }
-
- LSMRTreeCursorInitialState initialState = new LSMRTreeCursorInitialState(numTrees, rtreeLeafFrameFactory,
- rtreeInteriorFrameFactory, btreeLeafFrameFactory, ctx.getBTreeMultiComparator(), rTreeAccessors,
- bTreeAccessors, includeMutableComponent, lsmHarness, comparatorFields, linearizerArray,
- ctx.searchCallback, operationalComponents);
- cursor.open(initialState, pred);
- }
-
- @Override
- public boolean scheduleFlush(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
+ public void scheduleFlush(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException {
- if (!mutableComponent.isModified()) {
- return false;
- }
+ ILSMComponent flushingComponent = ctx.getComponentHolder().get(0);
LSMComponentFileReferences componentFileRefs = fileManager.getRelFlushFileReference();
ILSMIndexOperationContext rctx = createOpContext(NoOpOperationCallback.INSTANCE);
- LSMRTreeMutableComponent flushingComponent = (LSMRTreeMutableComponent) ctx.getComponentHolder().get(0);
rctx.setOperation(IndexOperation.FLUSH);
rctx.getComponentHolder().addAll(ctx.getComponentHolder());
LSMRTreeAccessor accessor = new LSMRTreeAccessor(lsmHarness, rctx);
ioScheduler.scheduleOperation(new LSMRTreeFlushOperation(accessor, flushingComponent, componentFileRefs
.getInsertIndexFileReference(), componentFileRefs.getDeleteIndexFileReference(), componentFileRefs
.getBloomFilterFileReference(), callback));
- return true;
}
@Override
public ILSMComponent flush(ILSMIOOperation operation) throws HyracksDataException, IndexException {
LSMRTreeFlushOperation flushOp = (LSMRTreeFlushOperation) operation;
- LSMRTreeMutableComponent flushingComponent = (LSMRTreeMutableComponent) flushOp.getFlushingComponent();
+ LSMRTreeMemoryComponent flushingComponent = (LSMRTreeMemoryComponent) flushOp.getFlushingComponent();
// Renaming order is critical because we assume ordering when we
// read the file names when we open the tree.
// The RTree should be renamed before the BTree.
@@ -236,7 +194,7 @@
RTreeSearchCursor rtreeScanCursor = (RTreeSearchCursor) memRTreeAccessor.createSearchCursor();
SearchPredicate rtreeNullPredicate = new SearchPredicate(null, null);
memRTreeAccessor.search(rtreeScanCursor, rtreeNullPredicate);
- LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, flushOp.getRTreeFlushTarget(),
+ LSMRTreeDiskComponent component = createDiskComponent(componentFactory, flushOp.getRTreeFlushTarget(),
flushOp.getBTreeFlushTarget(), flushOp.getBloomFilterFlushTarget(), true);
RTree diskRTree = component.getRTree();
IIndexBulkLoader rTreeBulkloader;
@@ -329,17 +287,10 @@
@Override
public void scheduleMerge(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException, IndexException {
- // Renaming order is critical because we use assume ordering when we
- // read the file names when we open the tree.
- // The RTree should be renamed before the BTree.
- List<ILSMComponent> mergingComponents = ctx.getComponentHolder();
ILSMIndexOperationContext rctx = createOpContext(NoOpOperationCallback.INSTANCE);
- rctx.getComponentHolder().addAll(mergingComponents);
- ITreeIndexCursor cursor = new LSMRTreeSortedCursor(rctx, linearizer);
- ISearchPredicate rtreeSearchPred = new SearchPredicate(null, null);
- search(rctx, cursor, rtreeSearchPred);
-
rctx.setOperation(IndexOperation.MERGE);
+ List<ILSMComponent> mergingComponents = ctx.getComponentHolder();
+ ITreeIndexCursor cursor = new LSMRTreeSortedCursor(rctx, linearizer);
LSMComponentFileReferences relMergeFileRefs = getMergeTargetFileName(mergingComponents);
ILSMIndexAccessorInternal accessor = new LSMRTreeAccessor(lsmHarness, rctx);
ioScheduler.scheduleOperation(new LSMRTreeMergeOperation((ILSMIndexAccessorInternal) accessor,
@@ -348,14 +299,16 @@
}
@Override
- public ILSMComponent merge(List<ILSMComponent> mergedComponents, ILSMIOOperation operation)
- throws HyracksDataException, IndexException {
+ public ILSMComponent merge(ILSMIOOperation operation) throws HyracksDataException, IndexException {
LSMRTreeMergeOperation mergeOp = (LSMRTreeMergeOperation) operation;
ITreeIndexCursor cursor = mergeOp.getCursor();
- mergedComponents.addAll(mergeOp.getMergingComponents());
+ ISearchPredicate rtreeSearchPred = new SearchPredicate(null, null);
+ ILSMIndexOperationContext opCtx = ((LSMRTreeSortedCursor) cursor).getOpCtx();
+ opCtx.getComponentHolder().addAll(mergeOp.getMergingComponents());
+ search(opCtx, cursor, rtreeSearchPred);
- LSMRTreeImmutableComponent mergedComponent = createDiskComponent(componentFactory,
- mergeOp.getRTreeMergeTarget(), mergeOp.getBTreeMergeTarget(), mergeOp.getBloomFilterMergeTarget(), true);
+ LSMRTreeDiskComponent mergedComponent = createDiskComponent(componentFactory, mergeOp.getRTreeMergeTarget(),
+ mergeOp.getBTreeMergeTarget(), mergeOp.getBloomFilterMergeTarget(), true);
IIndexBulkLoader bulkLoader = mergedComponent.getRTree().createBulkLoader(1.0f, false, 0L, false);
try {
@@ -389,7 +342,7 @@
public MultiComparator getMultiComparator() {
LSMRTreeOpContext concreteCtx = (LSMRTreeOpContext) ctx;
- return concreteCtx.rtreeOpContext.cmp;
+ return concreteCtx.currentRTreeOpContext.cmp;
}
}
@@ -427,7 +380,7 @@
} catch (HyracksDataException | IndexException e) {
throw new TreeIndexException(e);
}
- bulkLoader = ((LSMRTreeImmutableComponent) component).getRTree().createBulkLoader(fillFactor, verifyInput,
+ bulkLoader = ((LSMRTreeDiskComponent) component).getRTree().createBulkLoader(fillFactor, verifyInput,
numElementsHint, false);
}
@@ -459,19 +412,19 @@
protected void cleanupArtifacts() throws HyracksDataException {
if (!cleanedUpArtifacts) {
cleanedUpArtifacts = true;
- ((LSMRTreeImmutableComponent) component).getRTree().deactivate();
- ((LSMRTreeImmutableComponent) component).getRTree().destroy();
- ((LSMRTreeImmutableComponent) component).getBTree().deactivate();
- ((LSMRTreeImmutableComponent) component).getBTree().destroy();
- ((LSMRTreeImmutableComponent) component).getBloomFilter().deactivate();
- ((LSMRTreeImmutableComponent) component).getBloomFilter().destroy();
+ ((LSMRTreeDiskComponent) component).getRTree().deactivate();
+ ((LSMRTreeDiskComponent) component).getRTree().destroy();
+ ((LSMRTreeDiskComponent) component).getBTree().deactivate();
+ ((LSMRTreeDiskComponent) component).getBTree().destroy();
+ ((LSMRTreeDiskComponent) component).getBloomFilter().deactivate();
+ ((LSMRTreeDiskComponent) component).getBloomFilter().destroy();
}
}
}
@Override
public void markAsValid(ILSMComponent lsmComponent) throws HyracksDataException {
- LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) lsmComponent;
+ LSMRTreeDiskComponent component = (LSMRTreeDiskComponent) lsmComponent;
// Flush the bloom filter first.
int fileId = component.getBloomFilter().getFileId();
IBufferCache bufferCache = component.getBTree().getBufferCache();
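
Two contract changes in LSMRTree.java above are worth calling out. First, scheduleFlush() no longer checks isModified() and returns a boolean; the component to flush arrives pre-selected in ctx.getComponentHolder(), so the decision of whether a flush is needed has moved out of the index. Second, scheduleMerge() no longer opens the merge cursor eagerly: merge() fetches the operation context back out of the sorted cursor and runs the search itself, presumably so the scan happens when the scheduled operation executes rather than on the scheduling thread. A condensed sketch of that deferred-scan shape, with hypothetical simplified types:

final class MergeOperationSketch {

    interface Cursor {
        void open(); // opening triggers the scan over the merging components
    }

    private final Cursor cursor;

    MergeOperationSketch(Cursor cursor) {
        // schedule time: only record the cursor, do not open it yet
        this.cursor = cursor;
    }

    // execution time (on the I/O scheduler): open the cursor, then
    // drain it into the merged component's bulk loader.
    void merge() {
        cursor.open();
        // ... drain cursor into the bulk loader ...
    }
}
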
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
index 0cd578a..4e322d4 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeAbstractCursor.java
@@ -12,146 +12,155 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hyracks.storage.am.lsm.rtree.impls;
-
-import java.util.List;
-
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
-import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeRangeSearchCursor;
-import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
-import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
-import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
-import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMHarness;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
-import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeInteriorFrame;
-import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeLeafFrame;
-import edu.uci.ics.hyracks.storage.am.rtree.impls.RTreeSearchCursor;
-import edu.uci.ics.hyracks.storage.am.rtree.impls.SearchPredicate;
-import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
-import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
-
-public abstract class LSMRTreeAbstractCursor implements ITreeIndexCursor {
-
- protected RTreeSearchCursor[] rtreeCursors;
- protected boolean open = false;
- protected ITreeIndexCursor[] btreeCursors;
- protected ITreeIndexAccessor[] rTreeAccessors;
- protected ITreeIndexAccessor[] bTreeAccessors;
- private MultiComparator btreeCmp;
- protected int numberOfTrees;
- protected SearchPredicate rtreeSearchPredicate;
- protected RangePredicate btreeRangePredicate;
- protected ITupleReference frameTuple;
- protected boolean includeMemRTree;
- protected ILSMHarness lsmHarness;
- protected boolean foundNext;
- protected final ILSMIndexOperationContext opCtx;
-
- protected List<ILSMComponent> operationalComponents;
-
- public LSMRTreeAbstractCursor(ILSMIndexOperationContext opCtx) {
- super();
- this.opCtx = opCtx;
- }
-
- public RTreeSearchCursor getCursor(int cursorIndex) {
- return rtreeCursors[cursorIndex];
- }
-
- @Override
- public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
- LSMRTreeCursorInitialState lsmInitialState = (LSMRTreeCursorInitialState) initialState;
- btreeCmp = lsmInitialState.getBTreeCmp();
- includeMemRTree = lsmInitialState.getIncludeMemComponent();
- operationalComponents = lsmInitialState.getOperationalComponents();
- lsmHarness = lsmInitialState.getLSMHarness();
- numberOfTrees = lsmInitialState.getNumberOfTrees();
- rTreeAccessors = lsmInitialState.getRTreeAccessors();
- bTreeAccessors = lsmInitialState.getBTreeAccessors();
-
- rtreeCursors = new RTreeSearchCursor[numberOfTrees];
- btreeCursors = new ITreeIndexCursor[numberOfTrees];
-
- int i = 0;
- if (includeMemRTree) {
- rtreeCursors[i] = new RTreeSearchCursor((IRTreeInteriorFrame) lsmInitialState
- .getRTreeInteriorFrameFactory().createFrame(), (IRTreeLeafFrame) lsmInitialState
- .getRTreeLeafFrameFactory().createFrame());
-
- // No need for a bloom filter for the in-memory BTree.
- btreeCursors[i] = new BTreeRangeSearchCursor((IBTreeLeafFrame) lsmInitialState.getBTreeLeafFrameFactory()
- .createFrame(), false);
- ++i;
- }
- for (; i < numberOfTrees; i++) {
- rtreeCursors[i] = new RTreeSearchCursor((IRTreeInteriorFrame) lsmInitialState
- .getRTreeInteriorFrameFactory().createFrame(), (IRTreeLeafFrame) lsmInitialState
- .getRTreeLeafFrameFactory().createFrame());
-
- btreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitialState
- .getBTreeLeafFrameFactory().createFrame(), false,
- ((LSMRTreeImmutableComponent) operationalComponents.get(i)).getBloomFilter());
- }
-
- rtreeSearchPredicate = (SearchPredicate) searchPred;
- btreeRangePredicate = new RangePredicate(null, null, true, true, btreeCmp, btreeCmp);
-
- open = true;
- }
-
- @Override
- public ICachedPage getPage() {
- // do nothing
- return null;
- }
-
- @Override
- public void close() throws HyracksDataException {
- if (!open) {
- return;
- }
-
- try {
- if (rtreeCursors != null && btreeCursors != null) {
- for (int i = 0; i < numberOfTrees; i++) {
- rtreeCursors[i].close();
- btreeCursors[i].close();
- }
- }
- rtreeCursors = null;
- btreeCursors = null;
- } finally {
- lsmHarness.endSearch(opCtx);
- }
-
- open = false;
- }
-
- @Override
- public void setBufferCache(IBufferCache bufferCache) {
- // do nothing
- }
-
- @Override
- public void setFileId(int fileId) {
- // do nothing
- }
-
- @Override
- public ITupleReference getTuple() {
- return frameTuple;
- }
-
- @Override
- public boolean exclusiveLatchNodes() {
- return false;
- }
-
+package edu.uci.ics.hyracks.storage.am.lsm.rtree.impls;
+
+import java.util.List;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeRangeSearchCursor;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
+import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
+import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMHarness;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.BloomFilterAwareBTreePointSearchCursor;
+import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeInteriorFrame;
+import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeLeafFrame;
+import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
+import edu.uci.ics.hyracks.storage.am.rtree.impls.RTreeSearchCursor;
+import edu.uci.ics.hyracks.storage.am.rtree.impls.SearchPredicate;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.buffercache.ICachedPage;
+
+public abstract class LSMRTreeAbstractCursor implements ITreeIndexCursor {
+
+ protected boolean open;
+ protected RTreeSearchCursor[] rtreeCursors;
+ protected ITreeIndexCursor[] btreeCursors;
+ protected ITreeIndexAccessor[] rtreeAccessors;
+ protected ITreeIndexAccessor[] btreeAccessors;
+ private MultiComparator btreeCmp;
+ protected int numberOfTrees;
+ protected SearchPredicate rtreeSearchPredicate;
+ protected RangePredicate btreeRangePredicate;
+ protected ITupleReference frameTuple;
+ protected boolean includeMutableComponent;
+ protected ILSMHarness lsmHarness;
+ protected boolean foundNext;
+ protected final ILSMIndexOperationContext opCtx;
+
+ protected List<ILSMComponent> operationalComponents;
+
+ public LSMRTreeAbstractCursor(ILSMIndexOperationContext opCtx) {
+ super();
+ this.opCtx = opCtx;
+ }
+
+ public RTreeSearchCursor getCursor(int cursorIndex) {
+ return rtreeCursors[cursorIndex];
+ }
+
+ @Override
+ public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
+ LSMRTreeCursorInitialState lsmInitialState = (LSMRTreeCursorInitialState) initialState;
+ btreeCmp = lsmInitialState.getBTreeCmp();
+
+ operationalComponents = lsmInitialState.getOperationalComponents();
+ lsmHarness = lsmInitialState.getLSMHarness();
+ numberOfTrees = operationalComponents.size();
+
+ rtreeCursors = new RTreeSearchCursor[numberOfTrees];
+ btreeCursors = new ITreeIndexCursor[numberOfTrees];
+ rtreeAccessors = new ITreeIndexAccessor[numberOfTrees];
+ btreeAccessors = new ITreeIndexAccessor[numberOfTrees];
+
+ includeMutableComponent = false;
+ for (int i = 0; i < numberOfTrees; i++) {
+ ILSMComponent component = operationalComponents.get(i);
+ RTree rtree;
+ BTree btree;
+ if (component.getType() == LSMComponentType.MEMORY) {
+ includeMutableComponent = true;
+ // No need for a bloom filter for the in-memory BTree.
+ btreeCursors[i] = new BTreeRangeSearchCursor((IBTreeLeafFrame) lsmInitialState
+ .getBTreeLeafFrameFactory().createFrame(), false);
+ rtree = (RTree) ((LSMRTreeMemoryComponent) component).getRTree();
+ btree = (BTree) ((LSMRTreeMemoryComponent) component).getBTree();
+ } else {
+ btreeCursors[i] = new BloomFilterAwareBTreePointSearchCursor((IBTreeLeafFrame) lsmInitialState
+ .getBTreeLeafFrameFactory().createFrame(), false,
+ ((LSMRTreeDiskComponent) operationalComponents.get(i)).getBloomFilter());
+ rtree = (RTree) ((LSMRTreeDiskComponent) component).getRTree();
+ btree = (BTree) ((LSMRTreeDiskComponent) component).getBTree();
+ }
+ rtreeCursors[i] = new RTreeSearchCursor((IRTreeInteriorFrame) lsmInitialState
+ .getRTreeInteriorFrameFactory().createFrame(), (IRTreeLeafFrame) lsmInitialState
+ .getRTreeLeafFrameFactory().createFrame());
+ rtreeAccessors[i] = rtree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ btreeAccessors[i] = btree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ }
+
+ rtreeSearchPredicate = (SearchPredicate) searchPred;
+ btreeRangePredicate = new RangePredicate(null, null, true, true, btreeCmp, btreeCmp);
+
+ open = true;
+ }
+
+ @Override
+ public ICachedPage getPage() {
+ // do nothing
+ return null;
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ if (!open) {
+ return;
+ }
+
+ try {
+ if (rtreeCursors != null && btreeCursors != null) {
+ for (int i = 0; i < numberOfTrees; i++) {
+ rtreeCursors[i].close();
+ btreeCursors[i].close();
+ }
+ }
+ rtreeCursors = null;
+ btreeCursors = null;
+ } finally {
+ lsmHarness.endSearch(opCtx);
+ }
+
+ open = false;
+ }
+
+ @Override
+ public void setBufferCache(IBufferCache bufferCache) {
+ // do nothing
+ }
+
+ @Override
+ public void setFileId(int fileId) {
+ // do nothing
+ }
+
+ @Override
+ public ITupleReference getTuple() {
+ return frameTuple;
+ }
+
+ @Override
+ public boolean exclusiveLatchNodes() {
+ return false;
+ }
+
}
\ No newline at end of file
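
The rewritten LSMRTreeAbstractCursor.open() above no longer receives pre-built accessor arrays through the initial state; it derives everything from the operational components themselves, branching on ILSMComponent.LSMComponentType. The interesting detail is the cursor choice: memory components get a plain BTreeRangeSearchCursor (there is no bloom filter to consult), while disk components get the BloomFilterAwareBTreePointSearchCursor, which can skip a BTree probe entirely when the filter reports the key absent. A sketch of that dispatch, with hypothetical stand-in types:

import java.util.List;

final class CursorSetupSketch {
    enum ComponentType { MEMORY, DISK }

    interface Component { ComponentType getType(); }
    interface Cursor {}
    static final class RangeCursor implements Cursor {}       // stands in for BTreeRangeSearchCursor
    static final class BloomFilterCursor implements Cursor {} // stands in for BloomFilterAwareBTreePointSearchCursor

    // Mirrors the per-component loop in open() above.
    static Cursor[] btreeCursorsFor(List<Component> operationalComponents) {
        Cursor[] cursors = new Cursor[operationalComponents.size()];
        for (int i = 0; i < cursors.length; i++) {
            // Memory BTrees have no bloom filter; disk BTrees do.
            cursors[i] = operationalComponents.get(i).getType() == ComponentType.MEMORY
                    ? new RangeCursor()
                    : new BloomFilterCursor();
        }
        return cursors;
    }
}
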
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeCursorInitialState.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeCursorInitialState.java
index 21230b0..27545b9 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeCursorInitialState.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeCursorInitialState.java
@@ -20,7 +20,6 @@
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
@@ -29,35 +28,26 @@
public class LSMRTreeCursorInitialState implements ICursorInitialState {
- private final int numberOfTrees;
private final ITreeIndexFrameFactory rtreeInteriorFrameFactory;
private final ITreeIndexFrameFactory rtreeLeafFrameFactory;
private final ITreeIndexFrameFactory btreeLeafFrameFactory;
private final MultiComparator btreeCmp;
private final MultiComparator hilbertCmp;
- private final ITreeIndexAccessor[] rTreeAccessors;
- private final ITreeIndexAccessor[] bTreeAccessors;
- private final boolean includeMemRTree;
private final ILSMHarness lsmHarness;
private final int[] comparatorFields;
private ISearchOperationCallback searchCallback;
private final List<ILSMComponent> operationalComponents;
- public LSMRTreeCursorInitialState(int numberOfTrees, ITreeIndexFrameFactory rtreeLeafFrameFactory,
+ public LSMRTreeCursorInitialState(ITreeIndexFrameFactory rtreeLeafFrameFactory,
ITreeIndexFrameFactory rtreeInteriorFrameFactory, ITreeIndexFrameFactory btreeLeafFrameFactory,
- MultiComparator btreeCmp, ITreeIndexAccessor[] rTreeAccessors, ITreeIndexAccessor[] bTreeAccessors,
- boolean includeMemRTree, ILSMHarness lsmHarness, int[] comparatorFields,
+ MultiComparator btreeCmp, ILSMHarness lsmHarness, int[] comparatorFields,
IBinaryComparatorFactory[] linearizerArray, ISearchOperationCallback searchCallback,
List<ILSMComponent> operationalComponents) {
- this.numberOfTrees = numberOfTrees;
this.rtreeLeafFrameFactory = rtreeLeafFrameFactory;
this.rtreeInteriorFrameFactory = rtreeInteriorFrameFactory;
this.btreeLeafFrameFactory = btreeLeafFrameFactory;
this.btreeCmp = btreeCmp;
- this.rTreeAccessors = rTreeAccessors;
- this.bTreeAccessors = bTreeAccessors;
- this.includeMemRTree = includeMemRTree;
this.lsmHarness = lsmHarness;
this.comparatorFields = comparatorFields;
this.hilbertCmp = MultiComparator.create(linearizerArray);
@@ -73,10 +63,6 @@
return comparatorFields;
}
- public int getNumberOfTrees() {
- return numberOfTrees;
- }
-
public ITreeIndexFrameFactory getRTreeInteriorFrameFactory() {
return rtreeInteriorFrameFactory;
}
@@ -106,18 +92,6 @@
return operationalComponents;
}
- public ITreeIndexAccessor[] getRTreeAccessors() {
- return rTreeAccessors;
- }
-
- public ITreeIndexAccessor[] getBTreeAccessors() {
- return bTreeAccessors;
- }
-
- public boolean getIncludeMemComponent() {
- return includeMemRTree;
- }
-
public ILSMHarness getLSMHarness() {
return lsmHarness;
}
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeImmutableComponent.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeDiskComponent.java
similarity index 86%
rename from hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeImmutableComponent.java
rename to hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeDiskComponent.java
index cb290c7..7bc3f79 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeImmutableComponent.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeDiskComponent.java
@@ -17,15 +17,15 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.bloomfilter.impls.BloomFilter;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractImmutableLSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractDiskLSMComponent;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
-public class LSMRTreeImmutableComponent extends AbstractImmutableLSMComponent {
+public class LSMRTreeDiskComponent extends AbstractDiskLSMComponent {
private final RTree rtree;
private final BTree btree;
private final BloomFilter bloomFilter;
- public LSMRTreeImmutableComponent(RTree rtree, BTree btree, BloomFilter bloomFilter) {
+ public LSMRTreeDiskComponent(RTree rtree, BTree btree, BloomFilter bloomFilter) {
this.rtree = rtree;
this.btree = btree;
this.bloomFilter = bloomFilter;
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeComponentFactory.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeDiskComponentFactory.java
similarity index 87%
rename from hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeComponentFactory.java
rename to hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeDiskComponentFactory.java
index 8f618da..a130562 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeComponentFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeDiskComponentFactory.java
@@ -26,12 +26,12 @@
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
-public class LSMRTreeComponentFactory implements ILSMComponentFactory {
+public class LSMRTreeDiskComponentFactory implements ILSMComponentFactory {
private final TreeIndexFactory<RTree> rtreeFactory;
private final TreeIndexFactory<BTree> btreeFactory;
private final BloomFilterFactory bloomFilterFactory;
- public LSMRTreeComponentFactory(TreeIndexFactory<RTree> rtreeFactory, TreeIndexFactory<BTree> btreeFactory,
+ public LSMRTreeDiskComponentFactory(TreeIndexFactory<RTree> rtreeFactory, TreeIndexFactory<BTree> btreeFactory,
BloomFilterFactory bloomFilterFactory) {
this.rtreeFactory = rtreeFactory;
this.btreeFactory = btreeFactory;
@@ -41,7 +41,7 @@
@Override
public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException,
HyracksDataException {
- return new LSMRTreeImmutableComponent(rtreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()),
+ return new LSMRTreeDiskComponent(rtreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()),
btreeFactory.createIndexInstance(cfr.getDeleteIndexFileReference()),
bloomFilterFactory.createBloomFiltertInstance(cfr.getBloomFilterFileReference()));
}
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java
index 68098bb..18d7a7e 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeFlushOperation.java
@@ -64,8 +64,9 @@
}
@Override
- public void perform() throws HyracksDataException, IndexException {
+ public Boolean call() throws HyracksDataException, IndexException {
accessor.flush(this);
+ return true;
}
@Override
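
The perform() to call() change above (repeated for the merge operation below) indicates that LSM I/O operations now implement java.util.concurrent.Callable<Boolean>, so they can be handed straight to an executor and awaited as futures. A self-contained sketch of that pattern, under that assumption:

import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

final class CallableIoOpSketch {
    // Hypothetical stand-in for a post-patch ILSMIOOperation: the work
    // happens in call(), which signals completion through its result.
    static final class FlushOp implements Callable<Boolean> {
        @Override
        public Boolean call() {
            // ... perform the flush ...
            return true;
        }
    }

    public static void main(String[] args) throws Exception {
        ExecutorService scheduler = Executors.newSingleThreadExecutor();
        Future<Boolean> done = scheduler.submit(new FlushOp()); // caller can now await the result
        System.out.println("flush completed: " + done.get());
        scheduler.shutdown();
    }
}
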
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMutableComponent.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMemoryComponent.java
similarity index 82%
rename from hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMutableComponent.java
rename to hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMemoryComponent.java
index 7d572ad..452643b 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMutableComponent.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMemoryComponent.java
@@ -18,19 +18,18 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
-import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractMutableLSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.AbstractMemoryLSMComponent;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
-public class LSMRTreeMutableComponent extends AbstractMutableLSMComponent {
+public class LSMRTreeMemoryComponent extends AbstractMemoryLSMComponent {
private final RTree rtree;
private final BTree btree;
- private final IVirtualBufferCache vbc;
- public LSMRTreeMutableComponent(RTree rtree, BTree btree, IVirtualBufferCache vbc) {
+ public LSMRTreeMemoryComponent(RTree rtree, BTree btree, IVirtualBufferCache vbc, boolean isActive) {
+ super(vbc, isActive);
this.rtree = rtree;
this.btree = btree;
- this.vbc = vbc;
}
public RTree getRTree() {
@@ -42,11 +41,6 @@
}
@Override
- protected boolean isFull() {
- return vbc.isFull();
- }
-
- @Override
protected void reset() throws HyracksDataException {
super.reset();
rtree.deactivate();
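
The LSMRTreeMemoryComponent rename above also hoists shared state: the virtual buffer cache field and the isFull() override disappear because the base class (now AbstractMemoryLSMComponent) receives the cache, along with a new isActive flag, through super(). A hypothetical sketch of the resulting base-class shape; the real AbstractMemoryLSMComponent API is not shown in this patch beyond its constructor arguments:

interface VirtualBufferCache {
    boolean isFull();
}

abstract class MemoryComponentBase {
    private final VirtualBufferCache vbc;
    private final boolean active;

    protected MemoryComponentBase(VirtualBufferCache vbc, boolean isActive) {
        this.vbc = vbc;
        this.active = isActive;
    }

    final boolean isFull() {      // formerly duplicated in each subclass
        return vbc.isFull();
    }

    final boolean isActive() {
        return active;
    }
}
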
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java
index ef67a2b..da7a2fb 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeMergeOperation.java
@@ -53,7 +53,7 @@
public Set<IODeviceHandle> getReadDevices() {
Set<IODeviceHandle> devs = new HashSet<IODeviceHandle>();
for (ILSMComponent o : mergingComponents) {
- LSMRTreeImmutableComponent component = (LSMRTreeImmutableComponent) o;
+ LSMRTreeDiskComponent component = (LSMRTreeDiskComponent) o;
devs.add(component.getRTree().getFileReference().getDeviceHandle());
if (component.getBTree() != null) {
devs.add(component.getBTree().getFileReference().getDeviceHandle());
@@ -75,8 +75,9 @@
}
@Override
- public void perform() throws HyracksDataException, IndexException {
+ public Boolean call() throws HyracksDataException, IndexException {
accessor.merge(this);
+ return true;
}
@Override
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeOpContext.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeOpContext.java
index 8607a05..b94feba 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeOpContext.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeOpContext.java
@@ -24,7 +24,6 @@
import edu.uci.ics.hyracks.storage.am.common.api.IModificationOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexMetaDataFrame;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
@@ -37,43 +36,69 @@
public final class LSMRTreeOpContext implements ILSMIndexOperationContext {
- public RTreeOpContext rtreeOpContext;
- public BTreeOpContext btreeOpContext;
- public final RTree.RTreeAccessor memRTreeAccessor;
- public final BTree.BTreeAccessor memBTreeAccessor;
+ public RTree.RTreeAccessor[] mutableRTreeAccessors;
+ public RTree.RTreeAccessor currentMutableRTreeAccessor;
+ public BTree.BTreeAccessor[] mutableBTreeAccessors;
+ public BTree.BTreeAccessor currentMutableBTreeAccessor;
+
+ public RTreeOpContext[] rtreeOpContexts;
+ public BTreeOpContext[] btreeOpContexts;
+ public RTreeOpContext currentRTreeOpContext;
+ public BTreeOpContext currentBTreeOpContext;
+
private IndexOperation op;
public final List<ILSMComponent> componentHolder;
public final IModificationOperationCallback modificationCallback;
public final ISearchOperationCallback searchCallback;
- public LSMRTreeOpContext(RTree.RTreeAccessor memRtreeAccessor, IRTreeLeafFrame rtreeLeafFrame,
- IRTreeInteriorFrame rtreeInteriorFrame, ITreeIndexMetaDataFrame rtreeMetaFrame, int rTreeHeightHint,
- BTree.BTreeAccessor memBtreeAccessor, ITreeIndexFrameFactory btreeLeafFrameFactory,
- ITreeIndexFrameFactory btreeInteriorFrameFactory, ITreeIndexMetaDataFrame btreeMetaFrame,
- IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeCmpFactories,
- IModificationOperationCallback modificationCallback, ISearchOperationCallback searchCallback) {
- this.memRTreeAccessor = memRtreeAccessor;
- this.memBTreeAccessor = memBtreeAccessor;
+ public LSMRTreeOpContext(List<ILSMComponent> mutableComponents, IRTreeLeafFrame rtreeLeafFrame,
+ IRTreeInteriorFrame rtreeInteriorFrame, ITreeIndexFrameFactory btreeLeafFrameFactory,
+ ITreeIndexFrameFactory btreeInteriorFrameFactory, IBinaryComparatorFactory[] rtreeCmpFactories,
+ IBinaryComparatorFactory[] btreeCmpFactories, IModificationOperationCallback modificationCallback,
+ ISearchOperationCallback searchCallback) {
+ mutableRTreeAccessors = new RTree.RTreeAccessor[mutableComponents.size()];
+ mutableBTreeAccessors = new BTree.BTreeAccessor[mutableComponents.size()];
+ rtreeOpContexts = new RTreeOpContext[mutableComponents.size()];
+ btreeOpContexts = new BTreeOpContext[mutableComponents.size()];
+
+ for (int i = 0; i < mutableComponents.size(); i++) {
+ LSMRTreeMemoryComponent mutableComponent = (LSMRTreeMemoryComponent) mutableComponents.get(i);
+ mutableRTreeAccessors[i] = (RTree.RTreeAccessor) mutableComponent.getRTree().createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ mutableBTreeAccessors[i] = (BTree.BTreeAccessor) mutableComponent.getBTree().createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+
+ rtreeOpContexts[i] = mutableRTreeAccessors[i].getOpContext();
+ btreeOpContexts[i] = mutableBTreeAccessors[i].getOpContext();
+ }
+
+ assert mutableComponents.size() > 0;
+ currentRTreeOpContext = rtreeOpContexts[0];
+ currentBTreeOpContext = btreeOpContexts[0];
this.componentHolder = new LinkedList<ILSMComponent>();
this.modificationCallback = modificationCallback;
this.searchCallback = searchCallback;
- this.rtreeOpContext = new RTreeOpContext(rtreeLeafFrame, rtreeInteriorFrame, rtreeMetaFrame, rtreeCmpFactories,
- rTreeHeightHint, NoOpOperationCallback.INSTANCE);
- this.btreeOpContext = new BTreeOpContext(memBtreeAccessor, btreeLeafFrameFactory, btreeInteriorFrameFactory,
- btreeMetaFrame, btreeCmpFactories, NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
}
public void setOperation(IndexOperation newOp) {
reset();
- if (newOp == IndexOperation.INSERT) {
- rtreeOpContext.setOperation(newOp);
- } else if (newOp == IndexOperation.DELETE) {
- btreeOpContext.setOperation(IndexOperation.INSERT);
- }
this.op = newOp;
}
@Override
+ public void setCurrentMutableComponentId(int currentMutableComponentId) {
+ currentMutableRTreeAccessor = mutableRTreeAccessors[currentMutableComponentId];
+ currentMutableBTreeAccessor = mutableBTreeAccessors[currentMutableComponentId];
+ currentRTreeOpContext = rtreeOpContexts[currentMutableComponentId];
+ currentBTreeOpContext = btreeOpContexts[currentMutableComponentId];
+ if (op == IndexOperation.INSERT) {
+ currentRTreeOpContext.setOperation(op);
+ } else if (op == IndexOperation.DELETE) {
+ currentBTreeOpContext.setOperation(IndexOperation.INSERT);
+ }
+ }
+
+ @Override
public void reset() {
componentHolder.clear();
}
@@ -84,7 +109,7 @@
}
public MultiComparator getBTreeMultiComparator() {
- return btreeOpContext.cmp;
+ return currentBTreeOpContext.cmp;
}
@Override
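
LSMRTreeOpContext above now pre-builds one accessor and one operation context per memory component, and setCurrentMutableComponentId() just repoints the current-* references (re-applying the pending INSERT/DELETE setup to the newly selected contexts). That keeps component switching off the allocation path. A minimal sketch of the pattern with a hypothetical accessor type:

final class MultiComponentOpContextSketch {
    interface Accessor {
        void insert(String tuple);
    }

    private final Accessor[] mutableAccessors; // one per memory component, built once
    private Accessor currentAccessor;

    MultiComponentOpContextSketch(Accessor[] accessorsPerMemoryComponent) {
        assert accessorsPerMemoryComponent.length > 0;
        this.mutableAccessors = accessorsPerMemoryComponent;
        this.currentAccessor = accessorsPerMemoryComponent[0];
    }

    // Mirrors setCurrentMutableComponentId() above: switching the
    // target component is just an array lookup, no new allocations.
    void setCurrentMutableComponentId(int id) {
        currentAccessor = mutableAccessors[id];
    }

    void insert(String tuple) {
        currentAccessor.insert(tuple); // always routed to the current component
    }
}
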
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
index 9b0869c..30dd467 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSearchCursor.java
@@ -61,7 +61,7 @@
if (currentCursor < numberOfTrees) {
rtreeCursors[currentCursor].reset();
try {
- rTreeAccessors[currentCursor].search(rtreeCursors[currentCursor], rtreeSearchPredicate);
+ rtreeAccessors[currentCursor].search(rtreeCursors[currentCursor], rtreeSearchPredicate);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
@@ -80,14 +80,10 @@
boolean killerTupleFound = false;
for (int i = 0; i <= currentCursor; i++) {
- try {
- btreeCursors[i].reset();
- btreeRangePredicate.setHighKey(currentTuple, true);
- btreeRangePredicate.setLowKey(currentTuple, true);
- bTreeAccessors[i].search(btreeCursors[i], btreeRangePredicate);
- } catch (IndexException e) {
- throw new HyracksDataException(e);
- }
+ btreeCursors[i].reset();
+ btreeRangePredicate.setHighKey(currentTuple, true);
+ btreeRangePredicate.setLowKey(currentTuple, true);
+ btreeAccessors[i].search(btreeCursors[i], btreeRangePredicate);
try {
if (btreeCursors[i].hasNext()) {
killerTupleFound = true;
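
The hunk above simplifies the killer-tuple loop but keeps its logic: a candidate produced by the RTree cursor of component currentCursor is only returned if no component at index 0..currentCursor (the same component or a newer one) holds a matching anti-matter tuple in its delete BTree. A sketch of that check with hypothetical simplified types:

final class AntiMatterCheckSketch {
    interface PointIndex {
        boolean contains(String key); // stands in for a BTree point search
    }

    // Components are ordered newest-first, matching the cursor arrays above.
    static boolean isKilled(String candidateKey, PointIndex[] deleteBTrees, int currentCursor) {
        for (int i = 0; i <= currentCursor; i++) {
            if (deleteBTrees[i].contains(candidateKey)) {
                return true; // a newer (or same-component) delete shadows the candidate
            }
        }
        return false;
    }
}
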
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java
index 31dfe27..dd31165 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeSortedCursor.java
@@ -36,6 +36,10 @@
reset();
}
+ public ILSMIndexOperationContext getOpCtx() {
+ return opCtx;
+ }
+
@Override
public void reset() throws HyracksDataException {
depletedRtreeCursors = new boolean[numberOfTrees];
@@ -44,7 +48,7 @@
for (int i = 0; i < numberOfTrees; i++) {
rtreeCursors[i].reset();
try {
- rTreeAccessors[i].search(rtreeCursors[i], rtreeSearchPredicate);
+ rtreeAccessors[i].search(rtreeCursors[i], rtreeSearchPredicate);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
@@ -103,7 +107,7 @@
btreeCursors[i].reset();
btreeRangePredicate.setHighKey(frameTuple, true);
btreeRangePredicate.setLowKey(frameTuple, true);
- bTreeAccessors[i].search(btreeCursors[i], btreeRangePredicate);
+ btreeAccessors[i].search(btreeCursors[i], btreeRangePredicate);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
@@ -138,7 +142,7 @@
for (int i = 0; i < numberOfTrees; i++) {
rtreeCursors[i].reset();
try {
- rTreeAccessors[i].search(rtreeCursors[i], rtreeSearchPredicate);
+ rtreeAccessors[i].search(rtreeCursors[i], rtreeSearchPredicate);
} catch (IndexException e) {
throw new HyracksDataException(e);
}
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java
index 0be497a..812e942 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuples.java
@@ -16,7 +16,6 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.impls;
import java.util.List;
-import java.util.ListIterator;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ILinearizeComparatorFactory;
@@ -25,7 +24,6 @@
import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeRangeSearchCursor;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
-import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IModificationOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
@@ -51,6 +49,7 @@
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMComponentFileReferences;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexSearchCursor;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMTreeIndexAccessor;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.TreeIndexFactory;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
@@ -67,7 +66,7 @@
// because it should have a different tuple writer in its leaf frames.
private final ILSMComponentFactory bulkLoaComponentFactory;
- public LSMRTreeWithAntiMatterTuples(IVirtualBufferCache virtualBufferCache,
+ public LSMRTreeWithAntiMatterTuples(List<IVirtualBufferCache> virtualBufferCaches,
ITreeIndexFrameFactory rtreeInteriorFrameFactory, ITreeIndexFrameFactory rtreeLeafFrameFactory,
ITreeIndexFrameFactory btreeInteriorFrameFactory, ITreeIndexFrameFactory btreeLeafFrameFactory,
ILSMIndexFileManager fileManager, TreeIndexFactory<RTree> diskRTreeFactory,
@@ -75,20 +74,19 @@
IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeCmpFactories,
ILinearizeComparatorFactory linearizer, int[] comparatorFields, IBinaryComparatorFactory[] linearizerArray,
ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) {
- super(virtualBufferCache, rtreeInteriorFrameFactory, rtreeLeafFrameFactory, btreeInteriorFrameFactory,
- btreeLeafFrameFactory, fileManager, diskRTreeFactory, new LSMRTreeWithAntiMatterTuplesComponentFactory(
- diskRTreeFactory), diskFileMapProvider, fieldCount, rtreeCmpFactories, btreeCmpFactories,
- linearizer, comparatorFields, linearizerArray, 0, mergePolicy, opTracker, ioScheduler,
- ioOpCallbackProvider);
- bulkLoaComponentFactory = new LSMRTreeWithAntiMatterTuplesComponentFactory(bulkLoadRTreeFactory);
+ ILSMIOOperationCallback ioOpCallback) {
+ super(virtualBufferCaches, rtreeInteriorFrameFactory, rtreeLeafFrameFactory, btreeInteriorFrameFactory,
+ btreeLeafFrameFactory, fileManager, new LSMRTreeWithAntiMatterTuplesDiskComponentFactory(diskRTreeFactory),
+ diskFileMapProvider, fieldCount, rtreeCmpFactories, btreeCmpFactories, linearizer, comparatorFields,
+ linearizerArray, 0, mergePolicy, opTracker, ioScheduler, ioOpCallback);
+ bulkLoaComponentFactory = new LSMRTreeWithAntiMatterTuplesDiskComponentFactory(bulkLoadRTreeFactory);
this.bTreeTupleSorter = null;
}
@Override
public synchronized void activate() throws HyracksDataException {
super.activate();
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
immutableComponents.clear();
List<LSMComponentFileReferences> validFileReferences;
try {
@@ -97,7 +95,7 @@
throw new HyracksDataException(e);
}
for (LSMComponentFileReferences lsmComonentFileReference : validFileReferences) {
- LSMRTreeImmutableComponent component;
+ LSMRTreeDiskComponent component;
try {
component = createDiskComponent(componentFactory,
lsmComonentFileReference.getInsertIndexFileReference(), null, null, false);
@@ -112,9 +110,9 @@
@Override
public synchronized void deactivate(boolean flushOnExit) throws HyracksDataException {
super.deactivate(flushOnExit);
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- RTree rtree = (RTree) ((LSMRTreeImmutableComponent) c).getRTree();
+ RTree rtree = (RTree) ((LSMRTreeDiskComponent) c).getRTree();
rtree.deactivate();
}
isActivated = false;
@@ -128,9 +126,9 @@
@Override
public synchronized void destroy() throws HyracksDataException {
super.destroy();
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- RTree rtree = (RTree) ((LSMRTreeImmutableComponent) c).getRTree();
+ RTree rtree = (RTree) ((LSMRTreeDiskComponent) c).getRTree();
rtree.destroy();
}
fileManager.deleteDirs();
@@ -139,9 +137,9 @@
@Override
public synchronized void clear() throws HyracksDataException {
super.clear();
- List<ILSMComponent> immutableComponents = componentsRef.get();
+ List<ILSMComponent> immutableComponents = diskComponents;
for (ILSMComponent c : immutableComponents) {
- RTree rtree = (RTree) ((LSMRTreeImmutableComponent) c).getRTree();
+ RTree rtree = (RTree) ((LSMRTreeDiskComponent) c).getRTree();
rtree.deactivate();
rtree.destroy();
}
@@ -149,65 +147,16 @@
}
@Override
- public void search(ILSMIndexOperationContext ictx, IIndexCursor cursor, ISearchPredicate pred)
- throws HyracksDataException, IndexException {
- LSMRTreeOpContext ctx = (LSMRTreeOpContext) ictx;
- List<ILSMComponent> operationalComponents = ictx.getComponentHolder();
- boolean includeMutableComponent = operationalComponents.get(0) == mutableComponent;
- LSMRTreeWithAntiMatterTuplesSearchCursor lsmTreeCursor = (LSMRTreeWithAntiMatterTuplesSearchCursor) cursor;
- int numDiskRComponents = operationalComponents.size();
-
- LSMRTreeCursorInitialState initialState;
- ITreeIndexAccessor[] bTreeAccessors = null;
- if (includeMutableComponent) {
- // Only in-memory BTree
- bTreeAccessors = new ITreeIndexAccessor[1];
- bTreeAccessors[0] = ctx.memBTreeAccessor;
- }
-
- initialState = new LSMRTreeCursorInitialState(numDiskRComponents, rtreeLeafFrameFactory,
- rtreeInteriorFrameFactory, btreeLeafFrameFactory, ctx.getBTreeMultiComparator(), null, bTreeAccessors,
- includeMutableComponent, lsmHarness, comparatorFields, linearizerArray, ctx.searchCallback,
- operationalComponents);
-
- lsmTreeCursor.open(initialState, pred);
-
- ListIterator<ILSMComponent> diskComponentsIter = operationalComponents.listIterator();
- int diskComponentIx = 0;
- if (includeMutableComponent) {
- // Open cursor of in-memory RTree
- ctx.memRTreeAccessor.search(lsmTreeCursor.getMemRTreeCursor(), pred);
- diskComponentIx++;
- diskComponentsIter.next();
- }
-
- // Open cursors of on-disk RTrees.
- ITreeIndexAccessor[] diskRTreeAccessors = new ITreeIndexAccessor[numDiskRComponents];
- while (diskComponentsIter.hasNext()) {
- RTree diskRTree = (RTree) ((LSMRTreeImmutableComponent) diskComponentsIter.next()).getRTree();
- diskRTreeAccessors[diskComponentIx] = diskRTree.createAccessor(NoOpOperationCallback.INSTANCE,
- NoOpOperationCallback.INSTANCE);
- diskRTreeAccessors[diskComponentIx].search(lsmTreeCursor.getCursor(diskComponentIx), pred);
- diskComponentIx++;
- }
- lsmTreeCursor.initPriorityQueue();
- }
-
- @Override
- public boolean scheduleFlush(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
+ public void scheduleFlush(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException {
- if (!mutableComponent.isModified()) {
- return false;
- }
+ ILSMComponent flushingComponent = ctx.getComponentHolder().get(0);
LSMRTreeOpContext opCtx = createOpContext(NoOpOperationCallback.INSTANCE);
LSMComponentFileReferences relFlushFileRefs = fileManager.getRelFlushFileReference();
- ILSMComponent flushingComponent = ctx.getComponentHolder().get(0);
opCtx.setOperation(IndexOperation.FLUSH);
opCtx.getComponentHolder().add(flushingComponent);
ILSMIndexAccessorInternal accessor = new LSMRTreeWithAntiMatterTuplesAccessor(lsmHarness, opCtx);
ioScheduler.scheduleOperation(new LSMRTreeFlushOperation(accessor, flushingComponent, relFlushFileRefs
.getInsertIndexFileReference(), null, null, callback));
- return true;
}
@Override
@@ -216,14 +165,14 @@
// Renaming order is critical because we assume this ordering when we
// read the file names while opening the tree.
// The RTree should be renamed before the BTree.
- LSMRTreeMutableComponent flushingComponent = (LSMRTreeMutableComponent) flushOp.getFlushingComponent();
+ LSMRTreeMemoryComponent flushingComponent = (LSMRTreeMemoryComponent) flushOp.getFlushingComponent();
ITreeIndexAccessor memRTreeAccessor = flushingComponent.getRTree().createAccessor(
NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
RTreeSearchCursor rtreeScanCursor = (RTreeSearchCursor) memRTreeAccessor.createSearchCursor();
SearchPredicate rtreeNullPredicate = new SearchPredicate(null, null);
memRTreeAccessor.search(rtreeScanCursor, rtreeNullPredicate);
- LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, flushOp.getRTreeFlushTarget(),
- null, null, true);
+ LSMRTreeDiskComponent component = createDiskComponent(componentFactory, flushOp.getRTreeFlushTarget(), null,
+ null, true);
RTree diskRTree = component.getRTree();
// scan the memory BTree
@@ -296,13 +245,10 @@
@Override
public void scheduleMerge(ILSMIndexOperationContext ctx, ILSMIOOperationCallback callback)
throws HyracksDataException, IndexException {
- List<ILSMComponent> mergingComponents = ctx.getComponentHolder();
LSMRTreeOpContext rctx = createOpContext(NoOpOperationCallback.INSTANCE);
- rctx.getComponentHolder().addAll(mergingComponents);
- ITreeIndexCursor cursor = new LSMRTreeWithAntiMatterTuplesSearchCursor(ctx);
- ISearchPredicate rtreeSearchPred = new SearchPredicate(null, null);
- search(rctx, cursor, (SearchPredicate) rtreeSearchPred);
rctx.setOperation(IndexOperation.MERGE);
+ List<ILSMComponent> mergingComponents = ctx.getComponentHolder();
+ ITreeIndexCursor cursor = new LSMRTreeWithAntiMatterTuplesSearchCursor(rctx);
LSMComponentFileReferences relMergeFileRefs = getMergeTargetFileName(mergingComponents);
ILSMIndexAccessorInternal accessor = new LSMRTreeWithAntiMatterTuplesAccessor(lsmHarness, rctx);
ioScheduler.scheduleOperation(new LSMRTreeMergeOperation(accessor, mergingComponents, cursor, relMergeFileRefs
@@ -310,21 +256,17 @@
}
@Override
- public ILSMComponent merge(List<ILSMComponent> mergedComponents, ILSMIOOperation operation)
- throws HyracksDataException, IndexException {
+ public ILSMComponent merge(ILSMIOOperation operation) throws HyracksDataException, IndexException {
LSMRTreeMergeOperation mergeOp = (LSMRTreeMergeOperation) operation;
ITreeIndexCursor cursor = mergeOp.getCursor();
- mergedComponents.addAll(mergeOp.getMergingComponents());
-
- // Nothing to merge.
- if (mergedComponents.size() <= 1) {
- cursor.close();
- return null;
- }
+ ISearchPredicate rtreeSearchPred = new SearchPredicate(null, null);
+ ILSMIndexOperationContext opCtx = ((LSMIndexSearchCursor) cursor).getOpCtx();
+ opCtx.getComponentHolder().addAll(mergeOp.getMergingComponents());
+ search(opCtx, cursor, rtreeSearchPred);
// Bulk load the tuples from all on-disk RTrees into the new RTree.
- LSMRTreeImmutableComponent component = createDiskComponent(componentFactory, mergeOp.getRTreeMergeTarget(),
- null, null, true);
+ LSMRTreeDiskComponent component = createDiskComponent(componentFactory, mergeOp.getRTreeMergeTarget(), null,
+ null, true);
RTree mergedRTree = component.getRTree();
IIndexBulkLoader bulkloader = mergedRTree.createBulkLoader(1.0f, false, 0L, false);
try {
@@ -358,7 +300,7 @@
public MultiComparator getMultiComparator() {
LSMRTreeOpContext concreteCtx = (LSMRTreeOpContext) ctx;
- return concreteCtx.rtreeOpContext.cmp;
+ return concreteCtx.currentRTreeOpContext.cmp;
}
}
@@ -397,7 +339,7 @@
} catch (HyracksDataException | IndexException e) {
throw new TreeIndexException(e);
}
- bulkLoader = ((LSMRTreeImmutableComponent) component).getRTree().createBulkLoader(fillFactor, verifyInput,
+ bulkLoader = ((LSMRTreeDiskComponent) component).getRTree().createBulkLoader(fillFactor, verifyInput,
numElementsHint, false);
}
@@ -429,8 +371,8 @@
protected void cleanupArtifacts() throws HyracksDataException {
if (!cleanedUpArtifacts) {
cleanedUpArtifacts = true;
- ((LSMRTreeImmutableComponent) component).getRTree().deactivate();
- ((LSMRTreeImmutableComponent) component).getRTree().destroy();
+ ((LSMRTreeDiskComponent) component).getRTree().deactivate();
+ ((LSMRTreeDiskComponent) component).getRTree().destroy();
}
}
@@ -438,7 +380,7 @@
@Override
public void markAsValid(ILSMComponent lsmComponent) throws HyracksDataException {
- RTree rtree = ((LSMRTreeImmutableComponent) lsmComponent).getRTree();
+ RTree rtree = ((LSMRTreeDiskComponent) lsmComponent).getRTree();
forceFlushDirtyPages(rtree);
markAsValidInternal(rtree);
}
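Taken together, the hunks above reorganize the merge path: scheduleMerge now only captures the merging components and constructs the cursor, while merge(ILSMIOOperation) opens the search itself and bulk-loads the surviving tuples into a single new disk component. The following self-contained Java sketch models that control flow; Component and MergeOperation here are simplified stand-ins, not the actual Hyracks classes.

```java
import java.util.ArrayList;
import java.util.List;

class MergeFlowSketch {
    static class Component {
        final List<String> tuples;
        Component(List<String> tuples) { this.tuples = tuples; }
    }

    static class MergeOperation {
        final List<Component> mergingComponents;
        MergeOperation(List<Component> mergingComponents) { this.mergingComponents = mergingComponents; }
    }

    // scheduleMerge only records the participating components and sets up the
    // operation; it no longer runs the search itself.
    static MergeOperation scheduleMerge(List<Component> operationalComponents) {
        return new MergeOperation(new ArrayList<>(operationalComponents));
    }

    // merge opens the scan over the captured components and bulk-loads every
    // tuple into one new component, mirroring the new merge(ILSMIOOperation)
    // signature that no longer receives a mergedComponents list.
    static Component merge(MergeOperation op) {
        List<String> merged = new ArrayList<>();
        for (Component c : op.mergingComponents) {
            merged.addAll(c.tuples); // stands in for cursor iteration + bulk loader
        }
        return new Component(merged);
    }

    public static void main(String[] args) {
        MergeOperation op = scheduleMerge(List.of(
                new Component(List.of("a", "b")), new Component(List.of("c"))));
        System.out.println(merge(op).tuples); // [a, b, c]
    }
}
```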
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesComponentFactory.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesDiskComponentFactory.java
similarity index 83%
rename from hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesComponentFactory.java
rename to hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesDiskComponentFactory.java
index 39f35b7..fe253cc 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesComponentFactory.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesDiskComponentFactory.java
@@ -23,16 +23,16 @@
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
-public class LSMRTreeWithAntiMatterTuplesComponentFactory implements ILSMComponentFactory {
+public class LSMRTreeWithAntiMatterTuplesDiskComponentFactory implements ILSMComponentFactory {
private final TreeIndexFactory<RTree> rtreeFactory;
- public LSMRTreeWithAntiMatterTuplesComponentFactory(TreeIndexFactory<RTree> rtreeFactory) {
+ public LSMRTreeWithAntiMatterTuplesDiskComponentFactory(TreeIndexFactory<RTree> rtreeFactory) {
this.rtreeFactory = rtreeFactory;
}
@Override
public ILSMComponent createLSMComponentInstance(LSMComponentFileReferences cfr) throws IndexException {
- return new LSMRTreeImmutableComponent(rtreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()),
+ return new LSMRTreeDiskComponent(rtreeFactory.createIndexInstance(cfr.getInsertIndexFileReference()),
null, null);
}
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesSearchCursor.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesSearchCursor.java
index c937709..cbaf3b3 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesSearchCursor.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/impls/LSMRTreeWithAntiMatterTuplesSearchCursor.java
@@ -18,6 +18,7 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.btree.api.IBTreeLeafFrame;
+import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTreeRangeSearchCursor;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.api.ICursorInitialState;
@@ -25,74 +26,133 @@
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMComponent.LSMComponentType;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexOperationContext;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.LSMIndexSearchCursor;
import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeInteriorFrame;
import edu.uci.ics.hyracks.storage.am.rtree.api.IRTreeLeafFrame;
+import edu.uci.ics.hyracks.storage.am.rtree.impls.RTree;
import edu.uci.ics.hyracks.storage.am.rtree.impls.RTreeSearchCursor;
+import edu.uci.ics.hyracks.storage.am.rtree.impls.SearchPredicate;
public class LSMRTreeWithAntiMatterTuplesSearchCursor extends LSMIndexSearchCursor {
- private RTreeSearchCursor memRTreeCursor;
- private BTreeRangeSearchCursor memBTreeCursor;
+ private ITreeIndexAccessor[] mutableRTreeAccessors;
+ private ITreeIndexAccessor[] btreeAccessors;
+ private RTreeSearchCursor[] mutableRTreeCursors;
+ private ITreeIndexCursor[] btreeCursors;
private RangePredicate btreeRangePredicate;
- private ITreeIndexAccessor memBTreeAccessor;
private boolean foundNext;
private ITupleReference frameTuple;
private int[] comparatorFields;
private MultiComparator btreeCmp;
+ private int currentCursor;
+ private SearchPredicate rtreeSearchPredicate;
+ private int numMutableComponents;
public LSMRTreeWithAntiMatterTuplesSearchCursor(ILSMIndexOperationContext opCtx) {
super(opCtx);
+ currentCursor = 0;
}
@Override
- public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException {
+ public void open(ICursorInitialState initialState, ISearchPredicate searchPred) throws HyracksDataException,
+ IndexException {
LSMRTreeCursorInitialState lsmInitialState = (LSMRTreeCursorInitialState) initialState;
cmp = lsmInitialState.getHilbertCmp();
btreeCmp = lsmInitialState.getBTreeCmp();
- int numDiskRTrees = lsmInitialState.getNumberOfTrees();
- rangeCursors = new RTreeSearchCursor[numDiskRTrees];
- for (int i = 0; i < numDiskRTrees; i++) {
- rangeCursors[i] = new RTreeSearchCursor((IRTreeInteriorFrame) lsmInitialState
- .getRTreeInteriorFrameFactory().createFrame(), (IRTreeLeafFrame) lsmInitialState
- .getRTreeLeafFrameFactory().createFrame());
- }
- includeMemComponent = lsmInitialState.getIncludeMemComponent();
- operationalComponents = lsmInitialState.getOperationalComponents();
- if (includeMemComponent) {
- memRTreeCursor = new RTreeSearchCursor((IRTreeInteriorFrame) lsmInitialState.getRTreeInteriorFrameFactory()
- .createFrame(), (IRTreeLeafFrame) lsmInitialState.getRTreeLeafFrameFactory().createFrame());
- memBTreeCursor = new BTreeRangeSearchCursor((IBTreeLeafFrame) lsmInitialState.getBTreeLeafFrameFactory()
- .createFrame(), false);
- memBTreeAccessor = lsmInitialState.getBTreeAccessors()[0];
- btreeRangePredicate = new RangePredicate(null, null, true, true, btreeCmp, btreeCmp);
- }
lsmHarness = lsmInitialState.getLSMHarness();
comparatorFields = lsmInitialState.getComparatorFields();
+ operationalComponents = lsmInitialState.getOperationalComponents();
+ rtreeSearchPredicate = (SearchPredicate) searchPred;
+
+ includeMutableComponent = false;
+ numMutableComponents = 0;
+ int numImmutableComponents = 0;
+ for (ILSMComponent component : operationalComponents) {
+ if (component.getType() == LSMComponentType.MEMORY) {
+ includeMutableComponent = true;
+ numMutableComponents++;
+ } else {
+ numImmutableComponents++;
+ }
+ }
+ if (includeMutableComponent) {
+ btreeRangePredicate = new RangePredicate(null, null, true, true, btreeCmp, btreeCmp);
+ }
+
+ mutableRTreeCursors = new RTreeSearchCursor[numMutableComponents];
+ mutableRTreeAccessors = new ITreeIndexAccessor[numMutableComponents];
+ btreeCursors = new BTreeRangeSearchCursor[numMutableComponents];
+ btreeAccessors = new ITreeIndexAccessor[numMutableComponents];
+ for (int i = 0; i < numMutableComponents; i++) {
+ ILSMComponent component = operationalComponents.get(i);
+ RTree rtree = (RTree) ((LSMRTreeMemoryComponent) component).getRTree();
+ BTree btree = (BTree) ((LSMRTreeMemoryComponent) component).getBTree();
+ mutableRTreeCursors[i] = new RTreeSearchCursor((IRTreeInteriorFrame) lsmInitialState
+ .getRTreeInteriorFrameFactory().createFrame(), (IRTreeLeafFrame) lsmInitialState
+ .getRTreeLeafFrameFactory().createFrame());
+ btreeCursors[i] = new BTreeRangeSearchCursor((IBTreeLeafFrame) lsmInitialState.getBTreeLeafFrameFactory()
+ .createFrame(), false);
+ btreeAccessors[i] = btree.createAccessor(NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ mutableRTreeAccessors[i] = rtree.createAccessor(NoOpOperationCallback.INSTANCE,
+ NoOpOperationCallback.INSTANCE);
+ }
+
+ rangeCursors = new RTreeSearchCursor[numImmutableComponents];
+ ITreeIndexAccessor[] immutableRTreeAccessors = new ITreeIndexAccessor[numImmutableComponents];
+ int j = 0;
+ for (int i = numMutableComponents; i < operationalComponents.size(); i++) {
+ ILSMComponent component = operationalComponents.get(i);
+ rangeCursors[j] = new RTreeSearchCursor((IRTreeInteriorFrame) lsmInitialState
+ .getRTreeInteriorFrameFactory().createFrame(), (IRTreeLeafFrame) lsmInitialState
+ .getRTreeLeafFrameFactory().createFrame());
+ RTree rtree = (RTree) ((LSMRTreeDiskComponent) component).getRTree();
+ immutableRTreeAccessors[j] = rtree.createAccessor(NoOpOperationCallback.INSTANCE,
+ NoOpOperationCallback.INSTANCE);
+ immutableRTreeAccessors[j].search(rangeCursors[j], searchPred);
+ j++;
+ }
+ searchNextCursor();
setPriorityQueueComparator();
+ initPriorityQueue();
+ }
+
+ private void searchNextCursor() throws HyracksDataException, IndexException {
+ if (currentCursor < numMutableComponents) {
+ mutableRTreeCursors[currentCursor].reset();
+ mutableRTreeAccessors[currentCursor].search(mutableRTreeCursors[currentCursor], rtreeSearchPredicate);
+ }
}
@Override
public boolean hasNext() throws HyracksDataException, IndexException {
- if (includeMemComponent) {
+ if (includeMutableComponent) {
if (foundNext) {
return true;
}
- while (memRTreeCursor.hasNext()) {
- memRTreeCursor.next();
- ITupleReference memRTreeTuple = memRTreeCursor.getTuple();
- if (searchMemBTree(memRTreeTuple)) {
- foundNext = true;
- frameTuple = memRTreeTuple;
- return true;
+
+ while (currentCursor < numMutableComponents) {
+ while (mutableRTreeCursors[currentCursor].hasNext()) {
+ mutableRTreeCursors[currentCursor].next();
+ ITupleReference currentTuple = mutableRTreeCursors[currentCursor].getTuple();
+ if (searchMemBTrees(currentTuple, currentCursor)) {
+ foundNext = true;
+ frameTuple = currentTuple;
+ return true;
+ }
}
+ mutableRTreeCursors[currentCursor].close();
+ currentCursor++;
+ searchNextCursor();
}
while (super.hasNext()) {
super.next();
ITupleReference diskRTreeTuple = super.getTuple();
- if (searchMemBTree(diskRTreeTuple)) {
+ if (searchMemBTrees(diskRTreeTuple, numMutableComponents - 1)) {
foundNext = true;
frameTuple = diskRTreeTuple;
return true;
@@ -107,7 +167,7 @@
@Override
public void next() throws HyracksDataException {
- if (includeMemComponent) {
+ if (includeMutableComponent) {
foundNext = false;
} else {
super.next();
@@ -117,7 +177,7 @@
@Override
public ITupleReference getTuple() {
- if (includeMemComponent) {
+ if (includeMutableComponent) {
return frameTuple;
} else {
return super.getTuple();
@@ -127,48 +187,49 @@
@Override
public void reset() throws HyracksDataException, IndexException {
- if (includeMemComponent) {
- memRTreeCursor.reset();
- memBTreeCursor.reset();
+ if (includeMutableComponent) {
+ for (int i = 0; i < numMutableComponents; i++) {
+ mutableRTreeCursors[i].reset();
+ btreeCursors[i].reset();
+ }
}
+ currentCursor = 0;
super.reset();
}
@Override
public void close() throws HyracksDataException {
- if (includeMemComponent) {
- memRTreeCursor.close();
- memBTreeCursor.close();
+ if (includeMutableComponent) {
+ for (int i = 0; i < numMutableComponents; i++) {
+ mutableRTreeCursors[i].close();
+ btreeCursors[i].close();
+ }
}
+ currentCursor = 0;
super.close();
}
- public ITreeIndexCursor getMemRTreeCursor() {
- return memRTreeCursor;
- }
-
@Override
protected int compare(MultiComparator cmp, ITupleReference tupleA, ITupleReference tupleB) {
return cmp.selectiveFieldCompare(tupleA, tupleB, comparatorFields);
}
- private boolean searchMemBTree(ITupleReference tuple) throws HyracksDataException {
- try {
+ private boolean searchMemBTrees(ITupleReference tuple, int lastBTreeToSearch) throws HyracksDataException,
+ IndexException {
+ for (int i = 0; i <= lastBTreeToSearch; i++) {
+ btreeCursors[i].reset();
btreeRangePredicate.setHighKey(tuple, true);
btreeRangePredicate.setLowKey(tuple, true);
- memBTreeAccessor.search(memBTreeCursor, btreeRangePredicate);
- } catch (IndexException e) {
- throw new HyracksDataException(e);
- }
- try {
- if (memBTreeCursor.hasNext()) {
- return false;
- } else {
- return true;
+ btreeAccessors[i].search(btreeCursors[i], btreeRangePredicate);
+ try {
+ if (btreeCursors[i].hasNext()) {
+ return false;
+ }
+ } finally {
+ btreeCursors[i].close();
}
- } finally {
- memBTreeCursor.close();
}
+ return true;
}
@Override
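The rewritten cursor above treats a tuple coming out of any RTree, mutable or on-disk, as live only if no mutable BTree up to the relevant component holds a matching anti-matter entry, which is exactly what the new searchMemBTrees checks component by component. Below is a minimal model of that filter, assuming each BTree is abstracted as a set of anti-matter keys; all types here are illustrative stand-ins, not the Hyracks classes.

```java
import java.util.List;
import java.util.Set;

class AntiMatterFilterSketch {
    // Returns true iff the candidate tuple is not cancelled by an anti-matter
    // entry in any BTree from component 0 through lastBTreeToSearch,
    // mirroring searchMemBTrees in the diff above.
    static boolean isLive(String tuple, List<Set<String>> antiMatterBTrees, int lastBTreeToSearch) {
        for (int i = 0; i <= lastBTreeToSearch; i++) {
            if (antiMatterBTrees.get(i).contains(tuple)) {
                return false; // an anti-matter tuple shadows this key
            }
        }
        return true;
    }

    public static void main(String[] args) {
        List<Set<String>> btrees = List.of(Set.of("x"), Set.of());
        System.out.println(isLive("x", btrees, 1)); // false: deleted in component 0
        System.out.println(isLive("y", btrees, 1)); // true: no anti-matter entry
    }
}
```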
diff --git a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java
index 50e5d09..ee942e7 100644
--- a/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java
+++ b/hyracks/hyracks-storage-am-lsm-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/utils/LSMRTreeUtils.java
@@ -15,6 +15,8 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.utils;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ILinearizeComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
@@ -31,7 +33,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
import edu.uci.ics.hyracks.storage.am.common.freepage.LinkedListFreePageManagerFactory;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexFileManager;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
@@ -58,12 +60,12 @@
import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
public class LSMRTreeUtils {
- public static LSMRTree createLSMTree(IVirtualBufferCache virtualBufferCache, FileReference file,
+ public static LSMRTree createLSMTree(List<IVirtualBufferCache> virtualBufferCaches, FileReference file,
IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits,
IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeCmpFactories,
IPrimitiveValueProviderFactory[] valueProviderFactories, RTreePolicyType rtreePolicyType,
double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider,
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback,
ILinearizeComparatorFactory linearizeCmpFactory) throws TreeIndexException {
LSMTypeAwareTupleWriterFactory rtreeTupleWriterFactory = new LSMTypeAwareTupleWriterFactory(typeTraits, false);
LSMTypeAwareTupleWriterFactory btreeTupleWriterFactory = new LSMTypeAwareTupleWriterFactory(typeTraits, true);
@@ -99,21 +101,21 @@
ILSMIndexFileManager fileNameManager = new LSMRTreeFileManager(diskFileMapProvider, file, diskRTreeFactory,
diskBTreeFactory);
- LSMRTree lsmTree = new LSMRTree(virtualBufferCache, rtreeInteriorFrameFactory, rtreeLeafFrameFactory,
+ LSMRTree lsmTree = new LSMRTree(virtualBufferCaches, rtreeInteriorFrameFactory, rtreeLeafFrameFactory,
btreeInteriorFrameFactory, btreeLeafFrameFactory, fileNameManager, diskRTreeFactory, diskBTreeFactory,
bloomFilterFactory, bloomFilterFalsePositiveRate, diskFileMapProvider, typeTraits.length,
rtreeCmpFactories, btreeCmpFactories, linearizeCmpFactory, comparatorFields, linearizerArray,
- mergePolicy, opTracker, ioScheduler, ioOpCallbackProvider);
+ mergePolicy, opTracker, ioScheduler, ioOpCallback);
return lsmTree;
}
public static LSMRTreeWithAntiMatterTuples createLSMTreeWithAntiMatterTuples(
- IVirtualBufferCache virtualBufferCache, FileReference file, IBufferCache diskBufferCache,
+ List<IVirtualBufferCache> virtualBufferCaches, FileReference file, IBufferCache diskBufferCache,
IFileMapProvider diskFileMapProvider, ITypeTraits[] typeTraits,
IBinaryComparatorFactory[] rtreeCmpFactories, IBinaryComparatorFactory[] btreeCmpFactories,
IPrimitiveValueProviderFactory[] valueProviderFactories, RTreePolicyType rtreePolicyType,
ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider, ILinearizeComparatorFactory linearizerCmpFactory)
+ ILSMIOOperationCallback ioOpCallback, ILinearizeComparatorFactory linearizerCmpFactory)
throws TreeIndexException {
LSMRTreeTupleWriterFactory rtreeTupleWriterFactory = new LSMRTreeTupleWriterFactory(typeTraits, false);
LSMRTreeTupleWriterFactory btreeTupleWriterFactory = new LSMRTreeTupleWriterFactory(typeTraits, true);
@@ -151,11 +153,11 @@
ILSMIndexFileManager fileNameManager = new LSMRTreeWithAntiMatterTuplesFileManager(diskFileMapProvider, file,
diskRTreeFactory);
- LSMRTreeWithAntiMatterTuples lsmTree = new LSMRTreeWithAntiMatterTuples(virtualBufferCache,
+ LSMRTreeWithAntiMatterTuples lsmTree = new LSMRTreeWithAntiMatterTuples(virtualBufferCaches,
rtreeInteriorFrameFactory, rtreeLeafFrameFactory, btreeInteriorFrameFactory, btreeLeafFrameFactory,
fileNameManager, diskRTreeFactory, bulkLoadRTreeFactory, diskFileMapProvider, typeTraits.length,
rtreeCmpFactories, btreeCmpFactories, linearizerCmpFactory, comparatorFields, linearizerArray,
- mergePolicy, opTracker, ioScheduler, ioOpCallbackProvider);
+ mergePolicy, opTracker, ioScheduler, ioOpCallback);
return lsmTree;
}
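Both factory methods above now take the I/O-operation callback directly instead of a provider that resolves one per index, in addition to accepting a list of virtual buffer caches. A minimal sketch of the callback change follows; both interfaces are simplified stand-ins for the Hyracks ones.

```java
class CallbackWiringSketch {
    interface IOCallback { void afterFinalize(); }
    interface IOCallbackProvider { IOCallback getIOOperationCallback(Object index); }

    // Old shape: the index resolves its callback through a provider.
    static IOCallback resolveOld(IOCallbackProvider provider, Object index) {
        return provider.getIOOperationCallback(index);
    }

    // New shape: the caller hands in the fully constructed callback, so the
    // index never needs to know about providers at all.
    static IOCallback resolveNew(IOCallback callback) {
        return callback;
    }

    public static void main(String[] args) {
        IOCallback cb = () -> System.out.println("flush/merge finished");
        resolveNew(cb).afterFinalize();
        resolveOld(index -> cb, new Object()).afterFinalize();
    }
}
```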
diff --git a/hyracks/hyracks-storage-am-rtree/pom.xml b/hyracks/hyracks-storage-am-rtree/pom.xml
index edbf803..410ed6f 100644
--- a/hyracks/hyracks-storage-am-rtree/pom.xml
+++ b/hyracks/hyracks-storage-am-rtree/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,21 +41,21 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTree.java b/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTree.java
index 528c004..7bb3583 100644
--- a/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTree.java
+++ b/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTree.java
@@ -147,7 +147,7 @@
private RTreeOpContext createOpContext(IModificationOperationCallback modificationCallback) {
return new RTreeOpContext((IRTreeLeafFrame) leafFrameFactory.createFrame(),
(IRTreeInteriorFrame) interiorFrameFactory.createFrame(), freePageManager.getMetaDataFrameFactory()
- .createFrame(), cmpFactories, 8, modificationCallback);
+ .createFrame(), cmpFactories, modificationCallback);
}
private void insert(ITupleReference tuple, IIndexOperationContext ictx) throws HyracksDataException,
diff --git a/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeOpContext.java b/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeOpContext.java
index da73f85..a9171c3 100644
--- a/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeOpContext.java
+++ b/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/impls/RTreeOpContext.java
@@ -31,6 +31,7 @@
public class RTreeOpContext implements IIndexOperationContext {
private static final int INITIAL_TRAVERSE_LIST_SIZE = 100;
+ private static final int INITIAL_HEIGHT = 8;
public final MultiComparator cmp;
public final IRTreeInteriorFrame interiorFrame;
public final IRTreeLeafFrame leafFrame;
@@ -51,20 +52,20 @@
public final IModificationOperationCallback modificationCallback;
public RTreeOpContext(IRTreeLeafFrame leafFrame, IRTreeInteriorFrame interiorFrame,
- ITreeIndexMetaDataFrame metaFrame, IBinaryComparatorFactory[] cmpFactories, int treeHeightHint,
+ ITreeIndexMetaDataFrame metaFrame, IBinaryComparatorFactory[] cmpFactories,
IModificationOperationCallback modificationCallback) {
-
- if (cmpFactories[0] != null) {
+
+ if (cmpFactories[0] != null) {
this.cmp = MultiComparator.create(cmpFactories);
} else {
this.cmp = null;
}
-
+
this.interiorFrame = interiorFrame;
this.leafFrame = leafFrame;
this.metaFrame = metaFrame;
this.modificationCallback = modificationCallback;
- pathList = new PathList(treeHeightHint, treeHeightHint);
+ pathList = new PathList(INITIAL_HEIGHT, INITIAL_HEIGHT);
NSNUpdates = new ArrayList<ICachedPage>();
LSNUpdates = new ArrayList<ICachedPage>();
}
diff --git a/hyracks/hyracks-storage-common/pom.xml b/hyracks/hyracks-storage-common/pom.xml
index 519320f..349c223 100644
--- a/hyracks/hyracks-storage-common/pom.xml
+++ b/hyracks/hyracks-storage-common/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,7 +41,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-test-support/pom.xml b/hyracks/hyracks-test-support/pom.xml
index bc00394..d2e1697 100644
--- a/hyracks/hyracks-test-support/pom.xml
+++ b/hyracks/hyracks-test-support/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -41,33 +41,33 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-rtree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
index eafeff2..0a70185 100644
--- a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
+++ b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexExamplesTest.java
@@ -46,6 +46,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
import edu.uci.ics.hyracks.storage.am.common.api.UnsortedInputException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.common.impls.TreeIndexDiskOrderScanCursor;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
@@ -140,9 +141,9 @@
/**
* This tests the btree page split. Originally this test didn't pass since
- * the btree was spliting by cardinality and not size. Thus, we might end
- * up with a situation where there is not enough space to insert the new
- * tuple after the split which will throw an error and the split won't be
+ * the btree was splitting by cardinality and not size. Thus, we might end up
+ * with a situation where there is not enough space to insert the new tuple
+ * after the split which will throw an error and the split won't be
* propagated to the upper level; thus, the tree is corrupted. Now, it splits
* the page by size, which is the correct behavior for abnormally large keys/values.
*/
@@ -716,6 +717,12 @@
}
// Success.
break;
+ } catch (TreeIndexDuplicateKeyException e2) {
+ if (j != i) {
+ fail("Unexpected exception: " + e2.getMessage());
+ }
+ // Success.
+ break;
}
}
diff --git a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexTestUtils.java b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexTestUtils.java
index 5b7b050..9de217b 100644
--- a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexTestUtils.java
+++ b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/btree/OrderedIndexTestUtils.java
@@ -35,7 +35,6 @@
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.btree.util.BTreeUtils;
import edu.uci.ics.hyracks.storage.am.common.CheckTuple;
@@ -44,6 +43,7 @@
import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.ISearchPredicate;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
@SuppressWarnings("rawtypes")
@@ -205,7 +205,7 @@
// Set expected values. Do this only after insertion succeeds
// because we ignore duplicate keys.
ctx.insertCheckTuple(createStringCheckTuple(fieldValues, ctx.getKeyFieldCount()), ctx.getCheckTuples());
- } catch (BTreeDuplicateKeyException e) {
+ } catch (TreeIndexDuplicateKeyException e) {
// Ignore duplicate key insertions.
}
}
diff --git a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
index 693dc47..6e1995c 100644
--- a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
+++ b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/config/AccessMethodTestsConfig.java
@@ -29,6 +29,7 @@
public static final int LSM_RTREE_BULKLOAD_ROUNDS = 5;
public static final int LSM_RTREE_MAX_TREES_TO_MERGE = 3;
public static final boolean LSM_RTREE_TEST_RSTAR_POLICY = false;
+ public static final int LSM_RTREE_NUM_MUTABLE_COMPONENTS = 2;
// Test params for BTree, LSMBTree.
public static final int BTREE_NUM_TUPLES_TO_INSERT = 100;
@@ -39,17 +40,18 @@
// Test params for LSMBTree only.
public static final int LSM_BTREE_BULKLOAD_ROUNDS = 5;
public static final int LSM_BTREE_MAX_TREES_TO_MERGE = 10;
+ public static final int LSM_BTREE_NUM_MUTABLE_COMPONENTS = 2;
// Mem configuration for RTree.
public static final int RTREE_PAGE_SIZE = 512;
public static final int RTREE_NUM_PAGES = 1000;
- public static final int RTREE_MAX_OPEN_FILES = 10;
+ public static final int RTREE_MAX_OPEN_FILES = Integer.MAX_VALUE;
public static final int RTREE_HYRACKS_FRAME_SIZE = 128;
// Mem configuration for LSMRTree and LSMRTreeWithAntiMatterTuples.
public static final int LSM_RTREE_DISK_PAGE_SIZE = 512;
public static final int LSM_RTREE_DISK_NUM_PAGES = 1000;
- public static final int LSM_RTREE_DISK_MAX_OPEN_FILES = 2000;
+ public static final int LSM_RTREE_DISK_MAX_OPEN_FILES = Integer.MAX_VALUE;
public static final int LSM_RTREE_MEM_PAGE_SIZE = 512;
public static final int LSM_RTREE_MEM_NUM_PAGES = 1000;
public static final int LSM_RTREE_HYRACKS_FRAME_SIZE = 128;
@@ -58,13 +60,13 @@
// Mem configuration for BTree.
public static final int BTREE_PAGE_SIZE = 256;
public static final int BTREE_NUM_PAGES = 100;
- public static final int BTREE_MAX_OPEN_FILES = 10;
+ public static final int BTREE_MAX_OPEN_FILES = Integer.MAX_VALUE;
public static final int BTREE_HYRACKS_FRAME_SIZE = 128;
// Mem configuration for LSMBTree.
public static final int LSM_BTREE_DISK_PAGE_SIZE = 256;
public static final int LSM_BTREE_DISK_NUM_PAGES = 1000;
- public static final int LSM_BTREE_DISK_MAX_OPEN_FILES = 200;
+ public static final int LSM_BTREE_DISK_MAX_OPEN_FILES = Integer.MAX_VALUE;
public static final int LSM_BTREE_MEM_PAGE_SIZE = 256;
public static final int LSM_BTREE_MEM_NUM_PAGES = 100;
public static final int LSM_BTREE_HYRACKS_FRAME_SIZE = 128;
@@ -73,11 +75,13 @@
// Mem configuration for Inverted Index.
public static final int LSM_INVINDEX_DISK_PAGE_SIZE = 1024;
public static final int LSM_INVINDEX_DISK_NUM_PAGES = 1000;
- public static final int LSM_INVINDEX_DISK_MAX_OPEN_FILES = 1000;
+ public static final int LSM_INVINDEX_DISK_MAX_OPEN_FILES = Integer.MAX_VALUE;
public static final int LSM_INVINDEX_MEM_PAGE_SIZE = 1024;
public static final int LSM_INVINDEX_MEM_NUM_PAGES = 100;
public static final int LSM_INVINDEX_HYRACKS_FRAME_SIZE = 32768;
public static final double LSM_INVINDEX_BLOOMFILTER_FALSE_POSITIVE_RATE = 0.01;
+ public static final int LSM_INVINDEX_NUM_MUTABLE_COMPONENTS = 2;
+
// Test parameters.
public static final int LSM_INVINDEX_NUM_DOCS_TO_INSERT = 100;
// Used for full-fledged search test.
@@ -100,7 +104,7 @@
// Mem configuration for BloomFilter.
public static final int BLOOM_FILTER_PAGE_SIZE = 256;
public static final int BLOOM_FILTER_NUM_PAGES = 1000;
- public static final int BLOOM_FILTER_MAX_OPEN_FILES = 10;
+ public static final int BLOOM_FILTER_MAX_OPEN_FILES = Integer.MAX_VALUE;
public static final int BLOOM_FILTER_HYRACKS_FRAME_SIZE = 128;
}
@@ -111,9 +115,12 @@
public static final int RTREE_NUM_INSERT_ROUNDS = 2;
public static final int RTREE_NUM_DELETE_ROUNDS = 2;
public static final int RTREE_MULTITHREAD_NUM_OPERATIONS = 10000;
+public static final boolean RTREE_TEST_RSTAR_POLICY = true;
// Test params for LSMRTree and LSMRTreeWithAntiMatterTuples.
public static final int LSM_RTREE_BULKLOAD_ROUNDS = 5;
+public static final boolean LSM_RTREE_TEST_RSTAR_POLICY = false;
public static final int LSM_RTREE_MAX_TREES_TO_MERGE = 3;
+public static final int LSM_RTREE_NUM_MUTABLE_COMPONENTS = 2;
// Test params for BTree, LSMBTree.
public static final int BTREE_NUM_TUPLES_TO_INSERT = 10000;
@@ -124,46 +131,51 @@
// Test params for LSMBTree only.
public static final int LSM_BTREE_BULKLOAD_ROUNDS = 5;
public static final int LSM_BTREE_MAX_TREES_TO_MERGE = 10;
+public static final int LSM_BTREE_NUM_MUTABLE_COMPONENTS = 2;
// Mem configuration for RTree.
public static final int RTREE_PAGE_SIZE = 512;
public static final int RTREE_NUM_PAGES = 1000;
-public static final int RTREE_MAX_OPEN_FILES = 10;
+public static final int RTREE_MAX_OPEN_FILES = Integer.MAX_VALUE;
public static final int RTREE_HYRACKS_FRAME_SIZE = 128;
// Mem configuration for LSMRTree and LSMRTreeWithAntiMatterTuples.
-public static final int LSM_RTREE_DISK_PAGE_SIZE = 256;
-public static final int LSM_RTREE_DISK_NUM_PAGES = 1000;
-public static final int LSM_RTREE_DISK_MAX_OPEN_FILES = 2000;
-public static final int LSM_RTREE_MEM_PAGE_SIZE = 256;
+public static final int LSM_RTREE_DISK_PAGE_SIZE = 512;
+public static final int LSM_RTREE_DISK_NUM_PAGES = 10000;
+public static final int LSM_RTREE_DISK_MAX_OPEN_FILES = Integer.MAX_VALUE;
+public static final int LSM_RTREE_MEM_PAGE_SIZE = 512;
public static final int LSM_RTREE_MEM_NUM_PAGES = 1000;
public static final int LSM_RTREE_HYRACKS_FRAME_SIZE = 128;
+public static final double LSM_RTREE_BLOOMFILTER_FALSE_POSITIVE_RATE = 0.01;
// Mem configuration for BTree.
public static final int BTREE_PAGE_SIZE = 256;
public static final int BTREE_NUM_PAGES = 100;
-public static final int BTREE_MAX_OPEN_FILES = 10;
+public static final int BTREE_MAX_OPEN_FILES = Integer.MAX_VALUE;
public static final int BTREE_HYRACKS_FRAME_SIZE = 128;
// Mem configuration for LSMBTree.
public static final int LSM_BTREE_DISK_PAGE_SIZE = 256;
-public static final int LSM_BTREE_DISK_NUM_PAGES = 1000;
-public static final int LSM_BTREE_DISK_MAX_OPEN_FILES = 200;
+public static final int LSM_BTREE_DISK_NUM_PAGES = 10000;
+public static final int LSM_BTREE_DISK_MAX_OPEN_FILES = Integer.MAX_VALUE;
public static final int LSM_BTREE_MEM_PAGE_SIZE = 256;
public static final int LSM_BTREE_MEM_NUM_PAGES = 100;
public static final int LSM_BTREE_HYRACKS_FRAME_SIZE = 128;
+public static final double LSM_BTREE_BLOOMFILTER_FALSE_POSITIVE_RATE = 0.01;
// Mem configuration for Inverted Index.
public static final int INVINDEX_PAGE_SIZE = 32768;
public static final int INVINDEX_NUM_PAGES = 100;
-public static final int INVINDEX_MAX_OPEN_FILES = 10;
+public static final int INVINDEX_MAX_OPEN_FILES = Integer.MAX_VALUE;
public static final int INVINDEX_HYRACKS_FRAME_SIZE = 32768;
+public static final double LSM_INVINDEX_BLOOMFILTER_FALSE_POSITIVE_RATE = 0.01;
+public static final int LSM_INVINDEX_NUM_MUTABLE_COMPONENTS = 2;
// Mem configuration for Inverted Index.
public static final int LSM_INVINDEX_DISK_PAGE_SIZE = 1024;
-public static final int LSM_INVINDEX_DISK_NUM_PAGES = 1000;
-public static final int LSM_INVINDEX_DISK_MAX_OPEN_FILES = 1000;
+public static final int LSM_INVINDEX_DISK_NUM_PAGES = 10000;
+public static final int LSM_INVINDEX_DISK_MAX_OPEN_FILES = Integer.MAX_VALUE;
public static final int LSM_INVINDEX_MEM_PAGE_SIZE = 1024;
public static final int LSM_INVINDEX_MEM_NUM_PAGES = 100;
public static final int LSM_INVINDEX_HYRACKS_FRAME_SIZE = 32768;
@@ -182,4 +194,13 @@
// Allocate a generous size to make sure we have enough elements for all tests.
public static final int LSM_INVINDEX_SCAN_COUNT_ARRAY_SIZE = 1000000;
public static final int LSM_INVINDEX_MULTITHREAD_NUM_OPERATIONS = 10000;
+
+// Test params for BloomFilter
+public static final int BLOOM_FILTER_NUM_TUPLES_TO_INSERT = 10000;
+
+// Mem configuration for BloomFilter.
+public static final int BLOOM_FILTER_PAGE_SIZE = 256;
+public static final int BLOOM_FILTER_NUM_PAGES = 1000;
+public static final int BLOOM_FILTER_MAX_OPEN_FILES = Integer.MAX_VALUE;
+public static final int BLOOM_FILTER_HYRACKS_FRAME_SIZE = 128;
*/
diff --git a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
index 44ab0d9..705a976 100644
--- a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
+++ b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/AbstractRTreeExamplesTest.java
@@ -134,12 +134,6 @@
TupleUtils.createIntegerTuple(tb, tuple, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
Math.max(p1y, p2y), pk1, pk2);
- if (LOGGER.isLoggable(Level.INFO)) {
- if (i % 1000 == 0) {
- LOGGER.info("Inserting " + i + " " + Math.min(p1x, p2x) + " " + Math.min(p1y, p2y) + " "
- + Math.max(p1x, p2x) + " " + Math.max(p1y, p2y) + ", " + pk1 + ", " + pk2);
- }
- }
try {
indexAccessor.insert(tuple);
} catch (TreeIndexException e) {
@@ -494,13 +488,6 @@
TupleUtils.createDoubleTuple(tb, tuple, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.min(p1z, p2z),
Math.max(p1x, p2x), Math.max(p1y, p2y), Math.max(p1z, p2z), pk);
- if (LOGGER.isLoggable(Level.INFO)) {
- if (i % 1000 == 0) {
- LOGGER.info("Inserting " + i + " " + Math.min(p1x, p2x) + " " + Math.min(p1y, p2y) + " "
- + Math.min(p1z, p2z) + " " + Math.max(p1x, p2x) + " " + Math.max(p1y, p2y) + " "
- + Math.max(p1z, p2z) + ", " + pk);
- }
- }
try {
indexAccessor.insert(tuple);
} catch (TreeIndexException e) {
@@ -608,11 +595,6 @@
TupleUtils.createIntegerTuple(tb, tuple, Math.min(p1x, p2x), Math.min(p1y, p2y), Math.max(p1x, p2x),
Math.max(p1y, p2y), pk);
- if (LOGGER.isLoggable(Level.INFO)) {
- if (i % 1000 == 0) {
- LOGGER.info("Inserting " + i);
- }
- }
try {
indexAccessor.insert(tuple);
} catch (TreeIndexException e) {
@@ -626,11 +608,6 @@
int delDone = 0;
for (int i = 0; i < numInserts; i++) {
TupleUtils.createIntegerTuple(tb, tuple, p1xs[i], p1ys[i], p2xs[i], p2ys[i], pks[i]);
- if (LOGGER.isLoggable(Level.INFO)) {
- if (i % 1000 == 0) {
- LOGGER.info("Deleting " + i);
- }
- }
try {
indexAccessor.delete(tuple);
delDone++;
diff --git a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/test/support/TestVirtualBufferCacheProvider.java b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/test/support/TestVirtualBufferCacheProvider.java
index 9db21cb..afbb86b 100644
--- a/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/test/support/TestVirtualBufferCacheProvider.java
+++ b/hyracks/hyracks-test-support/src/main/java/edu/uci/ics/hyracks/test/support/TestVirtualBufferCacheProvider.java
@@ -14,6 +14,9 @@
*/
package edu.uci.ics.hyracks.test.support;
+import java.util.ArrayList;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCacheProvider;
@@ -33,7 +36,12 @@
}
@Override
- public IVirtualBufferCache getVirtualBufferCache(IHyracksTaskContext ctx) {
- return new VirtualBufferCache(new HeapBufferAllocator(), pageSize, numPages);
+ public List<IVirtualBufferCache> getVirtualBufferCaches(IHyracksTaskContext ctx) {
+ List<IVirtualBufferCache> vbcs = new ArrayList<IVirtualBufferCache>();
+ for (int i = 0; i < 2; i++) {
+ IVirtualBufferCache vbc = new VirtualBufferCache(new HeapBufferAllocator(), pageSize, numPages / 2);
+ vbcs.add(vbc);
+ }
+ return vbcs;
}
}
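The updated test provider above returns one virtual buffer cache per mutable component, splitting the page budget that a single cache used to hold. Here is a self-contained sketch of that allocation policy, assuming an even split and using a plain record as a stand-in for IVirtualBufferCache.

```java
import java.util.ArrayList;
import java.util.List;

class VirtualBufferCacheSplitSketch {
    // Stand-in for IVirtualBufferCache; only the budget fields matter here.
    record VBCache(int pageSize, int numPages) {}

    // Mirrors the updated test provider: one cache per mutable component,
    // with the original page budget divided evenly among them.
    static List<VBCache> createCaches(int pageSize, int totalPages, int numMutableComponents) {
        List<VBCache> caches = new ArrayList<>();
        for (int i = 0; i < numMutableComponents; i++) {
            caches.add(new VBCache(pageSize, totalPages / numMutableComponents));
        }
        return caches;
    }

    public static void main(String[] args) {
        // Two mutable components share the budget one cache used to hold.
        System.out.println(createCaches(256, 1000, 2));
    }
}
```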
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/pom.xml
index 09f7302..cacd829 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-bloomfilter-test/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-tests</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -46,14 +46,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-bloomfilter</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-test-support</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>test</scope>
</dependency>
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-btree-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-btree-test/pom.xml
index b363aac..6fa8414 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-btree-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-btree-test/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-tests</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -48,14 +48,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-test-support</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>test</scope>
</dependency>
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeTestWorker.java b/hyracks/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeTestWorker.java
index 262e21c..22d3e6a 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeTestWorker.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/btree/multithread/BTreeTestWorker.java
@@ -19,8 +19,6 @@
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNotUpdateableException;
import edu.uci.ics.hyracks.storage.am.btree.impls.BTree;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
@@ -31,6 +29,8 @@
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.datagen.DataGenThread;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
public class BTreeTestWorker extends AbstractIndexTestWorker {
@@ -60,7 +60,7 @@
case INSERT:
try {
accessor.insert(tuple);
- } catch (BTreeDuplicateKeyException e) {
+ } catch (TreeIndexDuplicateKeyException e) {
// Ignore duplicate keys, since we get random tuples.
}
break;
@@ -74,7 +74,7 @@
deleteTuple.reset(deleteTb.getFieldEndOffsets(), deleteTb.getByteArray());
try {
accessor.delete(deleteTuple);
- } catch (BTreeNonExistentKeyException e) {
+ } catch (TreeIndexNonExistentKeyException e) {
// Ignore non-existent keys, since we get random tuples.
}
break;
@@ -82,7 +82,7 @@
case UPDATE:
try {
accessor.update(tuple);
- } catch (BTreeNonExistentKeyException e) {
+ } catch (TreeIndexNonExistentKeyException e) {
// Ignore non-existent keys, since we get random tuples.
} catch (BTreeNotUpdateableException e) {
// Ignore not updateable exception due to numKeys == numFields.
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/pom.xml
index 466af51..83d382c 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-tests</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -39,20 +39,20 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-test-support</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
</dependencies>
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeBulkLoadTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeBulkLoadTest.java
index e0a2a46..447c507 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeBulkLoadTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeBulkLoadTest.java
@@ -51,10 +51,10 @@
@Override
protected OrderedIndexTestContext createTestContext(ISerializerDeserializer[] fieldSerdes, int numKeys,
BTreeLeafFrameType leafType) throws Exception {
- return LSMBTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMBTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, numKeys,
harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(), harness.getOperationTracker(),
- harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeDeleteTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeDeleteTest.java
index 122b788..0303dbd 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeDeleteTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeDeleteTest.java
@@ -51,10 +51,10 @@
@Override
protected OrderedIndexTestContext createTestContext(ISerializerDeserializer[] fieldSerdes, int numKeys,
BTreeLeafFrameType leafType) throws Exception {
- return LSMBTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMBTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, numKeys,
harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(), harness.getOperationTracker(),
- harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java
index 60e0492..dc55f5d 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeExamplesTest.java
@@ -34,10 +34,10 @@
@Override
protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
int[] bloomFilterKeyFields) throws TreeIndexException {
- return LSMBTreeUtils.createLSMTree(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMBTreeUtils.createLSMTree(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), typeTraits, cmpFactories,
bloomFilterKeyFields, harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Before
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeInsertTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeInsertTest.java
index 76a0206..4fc3960 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeInsertTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeInsertTest.java
@@ -51,10 +51,10 @@
@Override
protected OrderedIndexTestContext createTestContext(ISerializerDeserializer[] fieldSerdes, int numKeys,
BTreeLeafFrameType leafType) throws Exception {
- return LSMBTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMBTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, numKeys,
harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(), harness.getOperationTracker(),
- harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeLifecycleTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeLifecycleTest.java
index 639fd47..f553b72 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeLifecycleTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeLifecycleTest.java
@@ -53,10 +53,10 @@
@Override
public void setup() throws Exception {
harness.setUp();
- testCtx = LSMBTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ testCtx = LSMBTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, fieldSerdes.length,
harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(), harness.getOperationTracker(),
- harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getIOScheduler(), harness.getIOOperationCallback());
index = testCtx.getIndex();
}
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeMergeTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeMergeTest.java
index e3c86df..f5cfb32 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeMergeTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeMergeTest.java
@@ -50,10 +50,10 @@
@Override
protected OrderedIndexTestContext createTestContext(ISerializerDeserializer[] fieldSerdes, int numKeys,
BTreeLeafFrameType leafType) throws Exception {
- return LSMBTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMBTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, numKeys,
harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(), harness.getOperationTracker(),
- harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java
index 77b65ca..2234075 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeModificationOperationCallbackTest.java
@@ -42,13 +42,13 @@
@Override
protected void createIndexInstance() throws Exception {
- index = LSMBTreeUtils.createLSMTree(harness.getVirtualBufferCache(), harness.getFileReference(),
+ index = LSMBTreeUtils.createLSMTree(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(),
SerdeUtils.serdesToTypeTraits(keySerdes),
SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length), bloomFilterKeyFields,
harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
NoOpOperationTrackerProvider.INSTANCE.getOperationTracker(null), harness.getIOScheduler(),
- harness.getIOOperationCallbackProvider());
+ harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeMultiBulkLoadTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeMultiBulkLoadTest.java
index a7c8b81..277df6b 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeMultiBulkLoadTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeMultiBulkLoadTest.java
@@ -52,10 +52,10 @@
@Override
protected OrderedIndexTestContext createTestContext(ISerializerDeserializer[] fieldSerdes, int numKeys,
BTreeLeafFrameType leafType) throws Exception {
- return LSMBTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMBTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, numKeys,
harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(), harness.getOperationTracker(),
- harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java
index e15e80a..f1a5355 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeSearchOperationCallbackTest.java
@@ -49,13 +49,13 @@
@Override
protected void createIndexInstance() throws Exception {
- index = LSMBTreeUtils.createLSMTree(harness.getVirtualBufferCache(), harness.getFileReference(),
+ index = LSMBTreeUtils.createLSMTree(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(),
SerdeUtils.serdesToTypeTraits(keySerdes),
SerdeUtils.serdesToComparatorFactories(keySerdes, keySerdes.length), bloomFilterKeyFields,
harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
NoOpOperationTrackerProvider.INSTANCE.getOperationTracker(null), harness.getIOScheduler(),
- harness.getIOOperationCallbackProvider());
+ harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeUpdateTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeUpdateTest.java
index db221a2..a5097b4 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeUpdateTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/LSMBTreeUpdateTest.java
@@ -51,10 +51,10 @@
@Override
protected OrderedIndexTestContext createTestContext(ISerializerDeserializer[] fieldSerdes, int numKeys,
BTreeLeafFrameType leafType) throws Exception {
- return LSMBTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMBTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, numKeys,
harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(), harness.getOperationTracker(),
- harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java
index 9dfa713..adee618 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeMultiThreadTest.java
@@ -50,10 +50,10 @@
@Override
protected ITreeIndex createIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] cmpFactories,
int[] bloomFilterKeyFields) throws TreeIndexException {
- return LSMBTreeUtils.createLSMTree(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMBTreeUtils.createLSMTree(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), typeTraits, cmpFactories,
bloomFilterKeyFields, harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeTestWorker.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeTestWorker.java
index 8a37b4e..4a96131 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeTestWorker.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/multithread/LSMBTreeTestWorker.java
@@ -19,8 +19,6 @@
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeDuplicateKeyException;
-import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.btree.exceptions.BTreeNotUpdateableException;
import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
import edu.uci.ics.hyracks.storage.am.common.AbstractIndexTestWorker;
@@ -30,6 +28,8 @@
import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
import edu.uci.ics.hyracks.storage.am.common.datagen.DataGenThread;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
import edu.uci.ics.hyracks.storage.am.lsm.btree.impls.LSMBTree;
import edu.uci.ics.hyracks.storage.am.lsm.btree.impls.LSMBTree.LSMBTreeAccessor;
@@ -60,7 +60,7 @@
case INSERT:
try {
accessor.insert(tuple);
- } catch (BTreeDuplicateKeyException e) {
+ } catch (TreeIndexDuplicateKeyException e) {
// Ignore duplicate keys, since we get random tuples.
}
break;
@@ -74,7 +74,7 @@
deleteTuple.reset(deleteTb.getFieldEndOffsets(), deleteTb.getByteArray());
try {
accessor.delete(deleteTuple);
- } catch (BTreeNonExistentKeyException e) {
+ } catch (TreeIndexNonExistentKeyException e) {
// Ignore non-existent keys, since we get random tuples.
}
break;
@@ -82,7 +82,7 @@
case UPDATE:
try {
accessor.update(tuple);
- } catch (BTreeNonExistentKeyException e) {
+ } catch (TreeIndexNonExistentKeyException e) {
// Ignore non-existent keys, since we get random tuples.
} catch (BTreeNotUpdateableException e) {
// Ignore not updateable exception due to numKeys == numFields.
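This worker also picks up the exception reshuffle: the duplicate-key and non-existent-key exceptions move out of the BTree-specific package into edu.uci.ics.hyracks.storage.am.common.exceptions, so the same handlers apply to any tree index. A hedged sketch of the resulting pattern, with accessor and tuple assumed from the surrounding worker:

    import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexDuplicateKeyException;
    import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;

    try {
        accessor.insert(tuple); // random tuples may collide with earlier inserts
    } catch (TreeIndexDuplicateKeyException e) {
        // Expected under random data; safe to ignore.
    }
    try {
        accessor.delete(tuple); // random tuples may never have been inserted
    } catch (TreeIndexNonExistentKeyException e) {
        // Expected under random data; safe to ignore.
    }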
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
index fc6642c..3591f78 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/perf/LSMTreeRunner.java
@@ -17,7 +17,9 @@
import java.io.File;
import java.text.SimpleDateFormat;
+import java.util.ArrayList;
import java.util.Date;
+import java.util.List;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
@@ -88,10 +90,15 @@
ioDeviceId = 0;
IFileMapProvider fmp = TestStorageManagerComponentHolder.getFileMapProvider(ctx);
- IVirtualBufferCache virtualBufferCache = new VirtualBufferCache(new HeapBufferAllocator(), inMemPageSize,
- inMemNumPages);
+ List<IVirtualBufferCache> virtualBufferCaches = new ArrayList<IVirtualBufferCache>();
+ for (int i = 0; i < 2; i++) {
+ IVirtualBufferCache virtualBufferCache = new VirtualBufferCache(new HeapBufferAllocator(), inMemPageSize,
+ inMemNumPages);
+ virtualBufferCaches.add(virtualBufferCache);
+ }
+
this.ioScheduler = SynchronousScheduler.INSTANCE;
- lsmtree = LSMBTreeUtils.createLSMTree(virtualBufferCache, file, bufferCache, fmp, typeTraits, cmpFactories,
+ lsmtree = LSMBTreeUtils.createLSMTree(virtualBufferCaches, file, bufferCache, fmp, typeTraits, cmpFactories,
bloomFilterKeyFields, bloomFilterFalsePositiveRate, NoMergePolicy.INSTANCE,
new ThreadCountingTracker(), ioScheduler, NoOpIOOperationCallback.INSTANCE);
}
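LSMTreeRunner now builds its two virtual buffer caches in a loop. A hedged sketch of the same setup factored into a reusable helper; the helper name is illustrative, while VirtualBufferCache and HeapBufferAllocator are the classes the runner itself uses:

    import java.util.ArrayList;
    import java.util.List;

    import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
    import edu.uci.ics.hyracks.storage.am.lsm.common.impls.VirtualBufferCache;
    import edu.uci.ics.hyracks.storage.common.buffercache.HeapBufferAllocator;

    // Illustrative helper: one heap-backed virtual buffer cache per mutable component.
    static List<IVirtualBufferCache> createVirtualBufferCaches(int numComponents,
            int pageSize, int numPagesPerComponent) {
        List<IVirtualBufferCache> caches = new ArrayList<IVirtualBufferCache>(numComponents);
        for (int i = 0; i < numComponents; i++) {
            caches.add(new VirtualBufferCache(new HeapBufferAllocator(), pageSize,
                    numPagesPerComponent));
        }
        return caches;
    }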
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java
index ab78ee6..6f7c23a 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestContext.java
@@ -16,6 +16,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.btree.util;
import java.util.Collection;
+import java.util.List;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
@@ -27,7 +28,7 @@
import edu.uci.ics.hyracks.storage.am.common.CheckTuple;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
import edu.uci.ics.hyracks.storage.am.lsm.btree.impls.LSMBTree;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
@@ -62,20 +63,20 @@
upsertCheckTuple(checkTuple, checkTuples);
}
- public static LSMBTreeTestContext create(IVirtualBufferCache virtualBufferCache, FileReference file,
+ public static LSMBTreeTestContext create(List<IVirtualBufferCache> virtualBufferCaches, FileReference file,
IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ISerializerDeserializer[] fieldSerdes,
int numKeyFields, double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy,
- ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) throws Exception {
+ ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback)
+ throws Exception {
ITypeTraits[] typeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
IBinaryComparatorFactory[] cmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes, numKeyFields);
int[] bloomFilterKeyFields = new int[numKeyFields];
for (int i = 0; i < numKeyFields; ++i) {
bloomFilterKeyFields[i] = i;
}
- LSMBTree lsmTree = LSMBTreeUtils.createLSMTree(virtualBufferCache, file, diskBufferCache, diskFileMapProvider,
+ LSMBTree lsmTree = LSMBTreeUtils.createLSMTree(virtualBufferCaches, file, diskBufferCache, diskFileMapProvider,
typeTraits, cmpFactories, bloomFilterKeyFields, bloomFilterFalsePositiveRate, mergePolicy, opTracker,
- ioScheduler, ioOpCallbackProvider);
+ ioScheduler, ioOpCallback);
LSMBTreeTestContext testCtx = new LSMBTreeTestContext(fieldSerdes, lsmTree);
return testCtx;
}
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestHarness.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestHarness.java
index e643cf9..b88da7b 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestHarness.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-btree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/btree/util/LSMBTreeTestHarness.java
@@ -18,7 +18,9 @@
import java.io.File;
import java.io.FilenameFilter;
import java.text.SimpleDateFormat;
+import java.util.ArrayList;
import java.util.Date;
+import java.util.List;
import java.util.Random;
import java.util.logging.Logger;
@@ -30,7 +32,7 @@
import edu.uci.ics.hyracks.control.nc.io.IOManager;
import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeLeafFrameType;
import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
@@ -60,17 +62,18 @@
protected final int memNumPages;
protected final int hyracksFrameSize;
protected final double bloomFilterFalsePositiveRate;
+ protected final int numMutableComponents;
protected IOManager ioManager;
protected int ioDeviceId;
protected IBufferCache diskBufferCache;
protected IFileMapProvider diskFileMapProvider;
- protected IVirtualBufferCache virtualBufferCache;
+ protected List<IVirtualBufferCache> virtualBufferCaches;
protected IHyracksTaskContext ctx;
protected ILSMIOOperationScheduler ioScheduler;
protected ILSMMergePolicy mergePolicy;
protected ILSMOperationTracker opTracker;
- protected ILSMIOOperationCallbackProvider ioOpCallbackProvider;
+ protected ILSMIOOperationCallback ioOpCallback;
protected final Random rnd = new Random();
protected final static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("ddMMyy-hhmmssSS");
@@ -89,21 +92,8 @@
this.ioScheduler = SynchronousScheduler.INSTANCE;
this.mergePolicy = NoMergePolicy.INSTANCE;
this.opTracker = new ThreadCountingTracker();
- this.ioOpCallbackProvider = NoOpIOOperationCallback.INSTANCE;
- }
-
- public LSMBTreeTestHarness(int diskPageSize, int diskNumPages, int diskMaxOpenFiles, int memPageSize,
- int memNumPages, int hyracksFrameSize, double bloomFilterFalsePositiveRate) {
- this.diskPageSize = diskPageSize;
- this.diskNumPages = diskNumPages;
- this.diskMaxOpenFiles = diskMaxOpenFiles;
- this.memPageSize = memPageSize;
- this.memNumPages = memNumPages;
- this.hyracksFrameSize = hyracksFrameSize;
- this.bloomFilterFalsePositiveRate = bloomFilterFalsePositiveRate;
- this.ioScheduler = SynchronousScheduler.INSTANCE;
- this.mergePolicy = NoMergePolicy.INSTANCE;
- this.opTracker = new ThreadCountingTracker();
+ this.ioOpCallback = NoOpIOOperationCallback.INSTANCE;
+ this.numMutableComponents = AccessMethodTestsConfig.LSM_BTREE_NUM_MUTABLE_COMPONENTS;
}
public void setUp() throws HyracksException {
@@ -116,7 +106,12 @@
TestStorageManagerComponentHolder.init(diskPageSize, diskNumPages, diskMaxOpenFiles);
diskBufferCache = TestStorageManagerComponentHolder.getBufferCache(ctx);
diskFileMapProvider = TestStorageManagerComponentHolder.getFileMapProvider(ctx);
- virtualBufferCache = new VirtualBufferCache(new HeapBufferAllocator(), memPageSize, memNumPages);
+ virtualBufferCaches = new ArrayList<IVirtualBufferCache>();
+ for (int i = 0; i < numMutableComponents; i++) {
+ IVirtualBufferCache virtualBufferCache = new VirtualBufferCache(new HeapBufferAllocator(), memPageSize,
+ memNumPages / numMutableComponents);
+ virtualBufferCaches.add(virtualBufferCache);
+ }
rnd.setSeed(RANDOM_SEED);
}
@@ -179,8 +174,8 @@
return diskFileMapProvider;
}
- public IVirtualBufferCache getVirtualBufferCache() {
- return virtualBufferCache;
+ public List<IVirtualBufferCache> getVirtualBufferCaches() {
+ return virtualBufferCaches;
}
public double getBoomFilterFalsePositiveRate() {
@@ -211,7 +206,7 @@
return mergePolicy;
}
- public ILSMIOOperationCallbackProvider getIOOperationCallbackProvider() {
- return ioOpCallbackProvider;
+ public ILSMIOOperationCallback getIOOperationCallback() {
+ return ioOpCallback;
}
}
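One behavioral detail in the harness rewrite: the fixed in-memory page budget memNumPages is now split evenly across the mutable components, so raising AccessMethodTestsConfig.LSM_BTREE_NUM_MUTABLE_COMPONENTS shrinks each component's cache rather than enlarging the total footprint. A quick illustration with assumed numbers (not values from AccessMethodTestsConfig):

    int memNumPages = 1000;           // assumed total in-memory budget
    int numMutableComponents = 2;     // assumed component count
    int pagesPerComponent = memNumPages / numMutableComponents; // 500 pages per cache
    // Integer division truncates: with 3 components each cache gets 333 pages,
    // leaving one page of the budget unused.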
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-common-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-lsm-common-test/pom.xml
index b180994..7a4dc6f 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-common-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-common-test/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-tests</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -39,14 +39,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-test-support</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
index 45a5837..ba3e606 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
@@ -19,7 +19,7 @@
<parent>
<artifactId>hyracks-tests</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<relativePath>..</relativePath>
</parent>
@@ -40,21 +40,21 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-test-support</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>test</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>test</scope>
</dependency>
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
index 7276464..0907fc0 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
@@ -18,7 +18,9 @@
import java.io.File;
import java.io.FilenameFilter;
import java.text.SimpleDateFormat;
+import java.util.ArrayList;
import java.util.Date;
+import java.util.List;
import java.util.Random;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
@@ -27,14 +29,12 @@
import edu.uci.ics.hyracks.api.io.FileReference;
import edu.uci.ics.hyracks.api.io.IODeviceHandle;
import edu.uci.ics.hyracks.control.nc.io.IOManager;
-import edu.uci.ics.hyracks.storage.am.common.api.IVirtualFreePageManager;
import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
-import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.VirtualFreePageManager;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.MultitenantVirtualBufferCache;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.NoMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.impls.NoOpIOOperationCallback;
@@ -58,18 +58,18 @@
protected final int memNumPages;
protected final int hyracksFrameSize;
protected final double bloomFilterFalsePositiveRate;
+ protected final int numMutableComponents;
protected IOManager ioManager;
protected int ioDeviceId;
protected IBufferCache diskBufferCache;
protected IFileMapProvider diskFileMapProvider;
- protected IVirtualBufferCache virtualBufferCache;
- protected IVirtualFreePageManager virtualFreePageManager;
+ protected List<IVirtualBufferCache> virtualBufferCaches;
protected IHyracksTaskContext ctx;
protected ILSMIOOperationScheduler ioScheduler;
protected ILSMMergePolicy mergePolicy;
protected ILSMOperationTracker opTracker;
- protected ILSMIOOperationCallbackProvider ioOpCallbackProvider;
+ protected ILSMIOOperationCallback ioOpCallback;
protected final Random rnd = new Random();
protected final static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("ddMMyy-hhmmssSS");
@@ -90,21 +90,8 @@
this.ioScheduler = SynchronousScheduler.INSTANCE;
this.mergePolicy = NoMergePolicy.INSTANCE;
this.opTracker = new ThreadCountingTracker();
- this.ioOpCallbackProvider = NoOpIOOperationCallback.INSTANCE;
- }
-
- public LSMInvertedIndexTestHarness(int diskPageSize, int diskNumPages, int diskMaxOpenFiles, int memPageSize,
- int memNumPages, int hyracksFrameSize, double bloomFilterFalsePositiveRate) {
- this.diskPageSize = diskPageSize;
- this.diskNumPages = diskNumPages;
- this.diskMaxOpenFiles = diskMaxOpenFiles;
- this.memPageSize = memPageSize;
- this.memNumPages = memNumPages;
- this.hyracksFrameSize = hyracksFrameSize;
- this.bloomFilterFalsePositiveRate = bloomFilterFalsePositiveRate;
- this.ioScheduler = SynchronousScheduler.INSTANCE;
- this.mergePolicy = NoMergePolicy.INSTANCE;
- this.opTracker = new ThreadCountingTracker();
+ this.ioOpCallback = NoOpIOOperationCallback.INSTANCE;
+ this.numMutableComponents = AccessMethodTestsConfig.LSM_INVINDEX_NUM_MUTABLE_COMPONENTS;
}
public void setUp() throws HyracksException {
@@ -116,10 +103,13 @@
TestStorageManagerComponentHolder.init(diskPageSize, diskNumPages, diskMaxOpenFiles);
diskBufferCache = TestStorageManagerComponentHolder.getBufferCache(ctx);
diskFileMapProvider = TestStorageManagerComponentHolder.getFileMapProvider(ctx);
- virtualBufferCache = new MultitenantVirtualBufferCache(new VirtualBufferCache(new HeapBufferAllocator(),
- memPageSize, memNumPages));
- virtualBufferCache.open();
- virtualFreePageManager = new VirtualFreePageManager(memNumPages);
+ virtualBufferCaches = new ArrayList<IVirtualBufferCache>();
+ for (int i = 0; i < numMutableComponents; i++) {
+ IVirtualBufferCache virtualBufferCache = new MultitenantVirtualBufferCache(new VirtualBufferCache(
+ new HeapBufferAllocator(), memPageSize, memNumPages / numMutableComponents));
+ virtualBufferCaches.add(virtualBufferCache);
+ virtualBufferCache.open();
+ }
rnd.setSeed(RANDOM_SEED);
invIndexFileRef = ioManager.getIODevices().get(0).createFileReference(onDiskDir + invIndexFileName);
}
@@ -141,7 +131,9 @@
}
}
dir.delete();
- virtualBufferCache.close();
+ for (int i = 0; i < numMutableComponents; i++) {
+ virtualBufferCaches.get(i).close();
+ }
}
public FileReference getInvListsFileRef() {
@@ -188,18 +180,14 @@
return diskFileMapProvider;
}
- public IVirtualBufferCache getVirtualBufferCache() {
- return virtualBufferCache;
+ public List<IVirtualBufferCache> getVirtualBufferCaches() {
+ return virtualBufferCaches;
}
public double getBoomFilterFalsePositiveRate() {
return bloomFilterFalsePositiveRate;
}
- public IVirtualFreePageManager getVirtualFreePageManager() {
- return virtualFreePageManager;
- }
-
public IHyracksTaskContext getHyracksTastContext() {
return ctx;
}
@@ -224,7 +212,7 @@
return mergePolicy;
}
- public ILSMIOOperationCallbackProvider getIOOperationCallbackProvider() {
- return ioOpCallbackProvider;
+ public ILSMIOOperationCallback getIOOperationCallback() {
+ return ioOpCallback;
}
}
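Unlike the BTree and RTree harnesses, the inverted-index harness wraps each cache in a MultitenantVirtualBufferCache and manages the lifecycle explicitly: each cache is opened as it is created in setUp(), and every cache is closed in tearDown(). A hedged sketch of that pattern in isolation, with memPageSize, memNumPages, and numMutableComponents assumed from the harness fields:

    // In setUp(): create, open, and collect one multitenant cache per component.
    List<IVirtualBufferCache> caches = new ArrayList<IVirtualBufferCache>();
    for (int i = 0; i < numMutableComponents; i++) {
        IVirtualBufferCache vbc = new MultitenantVirtualBufferCache(new VirtualBufferCache(
                new HeapBufferAllocator(), memPageSize, memNumPages / numMutableComponents));
        vbc.open(); // opened eagerly, so tearDown() must close every cache
        caches.add(vbc);
    }

    // In tearDown(): release every cache that setUp() opened.
    for (IVirtualBufferCache vbc : caches) {
        vbc.close();
    }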
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java
index 05dc1b5..059df73 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java
@@ -36,6 +36,7 @@
import edu.uci.ics.hyracks.storage.am.common.CheckTuple;
import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.VirtualFreePageManager;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.InvertedIndexException;
@@ -118,18 +119,20 @@
}
// Create index and test context.
IInvertedIndex invIndex;
+ assert harness.getVirtualBufferCaches().size() > 0;
switch (invIndexType) {
case INMEMORY: {
- invIndex = InvertedIndexUtils.createInMemoryBTreeInvertedindex(harness.getVirtualBufferCache(),
- harness.getVirtualFreePageManager(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
- tokenCmpFactories, tokenizerFactory, new FileReference(new File(harness.getOnDiskDir())));
+ invIndex = InvertedIndexUtils.createInMemoryBTreeInvertedindex(harness.getVirtualBufferCaches().get(0),
+ new VirtualFreePageManager(harness.getVirtualBufferCaches().get(0).getNumPages()),
+ invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
+ new FileReference(new File(harness.getOnDiskDir())));
break;
}
case PARTITIONED_INMEMORY: {
invIndex = InvertedIndexUtils.createPartitionedInMemoryBTreeInvertedindex(harness
- .getVirtualBufferCache(), harness.getVirtualFreePageManager(), invListTypeTraits,
- invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory, new FileReference(
- new File(harness.getOnDiskDir())));
+ .getVirtualBufferCaches().get(0), new VirtualFreePageManager(harness.getVirtualBufferCaches()
+ .get(0).getNumPages()), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
+ tokenCmpFactories, tokenizerFactory, new FileReference(new File(harness.getOnDiskDir())));
break;
}
case ONDISK: {
@@ -145,21 +148,19 @@
break;
}
case LSM: {
- invIndex = InvertedIndexUtils.createLSMInvertedIndex(harness.getVirtualBufferCache(),
+ invIndex = InvertedIndexUtils.createLSMInvertedIndex(harness.getVirtualBufferCaches(),
harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
tokenCmpFactories, tokenizerFactory, harness.getDiskBufferCache(), harness.getOnDiskDir(),
harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(),
- harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
break;
}
case PARTITIONED_LSM: {
- invIndex = InvertedIndexUtils.createPartitionedLSMInvertedIndex(harness.getVirtualBufferCache(),
+ invIndex = InvertedIndexUtils.createPartitionedLSMInvertedIndex(harness.getVirtualBufferCaches(),
harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
tokenCmpFactories, tokenizerFactory, harness.getDiskBufferCache(), harness.getOnDiskDir(),
harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(),
- harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
break;
}
default: {
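With the harness field gone, the purely in-memory index variants (the INMEMORY and PARTITIONED_INMEMORY cases above) build their VirtualFreePageManager on demand, sized by the page count of the first virtual buffer cache, which is the only one these variants use. A hedged sketch of just that construction, where harness is the LSMInvertedIndexTestHarness above:

    IVirtualBufferCache vbc = harness.getVirtualBufferCaches().get(0);
    VirtualFreePageManager freePageManager = new VirtualFreePageManager(vbc.getNumPages());
    // vbc and freePageManager then stand in for the removed harness-level
    // getVirtualBufferCache()/getVirtualFreePageManager() accessors.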
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/pom.xml
index 14863e5..3d4fece 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/pom.xml
@@ -19,7 +19,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-tests</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -39,14 +39,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-rtree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-test-support</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeBulkLoadTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeBulkLoadTest.java
index 3fcb4e5..dadd08c 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeBulkLoadTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeBulkLoadTest.java
@@ -54,10 +54,10 @@
protected AbstractRTreeTestContext createTestContext(ISerializerDeserializer[] fieldSerdes,
IPrimitiveValueProviderFactory[] valueProviderFactories, int numKeys, RTreePolicyType rtreePolicyType)
throws Exception {
- return LSMRTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMRTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, valueProviderFactories,
numKeys, rtreePolicyType, harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeDeleteTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeDeleteTest.java
index 467916e..0b92cd3 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeDeleteTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeDeleteTest.java
@@ -54,10 +54,10 @@
protected AbstractRTreeTestContext createTestContext(ISerializerDeserializer[] fieldSerdes,
IPrimitiveValueProviderFactory[] valueProviderFactories, int numKeys, RTreePolicyType rtreePolicyType)
throws Exception {
- return LSMRTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMRTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, valueProviderFactories,
numKeys, rtreePolicyType, harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeExamplesTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeExamplesTest.java
index a35170c..b04ae16 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeExamplesTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeExamplesTest.java
@@ -37,11 +37,11 @@
protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] rtreeCmpFactories,
IBinaryComparatorFactory[] btreeCmpFactories, IPrimitiveValueProviderFactory[] valueProviderFactories,
RTreePolicyType rtreePolicyType) throws TreeIndexException {
- return LSMRTreeUtils.createLSMTree(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMRTreeUtils.createLSMTree(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), typeTraits, rtreeCmpFactories,
btreeCmpFactories, valueProviderFactories, rtreePolicyType, harness.getBoomFilterFalsePositiveRate(),
harness.getMergePolicy(), harness.getOperationTracker(), harness.getIOScheduler(),
- harness.getIOOperationCallbackProvider(),
+ harness.getIOOperationCallback(),
LSMRTreeUtils.proposeBestLinearizer(typeTraits, rtreeCmpFactories.length));
}
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeInsertTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeInsertTest.java
index 44da0c0..35c6a6b 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeInsertTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeInsertTest.java
@@ -54,10 +54,10 @@
protected AbstractRTreeTestContext createTestContext(ISerializerDeserializer[] fieldSerdes,
IPrimitiveValueProviderFactory[] valueProviderFactories, int numKeys, RTreePolicyType rtreePolicyType)
throws Exception {
- return LSMRTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMRTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, valueProviderFactories,
numKeys, rtreePolicyType, harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeLifecycleTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeLifecycleTest.java
index b97bebb..6ef2e94 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeLifecycleTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeLifecycleTest.java
@@ -63,10 +63,10 @@
@Override
public void setup() throws Exception {
harness.setUp();
- testCtx = LSMRTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ testCtx = LSMRTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, valueProviderFactories,
numKeys, RTreePolicyType.RTREE, harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
index = testCtx.getIndex();
}
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeMergeTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeMergeTest.java
index b13ce4d..94528e7 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeMergeTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeMergeTest.java
@@ -53,10 +53,10 @@
protected AbstractRTreeTestContext createTestContext(ISerializerDeserializer[] fieldSerdes,
IPrimitiveValueProviderFactory[] valueProviderFactories, int numKeys, RTreePolicyType rtreePolicyType)
throws Exception {
- return LSMRTreeTestContext.create(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMRTreeTestContext.create(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), fieldSerdes, valueProviderFactories,
numKeys, rtreePolicyType, harness.getBoomFilterFalsePositiveRate(), harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesBulkLoadTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesBulkLoadTest.java
index 2baf485..22a6eb8 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesBulkLoadTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesBulkLoadTest.java
@@ -54,10 +54,10 @@
protected AbstractRTreeTestContext createTestContext(ISerializerDeserializer[] fieldSerdes,
IPrimitiveValueProviderFactory[] valueProviderFactories, int numKeys, RTreePolicyType rtreePolicyType)
throws Exception {
- return LSMRTreeWithAntiMatterTuplesTestContext.create(harness.getVirtualBufferCache(),
+ return LSMRTreeWithAntiMatterTuplesTestContext.create(harness.getVirtualBufferCaches(),
harness.getFileReference(), harness.getDiskBufferCache(), harness.getDiskFileMapProvider(),
fieldSerdes, valueProviderFactories, numKeys, rtreePolicyType, harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesDeleteTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesDeleteTest.java
index 9c440cc..10aec7e 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesDeleteTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesDeleteTest.java
@@ -54,10 +54,10 @@
protected AbstractRTreeTestContext createTestContext(ISerializerDeserializer[] fieldSerdes,
IPrimitiveValueProviderFactory[] valueProviderFactories, int numKeys, RTreePolicyType rtreePolicyType)
throws Exception {
- return LSMRTreeWithAntiMatterTuplesTestContext.create(harness.getVirtualBufferCache(),
+ return LSMRTreeWithAntiMatterTuplesTestContext.create(harness.getVirtualBufferCaches(),
harness.getFileReference(), harness.getDiskBufferCache(), harness.getDiskFileMapProvider(),
fieldSerdes, valueProviderFactories, numKeys, rtreePolicyType, harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesExamplesTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesExamplesTest.java
index 909d0cd..a51abef 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesExamplesTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesExamplesTest.java
@@ -37,11 +37,11 @@
protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] rtreeCmpFactories,
IBinaryComparatorFactory[] btreeCmpFactories, IPrimitiveValueProviderFactory[] valueProviderFactories,
RTreePolicyType rtreePolicyType) throws TreeIndexException {
- return LSMRTreeUtils.createLSMTreeWithAntiMatterTuples(harness.getVirtualBufferCache(),
+ return LSMRTreeUtils.createLSMTreeWithAntiMatterTuples(harness.getVirtualBufferCaches(),
harness.getFileReference(), harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), typeTraits,
rtreeCmpFactories, btreeCmpFactories, valueProviderFactories, rtreePolicyType,
harness.getMergePolicy(), harness.getOperationTracker(), harness.getIOScheduler(),
- harness.getIOOperationCallbackProvider(),
+ harness.getIOOperationCallback(),
LSMRTreeUtils.proposeBestLinearizer(typeTraits, rtreeCmpFactories.length));
}
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesInsertTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesInsertTest.java
index 6b35192..e5b33cc 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesInsertTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesInsertTest.java
@@ -54,10 +54,10 @@
protected AbstractRTreeTestContext createTestContext(ISerializerDeserializer[] fieldSerdes,
IPrimitiveValueProviderFactory[] valueProviderFactories, int numKeys, RTreePolicyType rtreePolicyType)
throws Exception {
- return LSMRTreeWithAntiMatterTuplesTestContext.create(harness.getVirtualBufferCache(),
+ return LSMRTreeWithAntiMatterTuplesTestContext.create(harness.getVirtualBufferCaches(),
harness.getFileReference(), harness.getDiskBufferCache(), harness.getDiskFileMapProvider(),
fieldSerdes, valueProviderFactories, numKeys, rtreePolicyType, harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesLifecycleTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesLifecycleTest.java
index 6dcbdf2..10ae5e1 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesLifecycleTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesLifecycleTest.java
@@ -63,10 +63,10 @@
@Override
public void setup() throws Exception {
harness.setUp();
- testCtx = LSMRTreeWithAntiMatterTuplesTestContext.create(harness.getVirtualBufferCache(),
+ testCtx = LSMRTreeWithAntiMatterTuplesTestContext.create(harness.getVirtualBufferCaches(),
harness.getFileReference(), harness.getDiskBufferCache(), harness.getDiskFileMapProvider(),
fieldSerdes, valueProviderFactories, numKeys, RTreePolicyType.RTREE, harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
index = testCtx.getIndex();
}
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesMergeTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesMergeTest.java
index dc81fc3..21d3759 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesMergeTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/LSMRTreeWithAntiMatterTuplesMergeTest.java
@@ -53,10 +53,10 @@
protected AbstractRTreeTestContext createTestContext(ISerializerDeserializer[] fieldSerdes,
IPrimitiveValueProviderFactory[] valueProviderFactories, int numKeys, RTreePolicyType rtreePolicyType)
throws Exception {
- return LSMRTreeWithAntiMatterTuplesTestContext.create(harness.getVirtualBufferCache(),
+ return LSMRTreeWithAntiMatterTuplesTestContext.create(harness.getVirtualBufferCaches(),
harness.getFileReference(), harness.getDiskBufferCache(), harness.getDiskFileMapProvider(),
fieldSerdes, valueProviderFactories, numKeys, rtreePolicyType, harness.getMergePolicy(),
- harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallbackProvider());
+ harness.getOperationTracker(), harness.getIOScheduler(), harness.getIOOperationCallback());
}
@Override
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeMultiThreadTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeMultiThreadTest.java
index 850d540..077bd45 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeMultiThreadTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeMultiThreadTest.java
@@ -57,11 +57,11 @@
protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] rtreeCmpFactories,
IBinaryComparatorFactory[] btreeCmpFactories, IPrimitiveValueProviderFactory[] valueProviderFactories,
RTreePolicyType rtreePolicyType) throws TreeIndexException {
- return LSMRTreeUtils.createLSMTree(harness.getVirtualBufferCache(), harness.getFileReference(),
+ return LSMRTreeUtils.createLSMTree(harness.getVirtualBufferCaches(), harness.getFileReference(),
harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), typeTraits, rtreeCmpFactories,
btreeCmpFactories, valueProviderFactories, rtreePolicyType, harness.getBoomFilterFalsePositiveRate(),
harness.getMergePolicy(), harness.getOperationTracker(), harness.getIOScheduler(),
- harness.getIOOperationCallbackProvider(),
+ harness.getIOOperationCallback(),
LSMRTreeUtils.proposeBestLinearizer(typeTraits, rtreeCmpFactories.length));
}
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeWithAntiMatterTuplesMultiThreadTest.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeWithAntiMatterTuplesMultiThreadTest.java
index 4a6b462..ef871dd 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeWithAntiMatterTuplesMultiThreadTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/multithread/LSMRTreeWithAntiMatterTuplesMultiThreadTest.java
@@ -57,11 +57,11 @@
protected ITreeIndex createTreeIndex(ITypeTraits[] typeTraits, IBinaryComparatorFactory[] rtreeCmpFactories,
IBinaryComparatorFactory[] btreeCmpFactories, IPrimitiveValueProviderFactory[] valueProviderFactories,
RTreePolicyType rtreePolicyType) throws TreeIndexException {
- return LSMRTreeUtils.createLSMTreeWithAntiMatterTuples(harness.getVirtualBufferCache(),
+ return LSMRTreeUtils.createLSMTreeWithAntiMatterTuples(harness.getVirtualBufferCaches(),
harness.getFileReference(), harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), typeTraits,
rtreeCmpFactories, btreeCmpFactories, valueProviderFactories, rtreePolicyType,
harness.getMergePolicy(), harness.getOperationTracker(), harness.getIOScheduler(),
- harness.getIOOperationCallbackProvider(),
+ harness.getIOOperationCallback(),
LSMRTreeUtils.proposeBestLinearizer(typeTraits, rtreeCmpFactories.length));
}
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeTestContext.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeTestContext.java
index 8c59e31..42e793b 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeTestContext.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeTestContext.java
@@ -16,6 +16,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.util;
import java.util.Collection;
+import java.util.List;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
@@ -25,7 +26,7 @@
import edu.uci.ics.hyracks.dataflow.common.util.SerdeUtils;
import edu.uci.ics.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
@@ -66,20 +67,19 @@
return lsmTree.getComparatorFactories();
}
- public static LSMRTreeTestContext create(IVirtualBufferCache virtualBufferCache, FileReference file,
+ public static LSMRTreeTestContext create(List<IVirtualBufferCache> virtualBufferCaches, FileReference file,
IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider, ISerializerDeserializer[] fieldSerdes,
IPrimitiveValueProviderFactory[] valueProviderFactories, int numKeyFields, RTreePolicyType rtreePolicyType,
double bloomFilterFalsePositiveRate, ILSMMergePolicy mergePolicy, ILSMOperationTracker opTracker,
- ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallbackProvider ioOpCallbackProvider)
- throws Exception {
+ ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback) throws Exception {
ITypeTraits[] typeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
IBinaryComparatorFactory[] rtreeCmpFactories = SerdeUtils
.serdesToComparatorFactories(fieldSerdes, numKeyFields);
IBinaryComparatorFactory[] btreeCmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes,
fieldSerdes.length);
- LSMRTree lsmTree = LSMRTreeUtils.createLSMTree(virtualBufferCache, file, diskBufferCache, diskFileMapProvider,
+ LSMRTree lsmTree = LSMRTreeUtils.createLSMTree(virtualBufferCaches, file, diskBufferCache, diskFileMapProvider,
typeTraits, rtreeCmpFactories, btreeCmpFactories, valueProviderFactories, rtreePolicyType,
- bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler, ioOpCallbackProvider,
+ bloomFilterFalsePositiveRate, mergePolicy, opTracker, ioScheduler, ioOpCallback,
LSMRTreeUtils.proposeBestLinearizer(typeTraits, rtreeCmpFactories.length));
LSMRTreeTestContext testCtx = new LSMRTreeTestContext(fieldSerdes, lsmTree);
return testCtx;
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeTestHarness.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeTestHarness.java
index 118b1bc..66e79a2 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeTestHarness.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeTestHarness.java
@@ -18,7 +18,9 @@
import java.io.File;
import java.io.FilenameFilter;
import java.text.SimpleDateFormat;
+import java.util.ArrayList;
import java.util.Date;
+import java.util.List;
import java.util.Random;
import java.util.logging.Logger;
@@ -29,7 +31,7 @@
import edu.uci.ics.hyracks.api.io.IODeviceHandle;
import edu.uci.ics.hyracks.control.nc.io.IOManager;
import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
@@ -57,15 +59,16 @@
protected final int memNumPages;
protected final int hyracksFrameSize;
protected final double bloomFilterFalsePositiveRate;
+ protected final int numMutableComponents;
protected IOManager ioManager;
protected int ioDeviceId;
protected IBufferCache diskBufferCache;
protected IFileMapProvider diskFileMapProvider;
- protected IVirtualBufferCache virtualBufferCache;
+ protected List<IVirtualBufferCache> virtualBufferCaches;
protected IHyracksTaskContext ctx;
protected ILSMIOOperationScheduler ioScheduler;
- protected ILSMIOOperationCallbackProvider ioOpCallbackProvider;
+ protected ILSMIOOperationCallback ioOpCallback;
protected ILSMMergePolicy mergePolicy;
protected ILSMOperationTracker opTracker;
@@ -86,21 +89,8 @@
this.ioScheduler = SynchronousScheduler.INSTANCE;
this.mergePolicy = NoMergePolicy.INSTANCE;
this.opTracker = new ThreadCountingTracker();
- this.ioOpCallbackProvider = NoOpIOOperationCallback.INSTANCE;
- }
-
- public LSMRTreeTestHarness(int diskPageSize, int diskNumPages, int diskMaxOpenFiles, int memPageSize,
- int memNumPages, int hyracksFrameSize, double bloomFilterFalsePositiveRate) {
- this.diskPageSize = diskPageSize;
- this.diskNumPages = diskNumPages;
- this.diskMaxOpenFiles = diskMaxOpenFiles;
- this.memPageSize = memPageSize;
- this.memNumPages = memNumPages;
- this.bloomFilterFalsePositiveRate = bloomFilterFalsePositiveRate;
- this.hyracksFrameSize = hyracksFrameSize;
- this.ioScheduler = SynchronousScheduler.INSTANCE;
- this.mergePolicy = NoMergePolicy.INSTANCE;
- this.opTracker = new ThreadCountingTracker();
+ this.ioOpCallback = NoOpIOOperationCallback.INSTANCE;
+ this.numMutableComponents = AccessMethodTestsConfig.LSM_RTREE_NUM_MUTABLE_COMPONENTS;
}
public void setUp() throws HyracksException {
@@ -113,7 +103,12 @@
TestStorageManagerComponentHolder.init(diskPageSize, diskNumPages, diskMaxOpenFiles);
diskBufferCache = TestStorageManagerComponentHolder.getBufferCache(ctx);
diskFileMapProvider = TestStorageManagerComponentHolder.getFileMapProvider(ctx);
- virtualBufferCache = new VirtualBufferCache(new HeapBufferAllocator(), memPageSize, memNumPages);
+ virtualBufferCaches = new ArrayList<IVirtualBufferCache>();
+ for (int i = 0; i < numMutableComponents; i++) {
+ IVirtualBufferCache virtualBufferCache = new VirtualBufferCache(new HeapBufferAllocator(), memPageSize,
+ memNumPages / numMutableComponents);
+ virtualBufferCaches.add(virtualBufferCache);
+ }
rnd.setSeed(RANDOM_SEED);
}
@@ -175,8 +170,8 @@
return diskFileMapProvider;
}
- public IVirtualBufferCache getVirtualBufferCache() {
- return virtualBufferCache;
+ public List<IVirtualBufferCache> getVirtualBufferCaches() {
+ return virtualBufferCaches;
}
public double getBoomFilterFalsePositiveRate() {
@@ -211,7 +206,7 @@
return mergePolicy;
}
- public ILSMIOOperationCallbackProvider getIOOperationCallbackProvider() {
- return ioOpCallbackProvider;
+ public ILSMIOOperationCallback getIOOperationCallback() {
+ return ioOpCallback;
}
}
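Note that the harness above splits its in-memory page budget evenly across the mutable components with integer division, so up to numMutableComponents - 1 pages can go unallocated. A minimal sketch of the arithmetic, with assumed numbers:

    public class PageBudgetSketch {
        public static void main(String[] args) {
            int memNumPages = 1000;       // assumed total in-memory page budget
            int numMutableComponents = 3; // assumed mutable component count
            int perComponent = memNumPages / numMutableComponents;
            // prints 333; 1000 - 3 * 333 = 1 page stays unallocated
            System.out.println(perComponent);
        }
    }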
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeWithAntiMatterTuplesTestContext.java b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeWithAntiMatterTuplesTestContext.java
index 95716f6..6a0a6bb 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeWithAntiMatterTuplesTestContext.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-lsm-rtree-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/rtree/util/LSMRTreeWithAntiMatterTuplesTestContext.java
@@ -16,6 +16,7 @@
package edu.uci.ics.hyracks.storage.am.lsm.rtree.util;
import java.util.Collection;
+import java.util.List;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
@@ -25,7 +26,7 @@
import edu.uci.ics.hyracks.dataflow.common.util.SerdeUtils;
import edu.uci.ics.hyracks.storage.am.common.api.IPrimitiveValueProviderFactory;
import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndex;
-import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallback;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTracker;
@@ -67,20 +68,20 @@
return lsmTree.getComparatorFactories();
}
- public static LSMRTreeWithAntiMatterTuplesTestContext create(IVirtualBufferCache virtualBufferCache,
+ public static LSMRTreeWithAntiMatterTuplesTestContext create(List<IVirtualBufferCache> virtualBufferCaches,
FileReference file, IBufferCache diskBufferCache, IFileMapProvider diskFileMapProvider,
ISerializerDeserializer[] fieldSerdes, IPrimitiveValueProviderFactory[] valueProviderFactories,
int numKeyFields, RTreePolicyType rtreePolicyType, ILSMMergePolicy mergePolicy,
- ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler,
- ILSMIOOperationCallbackProvider ioOpCallbackProvider) throws Exception {
+ ILSMOperationTracker opTracker, ILSMIOOperationScheduler ioScheduler, ILSMIOOperationCallback ioOpCallback)
+ throws Exception {
ITypeTraits[] typeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
IBinaryComparatorFactory[] rtreeCmpFactories = SerdeUtils
.serdesToComparatorFactories(fieldSerdes, numKeyFields);
IBinaryComparatorFactory[] btreeCmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes,
fieldSerdes.length);
- LSMRTreeWithAntiMatterTuples lsmTree = LSMRTreeUtils.createLSMTreeWithAntiMatterTuples(virtualBufferCache,
+ LSMRTreeWithAntiMatterTuples lsmTree = LSMRTreeUtils.createLSMTreeWithAntiMatterTuples(virtualBufferCaches,
file, diskBufferCache, diskFileMapProvider, typeTraits, rtreeCmpFactories, btreeCmpFactories,
- valueProviderFactories, rtreePolicyType, mergePolicy, opTracker, ioScheduler, ioOpCallbackProvider,
+ valueProviderFactories, rtreePolicyType, mergePolicy, opTracker, ioScheduler, ioOpCallback,
LSMRTreeUtils.proposeBestLinearizer(typeTraits, rtreeCmpFactories.length));
LSMRTreeWithAntiMatterTuplesTestContext testCtx = new LSMRTreeWithAntiMatterTuplesTestContext(fieldSerdes,
lsmTree);
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-rtree-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-am-rtree-test/pom.xml
index 8c5178e..547a3a4 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-rtree-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-am-rtree-test/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-tests</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -48,14 +48,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-rtree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-test-support</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>test</scope>
</dependency>
diff --git a/hyracks/hyracks-tests/hyracks-storage-common-test/pom.xml b/hyracks/hyracks-tests/hyracks-storage-common-test/pom.xml
index 0930280..d9fdc26 100644
--- a/hyracks/hyracks-tests/hyracks-storage-common-test/pom.xml
+++ b/hyracks/hyracks-tests/hyracks-storage-common-test/pom.xml
@@ -20,7 +20,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-tests</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -48,13 +48,13 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-test-support</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hyracks/hyracks-tests/pom.xml b/hyracks/hyracks-tests/pom.xml
index 2990e53..441b76c 100644
--- a/hyracks/hyracks-tests/pom.xml
+++ b/hyracks/hyracks-tests/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<modules>
diff --git a/hyracks/pom.xml b/hyracks/pom.xml
index 6282fdd..a4a54f2 100644
--- a/hyracks/pom.xml
+++ b/hyracks/pom.xml
@@ -17,7 +17,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<packaging>pom</packaging>
<name>hyracks</name>
diff --git a/pom.xml b/pom.xml
index 3428836..a851758 100644
--- a/pom.xml
+++ b/pom.xml
@@ -17,7 +17,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>fullstack</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<packaging>pom</packaging>
<name>hyracks-ecosystem-full-stack</name>
diff --git a/pregelix/build.sh b/pregelix/build.sh
new file mode 100644
index 0000000..e213181
--- /dev/null
+++ b/pregelix/build.sh
@@ -0,0 +1,12 @@
+rm -rf dist
+mkdir dist
+
+hadoop_versions=(0.20.2 0.23.1 0.23.6 1.0.4 cdh-4.1 cdh-4.2)
+cd ../
+for v in ${hadoop_versions[@]}
+do
+ #echo mvn clean package -DskipTests=true -Dhadoop=${v}
+ mvn clean package -DskipTests=true -Dhadoop=${v}
+ #echo mv pregelix/pregelix-dist/target/pregelix-dist-*-binary-assembly.zip pregelix/dist/pregelix-dist-binary-assembly-hdfs-${v}.zip
+ mv pregelix/pregelix-dist/target/pregelix-dist-*-binary-assembly.zip pregelix/dist/pregelix-dist-binary-assembly-hdfs-${v}.zip
+done
diff --git a/pregelix/pom.xml b/pregelix/pom.xml
index c0c3822..d748d1f 100644
--- a/pregelix/pom.xml
+++ b/pregelix/pom.xml
@@ -12,17 +12,16 @@
! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
! See the License for the specific language governing permissions and
! limitations under the License.
- !-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ !--><project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<packaging>pom</packaging>
<name>pregelix</name>
<properties>
- <jvm.extraargs />
+ <jvm.extraargs/>
</properties>
<profiles>
@@ -107,6 +106,7 @@
<module>pregelix-runtime</module>
<module>pregelix-core</module>
<module>pregelix-example</module>
+ <module>pregelix-benchmark</module>
<module>pregelix-dist</module>
</modules>
</project>
diff --git a/pregelix/pregelix-api/pom.xml b/pregelix/pregelix-api/pom.xml
index 305b50c..0152a15 100644
--- a/pregelix/pregelix-api/pom.xml
+++ b/pregelix/pregelix-api/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<properties>
@@ -82,7 +82,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
@@ -95,7 +95,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-hdfs-core</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/GlobalAggregator.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/GlobalAggregator.java
index 08c7151..5ea6413 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/GlobalAggregator.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/GlobalAggregator.java
@@ -20,6 +20,7 @@
import org.apache.hadoop.io.WritableComparable;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
/**
* This is the abstract class to implement for aggregating the state of all the vertices globally in the graph.
@@ -39,7 +40,7 @@
*/
@SuppressWarnings("rawtypes")
-public abstract class GlobalAggregator<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable, P extends Writable, F extends Writable> {
+public abstract class GlobalAggregator<I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable, P extends Writable, F extends Writable> {
/**
* initialize aggregator
*/
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/MessageCombiner.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/MessageCombiner.java
index f5daf99..fa03c0c 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/MessageCombiner.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/MessageCombiner.java
@@ -19,6 +19,7 @@
import org.apache.hadoop.io.WritableComparable;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
/**
* This is the abstract class to implement for combining of messages that are sent to the same vertex.
@@ -36,7 +37,7 @@
* the type of the partially combined messages
*/
@SuppressWarnings("rawtypes")
-public abstract class MessageCombiner<I extends WritableComparable, M extends Writable, P extends Writable> {
+public abstract class MessageCombiner<I extends WritableComparable, M extends WritableSizable, P extends Writable> {
/**
* initialize combiner
@@ -82,4 +83,36 @@
* @return the final message List
*/
public abstract MsgList<M> finishFinal();
+
+ /**
+ * Initialize the combiner for all the segmented bags for one key.
+ *
+ * @param providedMsgList
+ * the message list backing the combined result
+ */
+ public void initAll(MsgList providedMsgList) {
+ init(providedMsgList);
+ }
+
+ /**
+ * Finish the final combining of all the segmented bags for one key.
+ *
+ * @return the final message List
+ */
+ public MsgList<M> finishFinalAll() {
+ return finishFinal();
+ }
+
+ /**
+ * @return the accumulated byte size
+ */
+ public int estimateAccumulatedStateByteSizePartial(I vertexIndex, M msg) throws HyracksDataException {
+ return 0;
+ }
+
+ /**
+ * @return the accumulated byte size
+ */
+ public int estimateAccumulatedStateByteSizeFinal(I vertexIndex, P partialAggregate) throws HyracksDataException {
+ return 0;
+ }
}
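The two estimate* hooks above default to 0; overriding them lets the runtime track how large the accumulated combine state has grown so it can cut oversized message bags into segments. A minimal sketch, assuming the fixed 8-byte meta slot used by the DefaultMessageCombiner changed later in this patch; SizeAwareCombiner is hypothetical:

    import org.apache.hadoop.io.WritableComparable;

    import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
    import edu.uci.ics.pregelix.api.graph.MsgList;
    import edu.uci.ics.pregelix.api.io.WritableSizable;
    import edu.uci.ics.pregelix.api.util.DefaultMessageCombiner;

    // Hypothetical sketch: report per-message and per-partial-aggregate sizes
    // so the runtime can bound the accumulated state.
    @SuppressWarnings("rawtypes")
    public class SizeAwareCombiner<I extends WritableComparable, M extends WritableSizable> extends
            DefaultMessageCombiner<I, M> {
        private static final int META_SLOT = 8; // assumed bookkeeping overhead

        @Override
        public int estimateAccumulatedStateByteSizePartial(I vertexIndex, M msg) throws HyracksDataException {
            return META_SLOT + msg.sizeInBytes();
        }

        @Override
        public int estimateAccumulatedStateByteSizeFinal(I vertexIndex, MsgList partialAggregate)
                throws HyracksDataException {
            int size = META_SLOT;
            for (int i = 0; i < partialAggregate.size(); i++) {
                // each element of the partial aggregate is a WritableSizable message
                size += ((WritableSizable) partialAggregate.get(i)).sizeInBytes();
            }
            return size;
        }
    }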
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/MsgList.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/MsgList.java
index 104f396..51b62e4 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/MsgList.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/MsgList.java
@@ -15,8 +15,11 @@
package edu.uci.ics.pregelix.api.graph;
-import org.apache.hadoop.io.Writable;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
import edu.uci.ics.pregelix.api.util.ArrayListWritable;
import edu.uci.ics.pregelix.api.util.BspUtils;
@@ -27,9 +30,11 @@
* @param <M>
* message type
*/
-public class MsgList<M extends Writable> extends ArrayListWritable<M> {
+public class MsgList<M extends WritableSizable> extends ArrayListWritable<M> {
/** Defining a layout version for a serializable class. */
private static final long serialVersionUID = 1L;
+ private byte start = 1;
+ private byte end = 2;
/**
* Default constructor.
@@ -43,4 +48,34 @@
public void setClass() {
setClass((Class<M>) BspUtils.getMessageValueClass(getConf()));
}
+
+ @Override
+ public void write(DataOutput output) throws IOException {
+ output.writeByte(start | end);
+ super.write(output);
+ }
+
+ @Override
+ public void readFields(DataInput input) throws IOException {
+ byte startEnd = input.readByte();
+ this.start = (byte) (startEnd & 1);
+ this.end = (byte) (startEnd & 2);
+ super.readFields(input);
+ }
+
+ public final void setSegmentStart(boolean segStart) {
+ this.start = (byte) (segStart ? 1 : 0);
+ }
+
+ public final void setSegmentEnd(boolean segEnd) {
+ this.end = (byte) (segEnd ? 2 : 0);
+ }
+
+ public boolean segmentStart() {
+ return start == 1;
+ }
+
+ public boolean segmentEnd() {
+ return end == 2;
+ }
}
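The new header byte packs the two segment flags: bit 0 marks a segment start and bit 1 a segment end, so a MsgList that is both (the common, unsegmented case) serializes the value 3. A small illustration of the encoding used by write() and readFields() above:

    public class SegmentFlagSketch {
        public static void main(String[] args) {
            byte start = 1, end = 2;            // the defaults in MsgList
            byte header = (byte) (start | end); // 3: both segment start and segment end
            System.out.println((header & 1) != 0); // segment start -> true
            System.out.println((header & 2) != 0); // segment end   -> true
        }
    }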
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java
index 4175078..26cb8d0 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java
@@ -24,15 +24,18 @@
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import edu.uci.ics.hyracks.api.comm.IFrameWriter;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
import edu.uci.ics.pregelix.api.util.BspUtils;
-import edu.uci.ics.pregelix.api.util.SerDeUtils;
+import edu.uci.ics.pregelix.api.util.JobStateUtils;
/**
* User applications should all inherit {@link Vertex}, and implement their own
@@ -48,7 +51,7 @@
* Message value type
*/
@SuppressWarnings("rawtypes")
-public abstract class Vertex<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable>
+public abstract class Vertex<I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable>
implements Writable {
private static long superstep = 0;
/** Class-wide number of vertices */
@@ -75,6 +78,8 @@
private boolean hasMessage = false;
/** created new vertex */
private boolean createdNewLiveVertex = false;
+ /** terminate the partition */
+ private boolean terminatePartition = false;
/**
* use object pool for re-using objects
@@ -87,12 +92,23 @@
private int usedValue = 0;
/**
- * The key method that users need to implement
+ * The key method that users need to implement to process
+ * incoming messages in each superstep.
+ * 1. In a superstep, this method can be called multiple times in a row for a single
+ * vertex, each invocation processing one batch of messages. (Note that
+ * this only happens when the messages for a single vertex
+ * exceed one frame.)
+ * 2. In each superstep, before any invocation of this method for a vertex,
+ * open() is called; after all the invocations of this method for the vertex,
+ * close() is called.
+ * 3. In each partition, the vertex Java object is reused
+ * for all the vertices to be processed in the same partition. (The model
+ * is the same as the key-value objects in Hadoop map tasks.)
*
* @param msgIterator
* an iterator of incoming messages
*/
- public abstract void compute(Iterator<M> msgIterator);
+ public abstract void compute(Iterator<M> msgIterator) throws Exception;
/**
* Add an edge for the vertex.
@@ -254,7 +270,7 @@
delegate.setVertex(this);
}
destEdgeList.clear();
- long edgeMapSize = SerDeUtils.readVLong(in);
+ long edgeMapSize = WritableUtils.readVLong(in);
for (long i = 0; i < edgeMapSize; ++i) {
Edge<I, E> edge = allocateEdge();
edge.setConf(getContext().getConfiguration());
@@ -262,7 +278,7 @@
addEdge(edge);
}
msgList.clear();
- long msgListSize = SerDeUtils.readVLong(in);
+ long msgListSize = WritableUtils.readVLong(in);
for (long i = 0; i < msgListSize; ++i) {
M msg = allocateMessage();
msg.readFields(in);
@@ -281,11 +297,11 @@
if (vertexValue != null) {
vertexValue.write(out);
}
- SerDeUtils.writeVLong(out, destEdgeList.size());
+ WritableUtils.writeVLong(out, destEdgeList.size());
for (Edge<I, E> edge : destEdgeList) {
edge.write(out);
}
- SerDeUtils.writeVLong(out, msgList.size());
+ WritableUtils.writeVLong(out, msgList.size());
for (M msg : msgList) {
msg.write(out);
}
@@ -569,4 +585,61 @@
Vertex.context = context;
}
+ @Override
+ public int hashCode() {
+ return vertexId.hashCode();
+ }
+
+ @Override
+ public boolean equals(Object object) {
+ if (!(object instanceof Vertex)) {
+ return false;
+ }
+ Vertex vertex = (Vertex) object;
+ return vertexId.equals(vertex.getVertexId());
+ }
+
+ /**
+ * Called immediately before any invocation of compute() on a vertex.
+ * Users can override this method to initialize the state of a vertex
+ * before the compute() invocations.
+ */
+ public void open() {
+
+ }
+
+ /**
+ * Called immediately after all the invocations of compute() on a vertex.
+ * Users can override this method to finalize the state of a vertex
+ * after the compute() invocations.
+ */
+ public void close() {
+
+ }
+
+ /**
+ * Terminate the current partition in which the current vertex resides.
+ * This takes effect immediately; the remaining vertices in the
+ * same partition will not be processed.
+ */
+ protected final void terminatePartition() {
+ voteToHalt();
+ terminatePartition = true;
+ }
+
+ /**
+ * Terminate the Pregelix job.
+ * This will take effect only after the current iteration completes.
+ *
+ * @throws Exception
+ */
+ protected void terminateJob() throws Exception {
+ Configuration conf = getContext().getConfiguration();
+ JobStateUtils.writeForceTerminationState(conf, BspUtils.getJobId(conf));
+ }
+
+ /***
+ * @return true if the partition is terminated; false otherwise
+ */
+ public boolean isPartitionTerminated() {
+ return terminatePartition;
+ }
+
}
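Taken together, the additions above define a per-superstep lifecycle: open(), one or more compute() calls (one per message batch), then close(). A hedged sketch of a vertex using it; LongMsg is a hypothetical WritableSizable message type (sketched after the new WritableSizable interface below), and getVertexValue()/setVertexValue() are assumed from the existing Vertex API:

    import java.util.Iterator;

    import org.apache.hadoop.io.FloatWritable;
    import org.apache.hadoop.io.VLongWritable;

    import edu.uci.ics.pregelix.api.graph.Vertex;

    // Hypothetical example: track the minimum id seen across all message
    // batches of a superstep; close() runs once after the last batch.
    public class MinIdVertex extends Vertex<VLongWritable, VLongWritable, FloatWritable, LongMsg> {
        private long min;

        @Override
        public void open() {
            min = Long.MAX_VALUE; // reset per-vertex state before the first batch
        }

        @Override
        public void compute(Iterator<LongMsg> msgIterator) throws Exception {
            while (msgIterator.hasNext()) {
                min = Math.min(min, msgIterator.next().get());
            }
        }

        @Override
        public void close() {
            if (min < getVertexValue().get()) {
                setVertexValue(new VLongWritable(min)); // assumed mutator on Vertex
            }
            voteToHalt();
        }
    }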
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/Sizable.java
similarity index 64%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/Sizable.java
index cde5022..568500b 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/Sizable.java
@@ -12,17 +12,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package edu.uci.ics.pregelix.api.io;
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+/**
+ * @author yingyib
+ */
+public interface Sizable {
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
-
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
- }
-
- public BTreeDuplicateKeyException(String message) {
- super(message);
- }
+ public int sizeInBytes();
}
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/VertexInputFormat.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/VertexInputFormat.java
index c841b1a..73af190 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/VertexInputFormat.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/VertexInputFormat.java
@@ -40,7 +40,7 @@
* Message data
*/
@SuppressWarnings("rawtypes")
-public abstract class VertexInputFormat<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable> {
+public abstract class VertexInputFormat<I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable> {
/**
* Logically split the vertices for a graph processing application.
* Each {@link InputSplit} is then assigned to a worker for processing.
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/VertexReader.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/VertexReader.java
index e6c62ba..ba8b561 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/VertexReader.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/VertexReader.java
@@ -39,7 +39,7 @@
* Message data
*/
@SuppressWarnings("rawtypes")
-public interface VertexReader<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable> {
+public interface VertexReader<I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable> {
/**
* Use the input split and context to set up reading the vertices.
* Guaranteed to be called prior to any other function.
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/WritableSizable.java
similarity index 64%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/WritableSizable.java
index cde5022..ee13f76 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/WritableSizable.java
@@ -12,17 +12,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package edu.uci.ics.pregelix.api.io;
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+import org.apache.hadoop.io.Writable;
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+/**
+ * @author yingyib
+ */
+public interface WritableSizable extends Writable, Sizable {
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
- }
-
- public BTreeDuplicateKeyException(String message) {
- super(message);
- }
}
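For reference, a minimal message type satisfying the new contract; this hypothetical LongMsg is the one assumed by the earlier vertex sketch:

    import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;

    import edu.uci.ics.pregelix.api.io.WritableSizable;

    // Hypothetical fixed-width message: one long, so sizeInBytes() is constant.
    public class LongMsg implements WritableSizable {
        private long value;

        public long get() {
            return value;
        }

        public void set(long value) {
            this.value = value;
        }

        @Override
        public int sizeInBytes() {
            return 8; // a single long
        }

        @Override
        public void write(DataOutput out) throws IOException {
            out.writeLong(value);
        }

        @Override
        public void readFields(DataInput in) throws IOException {
            value = in.readLong();
        }
    }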
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/generated/GeneratedVertexInputFormat.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/generated/GeneratedVertexInputFormat.java
index 985bcff..1d3c427 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/generated/GeneratedVertexInputFormat.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/generated/GeneratedVertexInputFormat.java
@@ -26,13 +26,14 @@
import edu.uci.ics.pregelix.api.io.BasicGenInputSplit;
import edu.uci.ics.pregelix.api.io.VertexInputFormat;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
/**
* This VertexInputFormat is meant for testing/debugging. It simply generates
* some vertex data that can be consumed by test applications.
*/
@SuppressWarnings("rawtypes")
-public abstract class GeneratedVertexInputFormat<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable>
+public abstract class GeneratedVertexInputFormat<I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable>
extends VertexInputFormat<I, V, E, M> {
@Override
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/generated/GeneratedVertexReader.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/generated/GeneratedVertexReader.java
index 92c8728..376d45d 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/generated/GeneratedVertexReader.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/generated/GeneratedVertexReader.java
@@ -25,6 +25,7 @@
import edu.uci.ics.pregelix.api.io.BasicGenInputSplit;
import edu.uci.ics.pregelix.api.io.VertexReader;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
/**
* Used by GeneratedVertexInputFormat to read some generated data
@@ -37,7 +38,7 @@
* Edge value
*/
@SuppressWarnings("rawtypes")
-public abstract class GeneratedVertexReader<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable>
+public abstract class GeneratedVertexReader<I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable>
implements VertexReader<I, V, E, M> {
/** Records read so far */
protected long recordsRead = 0;
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/internal/InternalVertexInputFormat.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/internal/InternalVertexInputFormat.java
new file mode 100644
index 0000000..22d3b27
--- /dev/null
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/internal/InternalVertexInputFormat.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.api.io.internal;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexInputFormat;
+import edu.uci.ics.pregelix.api.io.VertexReader;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
+
+/**
+ * @author yingyib
+ */
+@SuppressWarnings("rawtypes")
+public class InternalVertexInputFormat<I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable>
+ extends VertexInputFormat<I, V, E, M> {
+ /** Uses the SequenceFileInputFormat to do everything */
+ private SequenceFileInputFormat sequenceInputFormat = new SequenceFileInputFormat();
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public List<InputSplit> getSplits(JobContext context, int numWorkers) throws IOException, InterruptedException {
+ return sequenceInputFormat.getSplits(context);
+ }
+
+ @Override
+ public VertexReader<I, V, E, M> createVertexReader(final InputSplit split, final TaskAttemptContext context)
+ throws IOException {
+ return new VertexReader<I, V, E, M>() {
+ RecordReader recordReader = sequenceInputFormat.createRecordReader(split, context);
+
+ @Override
+ public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ recordReader.initialize(inputSplit, context);
+ }
+
+ @Override
+ public boolean nextVertex() throws IOException, InterruptedException {
+ return recordReader.nextKeyValue();
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public Vertex<I, V, E, M> getCurrentVertex() throws IOException, InterruptedException {
+ return (Vertex<I, V, E, M>) recordReader.getCurrentValue();
+ }
+
+ @Override
+ public void close() throws IOException {
+ recordReader.close();
+ }
+
+ @Override
+ public float getProgress() throws IOException, InterruptedException {
+ return 0;
+ }
+
+ };
+ }
+
+}
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/internal/InternalVertexOutputFormat.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/internal/InternalVertexOutputFormat.java
new file mode 100644
index 0000000..b603037
--- /dev/null
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/internal/InternalVertexOutputFormat.java
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.api.io.internal;
+
+import java.io.IOException;
+
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexOutputFormat;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+
+/**
+ * @author yingyib
+ */
+@SuppressWarnings("rawtypes")
+public class InternalVertexOutputFormat<I extends WritableComparable, V extends Writable, E extends Writable> extends
+ VertexOutputFormat<I, V, E> {
+ private SequenceFileOutputFormat sequenceOutputFormat = new SequenceFileOutputFormat();
+
+ @Override
+ public VertexWriter<I, V, E> createVertexWriter(final TaskAttemptContext context) throws IOException,
+ InterruptedException {
+ return new VertexWriter<I, V, E>() {
+ private RecordWriter recordWriter = sequenceOutputFormat.getRecordWriter(context);
+ private NullWritable key = NullWritable.get();
+
+ @Override
+ public void initialize(TaskAttemptContext context) throws IOException, InterruptedException {
+
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void writeVertex(Vertex<I, V, E, ?> vertex) throws IOException, InterruptedException {
+ recordWriter.write(key, vertex);
+ }
+
+ @Override
+ public void close(TaskAttemptContext context) throws IOException, InterruptedException {
+ recordWriter.close(context);
+ }
+
+ };
+ }
+
+ @Override
+ public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
+
+ }
+
+ @Override
+ public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
+ return new OutputCommitter() {
+
+ @Override
+ public void abortTask(TaskAttemptContext arg0) throws IOException {
+ // no op
+
+ }
+
+ @Override
+ public void cleanupJob(JobContext arg0) throws IOException {
+ // no op
+
+ }
+
+ @Override
+ public void commitTask(TaskAttemptContext arg0) throws IOException {
+ // no op
+
+ }
+
+ @Override
+ public boolean needsTaskCommit(TaskAttemptContext arg0) throws IOException {
+ return false;
+ }
+
+ @Override
+ public void setupJob(JobContext arg0) throws IOException {
+
+ }
+
+ @Override
+ public void setupTask(TaskAttemptContext arg0) throws IOException {
+
+ }
+
+ };
+ }
+
+}
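These two internal formats round-trip vertices through SequenceFiles, which is what the checkpointing machinery needs. A hedged wiring sketch; the two setters are assumptions mirroring the VERTEX_INPUT_FORMAT_CLASS key read by BspUtils, not confirmed PregelixJob API:

    import edu.uci.ics.pregelix.api.io.internal.InternalVertexInputFormat;
    import edu.uci.ics.pregelix.api.io.internal.InternalVertexOutputFormat;
    import edu.uci.ics.pregelix.api.job.PregelixJob;

    public class InternalFormatWiring {
        public static void configure(PregelixJob job) {
            // assumed setters; shown only to indicate where the formats plug in
            job.setVertexInputFormatClass(InternalVertexInputFormat.class);
            job.setVertexOutputFormatClass(InternalVertexOutputFormat.class);
        }
    }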
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/text/TextVertexInputFormat.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/text/TextVertexInputFormat.java
index 2254ae4..0faf516 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/text/TextVertexInputFormat.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/io/text/TextVertexInputFormat.java
@@ -30,6 +30,7 @@
import edu.uci.ics.pregelix.api.io.VertexInputFormat;
import edu.uci.ics.pregelix.api.io.VertexReader;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
/**
* Abstract class that users should subclass to use their own text based vertex
@@ -45,7 +46,7 @@
* Message value
*/
@SuppressWarnings("rawtypes")
-public abstract class TextVertexInputFormat<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable>
+public abstract class TextVertexInputFormat<I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable>
extends VertexInputFormat<I, V, E, M> {
/** Uses the TextInputFormat to do everything */
protected TextInputFormat textInputFormat = new TextInputFormat();
@@ -62,7 +63,7 @@
* @param <E>
* Edge value
*/
- public static abstract class TextVertexReader<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable>
+ public static abstract class TextVertexReader<I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable>
implements VertexReader<I, V, E, M> {
/** Internal line record reader */
private final RecordReader<LongWritable, Text> lineRecordReader;
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/job/ICheckpointHook.java
similarity index 64%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/job/ICheckpointHook.java
index cde5022..9d6eb5a 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/job/ICheckpointHook.java
@@ -12,17 +12,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package edu.uci.ics.pregelix.api.job;
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+/**
+ * @author yingyib
+ */
+public interface ICheckpointHook {
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+ public boolean checkpoint(int superstep);
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
- }
-
- public BTreeDuplicateKeyException(String message) {
- super(message);
- }
}
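The hook is consulted once per superstep. A minimal sketch of an alternative policy to the two implementations added later in this patch (DefaultCheckpointHook, ConservativeCheckpointHook); the period is an assumption:

    import edu.uci.ics.pregelix.api.job.ICheckpointHook;

    // Hypothetical policy: checkpoint every tenth superstep.
    public class PeriodicCheckpointHook implements ICheckpointHook {
        private static final int PERIOD = 10; // assumed period

        @Override
        public boolean checkpoint(int superstep) {
            return superstep > 0 && superstep % PERIOD == 0;
        }
    }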
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/job/PregelixJob.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/job/PregelixJob.java
index 4cddaf0..6549c52 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/job/PregelixJob.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/job/PregelixJob.java
@@ -74,6 +74,22 @@
public static final String FRAME_SIZE = "pregelix.framesize";
/** update intensive */
public static final String UPDATE_INTENSIVE = "pregelix.updateIntensive";
+ /** the checkpoint hook */
+ public static final String CKP_CLASS = "pregelix.checkpointHook";
+ /** the recovery count */
+ public static final String RECOVERY_COUNT = "pregelix.recoveryCount";
+ /** the checkpoint interval */
+ public static final String CKP_INTERVAL = "pregelix.ckpinterval";
+
+ /**
+ * Construct a Pregelix job from an existing configuration
+ *
+ * @param conf
+ * @throws IOException
+ */
+ public PregelixJob(Configuration conf) throws IOException {
+ super(conf);
+ }
/**
* Constructor that will instantiate the configuration
@@ -198,7 +214,39 @@
*
* @param updateHeavyFlag
*/
- final public void setMutationOrVariableSizedUpdateHeavy(boolean variableSizedUpdateHeavyFlag) {
+ final public void setLSMStorage(boolean variableSizedUpdateHeavyFlag) {
getConfiguration().setBoolean(UPDATE_INTENSIVE, variableSizedUpdateHeavyFlag);
}
+
+ /**
+ * Users can provide an ICheckpointHook implementation to specify when to checkpoint
+ *
+ * @param ckpClass
+ */
+ final public void setCheckpointHook(Class<?> ckpClass) {
+ getConfiguration().setClass(CKP_CLASS, ckpClass, ICheckpointHook.class);
+ }
+
+ /**
+ * Users can set the recovery count of the job
+ *
+ * @param recoveryCount
+ */
+ final public void setRecoveryCount(int recoveryCount) {
+ getConfiguration().setInt(RECOVERY_COUNT, recoveryCount);
+ }
+
+ /**
+ * Users can set the interval of checkpointing
+ *
+ * @param ckpInterval
+ */
+ final public void setCheckpointingInterval(int ckpInterval) {
+ getConfiguration().setInt(CKP_INTERVAL, ckpInterval);
+ }
+
+ @Override
+ public String toString() {
+ return getJobName();
+ }
}
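A hedged sketch tying the new knobs together; the single-argument name constructor is assumed from the existing "Constructor that will instantiate the configuration":

    import edu.uci.ics.pregelix.api.job.PregelixJob;
    import edu.uci.ics.pregelix.api.util.ConservativeCheckpointHook;

    public class JobSetupSketch {
        public static PregelixJob configure() throws Exception {
            PregelixJob job = new PregelixJob("example-job");        // assumed existing constructor
            job.setCheckpointHook(ConservativeCheckpointHook.class); // pregelix.checkpointHook
            job.setCheckpointingInterval(4);                         // pregelix.ckpinterval
            job.setRecoveryCount(3);                                 // pregelix.recoveryCount
            job.setLSMStorage(true); // renamed from setMutationOrVariableSizedUpdateHeavy
            return job;
        }
    }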
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/ArrayListWritable.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/ArrayListWritable.java
index 7a9e5d5..1683541 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/ArrayListWritable.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/ArrayListWritable.java
@@ -141,9 +141,6 @@
used = 0;
this.clear();
int numValues = in.readInt(); // read number of values
- if (numValues > 100) {
- System.out.println("num values: " + numValues);
- }
for (int i = 0; i < numValues; i++) {
M value = allocateValue();
value.readFields(in); // read a value
@@ -153,9 +150,6 @@
public void write(DataOutput out) throws IOException {
int numValues = size();
- if (numValues > 100) {
- System.out.println("write num values: " + numValues);
- }
out.writeInt(numValues); // write number of values
for (int i = 0; i < numValues; i++) {
get(i).write(out);
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/BspUtils.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/BspUtils.java
index 03c37dc..4ee1deb 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/BspUtils.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/BspUtils.java
@@ -28,6 +28,8 @@
import edu.uci.ics.pregelix.api.graph.VertexPartitioner;
import edu.uci.ics.pregelix.api.io.VertexInputFormat;
import edu.uci.ics.pregelix.api.io.VertexOutputFormat;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
+import edu.uci.ics.pregelix.api.job.ICheckpointHook;
import edu.uci.ics.pregelix.api.job.PregelixJob;
/**
@@ -49,7 +51,7 @@
* @return User's vertex input format class
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
- public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable> Class<? extends VertexInputFormat<I, V, E, M>> getVertexInputFormatClass(
+ public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable> Class<? extends VertexInputFormat<I, V, E, M>> getVertexInputFormatClass(
Configuration conf) {
return (Class<? extends VertexInputFormat<I, V, E, M>>) conf.getClass(PregelixJob.VERTEX_INPUT_FORMAT_CLASS,
null, VertexInputFormat.class);
@@ -63,7 +65,7 @@
* @return Instantiated user vertex input format class
*/
@SuppressWarnings("rawtypes")
- public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable> VertexInputFormat<I, V, E, M> createVertexInputFormat(
+ public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable> VertexInputFormat<I, V, E, M> createVertexInputFormat(
Configuration conf) {
Class<? extends VertexInputFormat<I, V, E, M>> vertexInputFormatClass = getVertexInputFormatClass(conf);
VertexInputFormat<I, V, E, M> inputFormat = ReflectionUtils.newInstance(vertexInputFormatClass, conf);
@@ -106,7 +108,7 @@
* @return User's vertex combiner class
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
- public static <I extends WritableComparable, M extends Writable, P extends Writable> Class<? extends MessageCombiner<I, M, P>> getMessageCombinerClass(
+ public static <I extends WritableComparable, M extends WritableSizable, P extends Writable> Class<? extends MessageCombiner<I, M, P>> getMessageCombinerClass(
Configuration conf) {
return (Class<? extends MessageCombiner<I, M, P>>) conf.getClass(PregelixJob.Message_COMBINER_CLASS,
DefaultMessageCombiner.class, MessageCombiner.class);
@@ -120,7 +122,7 @@
* @return User's vertex combiner class
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
- public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable, P extends Writable, F extends Writable> Class<? extends GlobalAggregator<I, V, E, M, P, F>> getGlobalAggregatorClass(
+ public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable, P extends Writable, F extends Writable> Class<? extends GlobalAggregator<I, V, E, M, P, F>> getGlobalAggregatorClass(
Configuration conf) {
return (Class<? extends GlobalAggregator<I, V, E, M, P, F>>) conf.getClass(PregelixJob.GLOBAL_AGGREGATOR_CLASS,
GlobalCountAggregator.class, GlobalAggregator.class);
@@ -138,7 +140,7 @@
* @return Instantiated user vertex combiner class
*/
@SuppressWarnings("rawtypes")
- public static <I extends WritableComparable, M extends Writable, P extends Writable> MessageCombiner<I, M, P> createMessageCombiner(
+ public static <I extends WritableComparable, M extends WritableSizable, P extends Writable> MessageCombiner<I, M, P> createMessageCombiner(
Configuration conf) {
Class<? extends MessageCombiner<I, M, P>> vertexCombinerClass = getMessageCombinerClass(conf);
return ReflectionUtils.newInstance(vertexCombinerClass, conf);
@@ -164,7 +166,7 @@
* @return Instantiated user vertex combiner class
*/
@SuppressWarnings("rawtypes")
- public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable, P extends Writable, F extends Writable> GlobalAggregator<I, V, E, M, P, F> createGlobalAggregator(
+ public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable, P extends Writable, F extends Writable> GlobalAggregator<I, V, E, M, P, F> createGlobalAggregator(
Configuration conf) {
Class<? extends GlobalAggregator<I, V, E, M, P, F>> globalAggregatorClass = getGlobalAggregatorClass(conf);
return ReflectionUtils.newInstance(globalAggregatorClass, conf);
@@ -178,7 +180,7 @@
* @return User's vertex class
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
- public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable> Class<? extends Vertex<I, V, E, M>> getVertexClass(
+ public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable> Class<? extends Vertex<I, V, E, M>> getVertexClass(
Configuration conf) {
return (Class<? extends Vertex<I, V, E, M>>) conf.getClass(PregelixJob.VERTEX_CLASS, null, Vertex.class);
}
@@ -191,7 +193,7 @@
* @return Instantiated user vertex
*/
@SuppressWarnings("rawtypes")
- public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable> Vertex<I, V, E, M> createVertex(
+ public static <I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable> Vertex<I, V, E, M> createVertex(
Configuration conf) {
Class<? extends Vertex<I, V, E, M>> vertexClass = getVertexClass(conf);
Vertex<I, V, E, M> vertex = ReflectionUtils.newInstance(vertexClass, conf);
@@ -299,7 +301,7 @@
* @return User's vertex message value class
*/
@SuppressWarnings("unchecked")
- public static <M extends Writable> Class<M> getMessageValueClass(Configuration conf) {
+ public static <M extends WritableSizable> Class<M> getMessageValueClass(Configuration conf) {
if (conf == null)
conf = defaultConf;
return (Class<M>) conf.getClass(PregelixJob.MESSAGE_VALUE_CLASS, Writable.class);
@@ -369,7 +371,7 @@
* Configuration to check
* @return Instantiated user vertex message value
*/
- public static <M extends Writable> M createMessageValue(Configuration conf) {
+ public static <M extends WritableSizable> M createMessageValue(Configuration conf) {
Class<M> messageValueClass = getMessageValueClass(conf);
try {
return messageValueClass.newInstance();
@@ -460,6 +462,24 @@
}
/**
+ * Create a checkpoint hook
+ *
+ * @param conf
+ * Configuration to check
+ * @return Instantiated checkpoint hook
+ */
+ public static ICheckpointHook createCheckpointHook(Configuration conf) {
+ Class<? extends ICheckpointHook> ckpClass = getCheckpointHookClass(conf);
+ try {
+ return ckpClass.newInstance();
+ } catch (InstantiationException e) {
+ throw new IllegalArgumentException("createVertexPartitioner: Failed to instantiate", e);
+ } catch (IllegalAccessException e) {
+ throw new IllegalArgumentException("createVertexPartitioner: Illegally accessed", e);
+ }
+ }
+
+ /**
* Get the user's subclassed vertex partitioner class.
*
* @param conf
@@ -474,6 +494,20 @@
}
/**
+ * Get the user's subclassed checkpoint hook class.
+ *
+ * @param conf
+ * Configuration to check
+ * @return The user-defined checkpoint hook class
+ */
+ @SuppressWarnings("unchecked")
+ public static <V extends ICheckpointHook> Class<V> getCheckpointHookClass(Configuration conf) {
+ if (conf == null)
+ conf = defaultConf;
+ return (Class<V>) conf.getClass(PregelixJob.CKP_CLASS, DefaultCheckpointHook.class, ICheckpointHook.class);
+ }
+
+ /**
* Get the job configuration parameter whether the vertex states will increase dynamically
*
* @param conf
@@ -504,4 +538,68 @@
public static boolean useLSM(Configuration conf) {
return conf.getBoolean(PregelixJob.UPDATE_INTENSIVE, false);
}
+
+ /***
+ * Get the spilling dir name for global aggregates
+ *
+ * @param conf
+ * @param superStep
+ * @return the spilling dir name
+ */
+ public static String getGlobalAggregateSpillingDirName(Configuration conf, long superStep) {
+ return "/tmp/pregelix/agg/" + conf.get(PregelixJob.JOB_ID) + "/" + superStep;
+ }
+
+ /**
+ * Get the path for vertex checkpointing
+ *
+ * @param conf
+ * @param lastSuperStep
+ * @return the path for vertex checkpointing
+ */
+ public static String getVertexCheckpointPath(Configuration conf, long lastSuperStep) {
+ return "/tmp/ckpoint/" + BspUtils.getJobId(conf) + "/vertex/" + lastSuperStep;
+ }
+
+ /**
+ * Get the path for message checkpointing
+ *
+ * @param conf
+ * @param lastSuperStep
+ * @return the path for message checkpointing
+ */
+ public static String getMessageCheckpointPath(Configuration conf, long lastSuperStep) {
+ return "/tmp/ckpoint/" + BspUtils.getJobId(conf) + "/message/" + lastSuperStep;
+ }
+
+ /**
+ * Get the path for secondary index checkpointing
+ *
+ * @param conf
+ * @param lastSuperStep
+ * @return the path for secondary index checkpointing
+ */
+ public static String getSecondaryIndexCheckpointPath(Configuration conf, long lastSuperStep) {
+ return "/tmp/ckpoint/" + BspUtils.getJobId(conf) + "/secondaryindex/" + lastSuperStep;
+ }
+
+ /***
+ * Get the recovery count
+ *
+ * @return recovery count
+ */
+ public static int getRecoveryCount(Configuration conf) {
+ return conf.getInt(PregelixJob.RECOVERY_COUNT, 0);
+ }
+
+ /***
+ * Get the user-set checkpoint interval
+ *
+ * @param conf
+ * @return the checkpoint interval
+ */
+ public static int getCheckpointingInterval(Configuration conf) {
+ return conf.getInt(PregelixJob.CKP_INTERVAL, -1);
+ }
}
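A worked illustration of the new path helpers; the job id value and the use of the PregelixJob.JOB_ID key are assumptions consistent with getGlobalAggregateSpillingDirName above:

    import org.apache.hadoop.conf.Configuration;

    import edu.uci.ics.pregelix.api.job.PregelixJob;
    import edu.uci.ics.pregelix.api.util.BspUtils;

    public class CheckpointPathSketch {
        public static void main(String[] args) {
            Configuration conf = new Configuration();
            conf.set(PregelixJob.JOB_ID, "job_1"); // assumed: BspUtils.getJobId reads this key
            System.out.println(BspUtils.getVertexCheckpointPath(conf, 3));         // /tmp/ckpoint/job_1/vertex/3
            System.out.println(BspUtils.getMessageCheckpointPath(conf, 3));        // /tmp/ckpoint/job_1/message/3
            System.out.println(BspUtils.getSecondaryIndexCheckpointPath(conf, 3)); // /tmp/ckpoint/job_1/secondaryindex/3
        }
    }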
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/ConservativeCheckpointHook.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/ConservativeCheckpointHook.java
new file mode 100644
index 0000000..4f5fef0
--- /dev/null
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/ConservativeCheckpointHook.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.api.util;
+
+import edu.uci.ics.pregelix.api.job.ICheckpointHook;
+
+/**
+ * A conservative checkpoint hook which checkpoints every 2 supersteps
+ *
+ * @author yingyib
+ */
+public class ConservativeCheckpointHook implements ICheckpointHook {
+
+ @Override
+ public boolean checkpoint(int superstep) {
+ return superstep % 2 == 0;
+ }
+
+}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/DefaultCheckpointHook.java
similarity index 65%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/DefaultCheckpointHook.java
index cde5022..c37c4ab 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/DefaultCheckpointHook.java
@@ -12,17 +12,20 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package edu.uci.ics.pregelix.api.util;
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+import edu.uci.ics.pregelix.api.job.ICheckpointHook;
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+/**
+ * The default checkpoint hook which never does checkpointing.
+ *
+ * @author yingyib
+ */
+public class DefaultCheckpointHook implements ICheckpointHook {
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
+ @Override
+ public boolean checkpoint(int superstep) {
+ return false;
}
- public BTreeDuplicateKeyException(String message) {
- super(message);
- }
}
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/DefaultMessageCombiner.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/DefaultMessageCombiner.java
index d2d90a2..feb9e2f 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/DefaultMessageCombiner.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/DefaultMessageCombiner.java
@@ -14,42 +14,82 @@
*/
package edu.uci.ics.pregelix.api.util;
-import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.pregelix.api.graph.MessageCombiner;
import edu.uci.ics.pregelix.api.graph.MsgList;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
@SuppressWarnings({ "rawtypes", "unchecked" })
-public class DefaultMessageCombiner<I extends WritableComparable, M extends Writable> extends
+public class DefaultMessageCombiner<I extends WritableComparable, M extends WritableSizable> extends
MessageCombiner<I, M, MsgList> {
private MsgList<M> msgList;
+ private int metaSlot = 8;
+ private int accumulatedSize = metaSlot;
@Override
public void init(MsgList providedMsgList) {
+ realInit(providedMsgList);
+ this.msgList.setSegmentStart(false);
+ }
+
+ private void realInit(MsgList providedMsgList) {
this.msgList = providedMsgList;
this.msgList.clearElements();
+ this.accumulatedSize = metaSlot;
}
@Override
public void stepPartial(I vertexIndex, M msg) throws HyracksDataException {
msgList.addElement(msg);
+ accumulatedSize += msg.sizeInBytes();
}
@Override
public void stepFinal(I vertexIndex, MsgList partialAggregate) throws HyracksDataException {
msgList.addAllElements(partialAggregate);
+ for (int i = 0; i < partialAggregate.size(); i++) {
+ accumulatedSize += ((M) partialAggregate.get(i)).sizeInBytes();
+ }
}
@Override
public MsgList finishPartial() {
+ msgList.setSegmentEnd(false);
return msgList;
}
@Override
public MsgList<M> finishFinal() {
+ msgList.setSegmentEnd(false);
return msgList;
}
+ @Override
+ public void initAll(MsgList providedMsgList) {
+ realInit(providedMsgList);
+ msgList.setSegmentStart(true);
+ }
+
+ @Override
+ public MsgList<M> finishFinalAll() {
+ msgList.setSegmentEnd(true);
+ return msgList;
+ }
+
+ @Override
+ public int estimateAccumulatedStateByteSizePartial(I vertexIndex, M msg) throws HyracksDataException {
+ return accumulatedSize + msg.sizeInBytes();
+ }
+
+ @Override
+ public int estimateAccumulatedStateByteSizeFinal(I vertexIndex, MsgList partialAggregate)
+ throws HyracksDataException {
+ int size = accumulatedSize;
+ for (int i = 0; i < partialAggregate.size(); i++) {
+ size += ((M) partialAggregate.get(i)).sizeInBytes();
+ }
+ return size;
+ }
}
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/FrameTupleUtils.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/FrameTupleUtils.java
index 24105ae..a0f67e3 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/FrameTupleUtils.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/FrameTupleUtils.java
@@ -15,6 +15,14 @@
package edu.uci.ics.pregelix.api.util;
+import java.io.IOException;
+import java.util.UUID;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
import edu.uci.ics.hyracks.api.comm.IFrameWriter;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
@@ -41,4 +49,19 @@
}
}
+ public static void flushTupleToHDFS(ArrayTupleBuilder atb, Configuration conf, long superStep)
+ throws HyracksDataException {
+ try {
+ if (atb.getSize() > 0) {
+ FileSystem dfs = FileSystem.get(conf);
+ String fileName = BspUtils.getGlobalAggregateSpillingDirName(conf, superStep) + "/" + UUID.randomUUID();
+ FSDataOutputStream dos = dfs.create(new Path(fileName), true);
+ dos.write(atb.getByteArray(), 0, atb.getSize());
+ dos.flush();
+ dos.close();
+ }
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
}
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/GlobalCountAggregator.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/GlobalCountAggregator.java
index ffc6526..9a95f09 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/GlobalCountAggregator.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/GlobalCountAggregator.java
@@ -21,9 +21,10 @@
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.pregelix.api.graph.GlobalAggregator;
import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
@SuppressWarnings("rawtypes")
-public class GlobalCountAggregator<I extends WritableComparable, V extends Writable, E extends Writable, M extends Writable>
+public class GlobalCountAggregator<I extends WritableComparable, V extends Writable, E extends Writable, M extends WritableSizable>
extends GlobalAggregator<I, V, E, M, LongWritable, LongWritable> {
private LongWritable state = new LongWritable(0);
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/JobStateUtils.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/JobStateUtils.java
new file mode 100644
index 0000000..4a98167
--- /dev/null
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/JobStateUtils.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.api.util;
+
+import java.io.IOException;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * @author yingyib
+ */
+public class JobStateUtils {
+
+ public static final String TMP_DIR = "/tmp/";
+
+ public static void writeForceTerminationState(Configuration conf, String jobId) throws HyracksDataException {
+ try {
+ FileSystem dfs = FileSystem.get(conf);
+ String pathStr = TMP_DIR + jobId + "fterm";
+ Path path = new Path(pathStr);
+ if (!dfs.exists(path)) {
+ FSDataOutputStream output = dfs.create(path, true);
+ output.writeBoolean(true);
+ output.flush();
+ output.close();
+ }
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ public static boolean readForceTerminationState(Configuration conf, String jobId) throws HyracksDataException {
+ try {
+ FileSystem dfs = FileSystem.get(conf);
+ String pathStr = TMP_DIR + jobId + "fterm";
+ Path path = new Path(pathStr);
+ return dfs.exists(path);
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/ResetableByteArrayInputStream.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/ResetableByteArrayInputStream.java
index 7d5f627..c7febc1 100755
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/ResetableByteArrayInputStream.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/ResetableByteArrayInputStream.java
@@ -15,11 +15,8 @@
package edu.uci.ics.pregelix.api.util;
import java.io.InputStream;
-import java.util.logging.Level;
-import java.util.logging.Logger;
public class ResetableByteArrayInputStream extends InputStream {
- private static final Logger LOGGER = Logger.getLogger(ResetableByteArrayInputStream.class.getName());
private byte[] data;
private int position;
@@ -36,19 +33,12 @@
public int read() {
int remaining = data.length - position;
int value = remaining > 0 ? (data[position++] & 0xff) : -1;
- if (LOGGER.isLoggable(Level.FINEST)) {
- LOGGER.finest("read(): value: " + value + " remaining: " + remaining + " position: " + position);
- }
return value;
}
@Override
public int read(byte[] bytes, int offset, int length) {
int remaining = data.length - position;
- if (LOGGER.isLoggable(Level.FINEST)) {
- LOGGER.finest("read(bytes[], int, int): remaining: " + remaining + " offset: " + offset + " length: "
- + length + " position: " + position);
- }
if (remaining == 0) {
return -1;
}
@@ -57,4 +47,9 @@
position += l;
return l;
}
-}
\ No newline at end of file
+
+ @Override
+ public int available() {
+ return data.length - position;
+ }
+}
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/SerDeUtils.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/SerDeUtils.java
index 25b07ff..a4336a3 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/SerDeUtils.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/SerDeUtils.java
@@ -40,47 +40,4 @@
object.readFields(input);
}
- public static long readVLong(DataInput in) throws IOException {
- int vLen = 0;
- long value = 0L;
- while (true) {
- byte b = (byte) in.readByte();
- ++vLen;
- value += (((long) (b & 0x7f)) << ((vLen - 1) * 7));
- if ((b & 0x80) == 0) {
- break;
- }
- }
- return value;
- }
-
- public static void writeVLong(DataOutput out, long value) throws IOException {
- long data = value;
- do {
- byte b = (byte) (data & 0x7f);
- data >>= 7;
- if (data != 0) {
- b |= 0x80;
- }
- out.write(b);
- } while (data != 0);
- }
-
- public static long readVLong(byte[] data, int start, int length) {
- int vLen = 0;
- long value = 0L;
- while (true) {
- byte b = (byte) data[start];
- ++vLen;
- value += (((long) (b & 0x7f)) << ((vLen - 1) * 7));
- if ((b & 0x80) == 0) {
- break;
- }
- ++start;
- }
- if (vLen != length)
- throw new IllegalStateException("length mismatch -- vLen:" + vLen + " length:" + length);
- return value;
- }
-
}
diff --git a/pregelix/pregelix-benchmark/pom.xml b/pregelix/pregelix-benchmark/pom.xml
new file mode 100644
index 0000000..4d7d456
--- /dev/null
+++ b/pregelix/pregelix-benchmark/pom.xml
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>pregelix</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.10-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>pregelix-benchmark</artifactId>
+ <name>pregelix-benchmark</name>
+ <url>http://maven.apache.org</url>
+ <dependencies>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>3.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.giraph</groupId>
+ <artifactId>giraph-core</artifactId>
+ <version>1.0.0</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.10-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/pregelix/pregelix-benchmark/src/main/java/edu/uci/ics/pregelix/benchmark/PageRankVertex.java b/pregelix/pregelix-benchmark/src/main/java/edu/uci/ics/pregelix/benchmark/PageRankVertex.java
new file mode 100644
index 0000000..04c29de
--- /dev/null
+++ b/pregelix/pregelix-benchmark/src/main/java/edu/uci/ics/pregelix/benchmark/PageRankVertex.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.pregelix.benchmark;
+
+import org.apache.giraph.graph.Vertex;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.VLongWritable;
+
+/**
+ * Demonstrates the basic Pregel PageRank implementation.
+ */
+public class PageRankVertex extends Vertex<VLongWritable, DoubleWritable, FloatWritable, DoubleWritable> {
+
+ public static final String ITERATIONS = "HyracksPageRankVertex.iteration";
+ private final DoubleWritable vertexValue = new DoubleWritable();
+ private final DoubleWritable msg = new DoubleWritable();
+ private int maxIteration = -1;
+
+ @Override
+ public void compute(Iterable<DoubleWritable> msgIterator) {
+ if (maxIteration < 0) {
+ maxIteration = getContext().getConfiguration().getInt(ITERATIONS, 10);
+ }
+ if (getSuperstep() == 1) {
+ vertexValue.set(1.0 / getTotalNumVertices());
+ }
+ if (getSuperstep() >= 2 && getSuperstep() <= maxIteration) {
+ double sum = 0;
+ for (DoubleWritable inMsg : msgIterator) {
+ sum += inMsg.get();
+ }
+ vertexValue.set((0.15 / getTotalNumVertices()) + 0.85 * sum);
+ }
+
+ if (getSuperstep() >= 1 && getSuperstep() < maxIteration) {
+ long edges = getNumEdges();
+ msg.set(vertexValue.get() / edges);
+ sendMessageToAllEdges(msg);
+ } else {
+ voteToHalt();
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-benchmark/src/main/java/edu/uci/ics/pregelix/benchmark/TextPageRankInputFormat.java b/pregelix/pregelix-benchmark/src/main/java/edu/uci/ics/pregelix/benchmark/TextPageRankInputFormat.java
new file mode 100644
index 0000000..3d85f66
--- /dev/null
+++ b/pregelix/pregelix-benchmark/src/main/java/edu/uci/ics/pregelix/benchmark/TextPageRankInputFormat.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.benchmark;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.giraph.edge.Edge;
+import org.apache.giraph.edge.MapMutableEdge;
+import org.apache.giraph.io.formats.TextVertexInputFormat;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.VLongWritable;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+public class TextPageRankInputFormat extends TextVertexInputFormat<VLongWritable, DoubleWritable, FloatWritable> {
+
+ @Override
+ public TextVertexReader createVertexReader(InputSplit split, TaskAttemptContext context) throws IOException {
+ return new TextVertexReaderFromEachLine() {
+ String[] items;
+
+ @Override
+ protected VLongWritable getId(Text line) throws IOException {
+ items = line.toString().split(" ");
+ return new VLongWritable(Long.parseLong(items[0]));
+ }
+
+ @Override
+ protected DoubleWritable getValue(Text line) throws IOException {
+ return null;
+ }
+
+ @Override
+ protected Iterable<Edge<VLongWritable, FloatWritable>> getEdges(Text line) throws IOException {
+ List<Edge<VLongWritable, FloatWritable>> edges = new ArrayList<Edge<VLongWritable, FloatWritable>>();
+ Map<VLongWritable, FloatWritable> edgeMap = new HashMap<VLongWritable, FloatWritable>();
+ for (int i = 1; i < items.length; i++) {
+ edgeMap.put(new VLongWritable(Long.parseLong(items[i])), null);
+ }
+ for (Entry<VLongWritable, FloatWritable> entry : edgeMap.entrySet()) {
+ MapMutableEdge<VLongWritable, FloatWritable> edge = new MapMutableEdge<VLongWritable, FloatWritable>();
+ edge.setEntry(entry);
+ edge.setValue(null);
+ edges.add(edge);
+ }
+ return edges;
+ }
+
+ };
+ }
+}
diff --git a/pregelix/pregelix-core/pom.xml b/pregelix/pregelix-core/pom.xml
index 2a3efcf..3d1699f 100644
--- a/pregelix/pregelix-core/pom.xml
+++ b/pregelix/pregelix-core/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
@@ -90,21 +90,55 @@
<version>1.3</version>
<executions>
<execution>
+ <id>pregelix</id>
<configuration>
+ <platforms>
+ <platform>unix</platform>
+ </platforms>
<programs>
<program>
<mainClass>org.apache.hadoop.util.RunJar</mainClass>
- <name>pregelix-obselete</name>
+ <name>pregelix</name>
</program>
</programs>
<repositoryLayout>flat</repositoryLayout>
<repositoryName>lib</repositoryName>
+ <configurationDirectory>etc:"$HADOOP_HOME"/conf:/etc/hadoop/conf:"$1"</configurationDirectory>
</configuration>
<phase>package</phase>
<goals>
<goal>assemble</goal>
</goals>
</execution>
+ <execution>
+ <id>cc_nc_drivers</id>
+ <configuration>
+ <platforms>
+ <platform>unix</platform>
+ </platforms>
+ <programs>
+ <program>
+ <mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass>
+ <name>pregelixcc</name>
+ </program>
+ <program>
+ <mainClass>edu.uci.ics.hyracks.control.nc.NCDriver</mainClass>
+ <name>pregelixnc</name>
+ <commandLineArguments>
+ <commandLineArgument>-app-nc-main-class</commandLineArgument>
+ <commandLineArgument>edu.uci.ics.pregelix.runtime.bootstrap.NCApplicationEntryPoint</commandLineArgument>
+ </commandLineArguments>
+ </program>
+ </programs>
+ <repositoryLayout>flat</repositoryLayout>
+ <repositoryName>lib</repositoryName>
+ <configurationDirectory>etc:"$HADOOP_HOME"/conf:/etc/hadoop/conf</configurationDirectory>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>assemble</goal>
+ </goals>
+ </execution>
</executions>
</plugin>
<plugin>
@@ -209,84 +243,84 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-dataflow</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-runtime</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
@@ -300,7 +334,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks.examples</groupId>
<artifactId>hyracks-integration-tests</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>test</scope>
</dependency>
<dependency>
@@ -320,7 +354,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-ipc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/base/IDriver.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/base/IDriver.java
index bc6c0cf..c72f392 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/base/IDriver.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/base/IDriver.java
@@ -15,6 +15,8 @@
package edu.uci.ics.pregelix.core.base;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
import edu.uci.ics.pregelix.api.job.PregelixJob;
@@ -29,6 +31,11 @@
public void runJob(PregelixJob job, String ipAddress, int port) throws HyracksException;
+ public void runJobs(List<PregelixJob> jobs, String ipAddress, int port) throws HyracksException;
+
public void runJob(PregelixJob job, Plan planChoice, String ipAddress, int port, boolean profiling)
throws HyracksException;
+
+ public void runJobs(List<PregelixJob> jobs, Plan planChoice, String ipAddress, int port, boolean profiling)
+ throws HyracksException;
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/base/IJobGen.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/base/IJobGen.java
index 2d58902..6bb0dea 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/base/IJobGen.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/base/IJobGen.java
@@ -26,4 +26,10 @@
public JobSpecification generateJob(int iteration) throws HyracksException;
+ public JobSpecification[] generateCheckpointing(int lastSuccessfulIteration) throws HyracksException;
+
+ public JobSpecification[] generateLoadingCheckpoint(int lastCheckpointedIteration) throws HyracksException;
+
+ public JobSpecification generateClearState() throws HyracksException;
+
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java
index 2d4064b..d6a6f3d 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/driver/Driver.java
@@ -17,15 +17,22 @@
import java.io.File;
import java.io.FilenameFilter;
+import java.io.IOException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.EnumSet;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import edu.uci.ics.hyracks.api.client.HyracksConnection;
@@ -35,7 +42,9 @@
import edu.uci.ics.hyracks.api.job.JobFlag;
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.pregelix.api.job.ICheckpointHook;
import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.BspUtils;
import edu.uci.ics.pregelix.core.base.IDriver;
import edu.uci.ics.pregelix.core.jobgen.JobGen;
import edu.uci.ics.pregelix.core.jobgen.JobGenInnerJoin;
@@ -43,16 +52,15 @@
import edu.uci.ics.pregelix.core.jobgen.JobGenOuterJoinSingleSort;
import edu.uci.ics.pregelix.core.jobgen.JobGenOuterJoinSort;
import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.ExceptionUtilities;
import edu.uci.ics.pregelix.dataflow.util.IterationUtils;
@SuppressWarnings("rawtypes")
public class Driver implements IDriver {
private static final Log LOG = LogFactory.getLog(Driver.class);
- private JobGen jobGen;
- private boolean profiling;
-
private IHyracksClientConnection hcc;
private Class exampleClass;
+ private boolean profiling = false;
public Driver(Class exampleClass) {
this.exampleClass = exampleClass;
@@ -64,93 +72,251 @@
}
@Override
+ public void runJobs(List<PregelixJob> jobs, String ipAddress, int port) throws HyracksException {
+ runJobs(jobs, Plan.OUTER_JOIN, ipAddress, port, false);
+ }
+
+ @Override
public void runJob(PregelixJob job, Plan planChoice, String ipAddress, int port, boolean profiling)
throws HyracksException {
+ runJobs(Collections.singletonList(job), planChoice, ipAddress, port, profiling);
+ }
+
+ @Override
+ public void runJobs(List<PregelixJob> jobs, Plan planChoice, String ipAddress, int port, boolean profiling)
+ throws HyracksException {
try {
- /** add hadoop configurations */
- URL hadoopCore = job.getClass().getClassLoader().getResource("core-site.xml");
- if (hadoopCore != null) {
- job.getConfiguration().addResource(hadoopCore);
+ if (jobs.size() <= 0) {
+ throw new HyracksException("Please submit at least one job for execution!");
}
- URL hadoopMapRed = job.getClass().getClassLoader().getResource("mapred-site.xml");
- if (hadoopMapRed != null) {
- job.getConfiguration().addResource(hadoopMapRed);
- }
- URL hadoopHdfs = job.getClass().getClassLoader().getResource("hdfs-site.xml");
- if (hadoopHdfs != null) {
- job.getConfiguration().addResource(hadoopHdfs);
- }
- ClusterConfig.loadClusterConfig(ipAddress, port);
-
- LOG.info("job started");
- long start = System.currentTimeMillis();
- long end = start;
- long time = 0;
-
this.profiling = profiling;
+ PregelixJob currentJob = jobs.get(0);
+ PregelixJob lastJob = currentJob;
+ addHadoopConfiguration(currentJob, ipAddress, port, true);
+ JobGen jobGen = null;
- switch (planChoice) {
- case INNER_JOIN:
- jobGen = new JobGenInnerJoin(job);
- break;
- case OUTER_JOIN:
- jobGen = new JobGenOuterJoin(job);
- break;
- case OUTER_JOIN_SORT:
- jobGen = new JobGenOuterJoinSort(job);
- break;
- case OUTER_JOIN_SINGLE_SORT:
- jobGen = new JobGenOuterJoinSingleSort(job);
- break;
- default:
- jobGen = new JobGenInnerJoin(job);
- }
+ /** prepare job -- deploy jars */
+ DeploymentId deploymentId = prepareJobs(ipAddress, port);
+ LOG.info("job started");
- if (hcc == null)
- hcc = new HyracksConnection(ipAddress, port);
+ IntWritable lastSnapshotJobIndex = new IntWritable(0);
+ IntWritable lastSnapshotSuperstep = new IntWritable(0);
+ boolean failed = false;
+ int retryCount = 0;
+ int maxRetryCount = 3;
+ jobGen = selectJobGen(planChoice, currentJob);
- URLClassLoader classLoader = (URLClassLoader) exampleClass.getClassLoader();
- List<File> jars = new ArrayList<File>();
- URL[] urls = classLoader.getURLs();
- for (URL url : urls)
- if (url.toString().endsWith(".jar"))
- jars.add(new File(url.getPath()));
- DeploymentId deploymentId = installApplication(jars);
-
- start = System.currentTimeMillis();
- FileSystem dfs = FileSystem.get(job.getConfiguration());
- dfs.delete(FileOutputFormat.getOutputPath(job), true);
- runCreate(deploymentId, jobGen);
- runDataLoad(deploymentId, jobGen);
- end = System.currentTimeMillis();
- time = end - start;
- LOG.info("data loading finished " + time + "ms");
- int i = 1;
- boolean terminate = false;
do {
- start = System.currentTimeMillis();
- runLoopBodyIteration(deploymentId, jobGen, i);
- end = System.currentTimeMillis();
- time = end - start;
- LOG.info("iteration " + i + " finished " + time + "ms");
- terminate = IterationUtils.readTerminationState(job.getConfiguration(), jobGen.getJobId())
- || IterationUtils.readForceTerminationState(job.getConfiguration(), jobGen.getJobId());
- i++;
- } while (!terminate);
+ try {
+ for (int i = lastSnapshotJobIndex.get(); i < jobs.size(); i++) {
+ lastJob = currentJob;
+ currentJob = jobs.get(i);
+ currentJob.setRecoveryCount(retryCount);
- start = System.currentTimeMillis();
- runHDFSWRite(deploymentId, jobGen);
- runCleanup(deploymentId, jobGen);
- end = System.currentTimeMillis();
- time = end - start;
- LOG.info("result writing finished " + time + "ms");
- hcc.unDeployBinary(deploymentId);
+ /** add hadoop configurations */
+ addHadoopConfiguration(currentJob, ipAddress, port, failed);
+ ICheckpointHook ckpHook = BspUtils.createCheckpointHook(currentJob.getConfiguration());
+
+ /** load the data */
+ if ((i == 0 || compatible(lastJob, currentJob)) && !failed) {
+ if (i != 0) {
+ finishJobs(jobGen, deploymentId);
+ /** invalidate/clear checkpoint */
+ lastSnapshotJobIndex.set(0);
+ lastSnapshotSuperstep.set(0);
+ }
+ jobGen.reset(currentJob);
+ loadData(currentJob, jobGen, deploymentId);
+ } else {
+ jobGen.reset(currentJob);
+ }
+
+ /** run loop-body jobs */
+ runLoopBody(deploymentId, currentJob, jobGen, i, lastSnapshotJobIndex, lastSnapshotSuperstep,
+ ckpHook, failed);
+ runClearState(deploymentId, jobGen);
+ failed = false;
+ }
+
+ /** finish the jobs */
+ finishJobs(jobGen, deploymentId);
+ /** clear checkpoints if any */
+ jobGen.clearCheckpoints();
+ hcc.unDeployBinary(deploymentId);
+ } catch (Exception e1) {
+ Set<String> blackListNodes = new HashSet<String>();
+ /** disk failures or node failures */
+ if (ExceptionUtilities.recoverable(e1, blackListNodes)) {
+ ClusterConfig.addToBlackListNodes(blackListNodes);
+ failed = true;
+ retryCount++;
+ } else {
+ throw e1;
+ }
+ }
+ } while (failed && retryCount < maxRetryCount);
LOG.info("job finished");
} catch (Exception e) {
throw new HyracksException(e);
}
}
+ private boolean compatible(PregelixJob lastJob, PregelixJob currentJob) {
+ Class lastVertexIdClass = BspUtils.getVertexIndexClass(lastJob.getConfiguration());
+ Class lastVertexValueClass = BspUtils.getVertexValueClass(lastJob.getConfiguration());
+ Class lastEdgeValueClass = BspUtils.getEdgeValueClass(lastJob.getConfiguration());
+ Path lastOutputPath = FileOutputFormat.getOutputPath(lastJob);
+
+ Class currentVertexIdClass = BspUtils.getVertexIndexClass(currentJob.getConfiguration());
+ Class currentVertexValueClass = BspUtils.getVertexValueClass(currentJob.getConfiguration());
+ Class currentEdgeValueClass = BspUtils.getEdgeValueClass(currentJob.getConfiguration());
+ Path[] currentInputPaths = FileInputFormat.getInputPaths(currentJob);
+
+ return lastVertexIdClass.equals(currentVertexIdClass)
+ && lastVertexValueClass.equals(currentVertexValueClass)
+ && lastEdgeValueClass.equals(currentEdgeValueClass)
+ && (currentInputPaths.length == 0 || (currentInputPaths.length == 1 && lastOutputPath
+ .equals(currentInputPaths[0])));
+ }
+
+ private JobGen selectJobGen(Plan planChoice, PregelixJob currentJob) {
+ JobGen jobGen;
+ switch (planChoice) {
+ case INNER_JOIN:
+ jobGen = new JobGenInnerJoin(currentJob);
+ break;
+ case OUTER_JOIN:
+ jobGen = new JobGenOuterJoin(currentJob);
+ break;
+ case OUTER_JOIN_SORT:
+ jobGen = new JobGenOuterJoinSort(currentJob);
+ break;
+ case OUTER_JOIN_SINGLE_SORT:
+ jobGen = new JobGenOuterJoinSingleSort(currentJob);
+ break;
+ default:
+ jobGen = new JobGenInnerJoin(currentJob);
+ }
+ return jobGen;
+ }
+
+ private long loadData(PregelixJob currentJob, JobGen jobGen, DeploymentId deploymentId) throws IOException,
+ Exception {
+ long start;
+ long end;
+ long time;
+ start = System.currentTimeMillis();
+ FileSystem dfs = FileSystem.get(currentJob.getConfiguration());
+ Path outputPath = FileOutputFormat.getOutputPath(currentJob);
+ if (outputPath != null) {
+ dfs.delete(outputPath, true);
+ }
+ runCreate(deploymentId, jobGen);
+ runDataLoad(deploymentId, jobGen);
+ end = System.currentTimeMillis();
+ time = end - start;
+ LOG.info("data loading finished " + time + "ms");
+ return time;
+ }
+
+ private void finishJobs(JobGen jobGen, DeploymentId deploymentId) throws Exception {
+ long start;
+ long end;
+ long time;
+ start = System.currentTimeMillis();
+ runHDFSWRite(deploymentId, jobGen);
+ runCleanup(deploymentId, jobGen);
+ end = System.currentTimeMillis();
+ time = end - start;
+ LOG.info("result writing finished " + time + "ms");
+ }
+
+ private DeploymentId prepareJobs(String ipAddress, int port) throws Exception {
+ if (hcc == null) {
+ hcc = new HyracksConnection(ipAddress, port);
+ }
+ URLClassLoader classLoader = (URLClassLoader) exampleClass.getClassLoader();
+ List<File> jars = new ArrayList<File>();
+ URL[] urls = classLoader.getURLs();
+ for (URL url : urls)
+ if (url.toString().endsWith(".jar"))
+ jars.add(new File(url.getPath()));
+ DeploymentId deploymentId = installApplication(jars);
+ return deploymentId;
+ }
+
+ private void addHadoopConfiguration(PregelixJob job, String ipAddress, int port, boolean loadClusterConfig)
+ throws HyracksException {
+ URL hadoopCore = job.getClass().getClassLoader().getResource("core-site.xml");
+ if (hadoopCore != null) {
+ job.getConfiguration().addResource(hadoopCore);
+ }
+ URL hadoopMapRed = job.getClass().getClassLoader().getResource("mapred-site.xml");
+ if (hadoopMapRed != null) {
+ job.getConfiguration().addResource(hadoopMapRed);
+ }
+ URL hadoopHdfs = job.getClass().getClassLoader().getResource("hdfs-site.xml");
+ if (hadoopHdfs != null) {
+ job.getConfiguration().addResource(hadoopHdfs);
+ }
+ if (loadClusterConfig) {
+ ClusterConfig.loadClusterConfig(ipAddress, port);
+ }
+ }
+
+ private void runLoopBody(DeploymentId deploymentId, PregelixJob job, JobGen jobGen, int currentJobIndex,
+ IntWritable snapshotJobIndex, IntWritable snapshotSuperstep, ICheckpointHook ckpHook, boolean doRecovery)
+ throws Exception {
+ if (doRecovery) {
+ /** reload the checkpoint */
+ if (snapshotSuperstep.get() > 0) {
+ runClearState(deploymentId, jobGen);
+ runLoadCheckpoint(deploymentId, jobGen, snapshotSuperstep.get());
+ } else {
+ runClearState(deploymentId, jobGen);
+ loadData(job, jobGen, deploymentId);
+ }
+ }
+ int i = doRecovery ? snapshotSuperstep.get() + 1 : 1;
+ int ckpInterval = BspUtils.getCheckpointingInterval(job.getConfiguration());
+ boolean terminate = false;
+ long start, end, time;
+ do {
+ start = System.currentTimeMillis();
+ runLoopBodyIteration(deploymentId, jobGen, i);
+ end = System.currentTimeMillis();
+ time = end - start;
+ LOG.info(job + ": iteration " + i + " finished " + time + "ms");
+ terminate = IterationUtils.readTerminationState(job.getConfiguration(), jobGen.getJobId())
+ || IterationUtils.readForceTerminationState(job.getConfiguration(), jobGen.getJobId());
+ if (ckpHook.checkpoint(i) || (ckpInterval > 0 && i % ckpInterval == 0)) {
+ runCheckpoint(deploymentId, jobGen, i);
+ snapshotJobIndex.set(currentJobIndex);
+ snapshotSuperstep.set(i);
+ }
+ i++;
+ } while (!terminate);
+ }
+
+ private void runCheckpoint(DeploymentId deploymentId, JobGen jobGen, int lastSuccessfulIteration) throws Exception {
+ try {
+ JobSpecification[] ckpJobs = jobGen.generateCheckpointing(lastSuccessfulIteration);
+ runJobArray(deploymentId, ckpJobs);
+ } catch (Exception e) {
+ throw e;
+ }
+ }
+
+ private void runLoadCheckpoint(DeploymentId deploymentId, JobGen jobGen, int checkPointedIteration)
+ throws Exception {
+ try {
+ JobSpecification[] ckpJobs = jobGen.generateLoadingCheckpoint(checkPointedIteration);
+ runJobArray(deploymentId, ckpJobs);
+ } catch (Exception e) {
+ throw e;
+ }
+ }
+
private void runCreate(DeploymentId deploymentId, JobGen jobGen) throws Exception {
try {
JobSpecification treeCreateSpec = jobGen.generateCreatingJob();
@@ -196,6 +362,15 @@
}
}
+ private void runClearState(DeploymentId deploymentId, JobGen jobGen) throws Exception {
+ try {
+ JobSpecification clear = jobGen.generateClearState();
+ execute(deploymentId, clear);
+ } catch (Exception e) {
+ throw e;
+ }
+ }
+
private void runJobArray(DeploymentId deploymentId, JobSpecification[] jobs) throws Exception {
for (JobSpecification job : jobs) {
execute(deploymentId, job);
@@ -204,6 +379,7 @@
private void execute(DeploymentId deploymentId, JobSpecification job) throws Exception {
job.setUseConnectorPolicyForScheduling(false);
+ job.setMaxReattempts(0);
JobId jobId = hcc.startJob(deploymentId, job,
profiling ? EnumSet.of(JobFlag.PROFILE_RUNTIME) : EnumSet.noneOf(JobFlag.class));
hcc.waitForCompletion(jobId);
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGen.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGen.java
index 478ac07..4863378 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGen.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGen.java
@@ -15,8 +15,14 @@
package edu.uci.ics.pregelix.core.jobgen;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
import java.io.DataOutput;
+import java.io.DataOutputStream;
import java.io.File;
+import java.io.IOException;
import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.List;
@@ -24,10 +30,18 @@
import java.util.logging.Logger;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import edu.uci.ics.hyracks.api.constraints.PartitionConstraintHelper;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
@@ -50,6 +64,7 @@
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
import edu.uci.ics.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs2.dataflow.HDFSReadOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
@@ -67,9 +82,12 @@
import edu.uci.ics.hyracks.storage.common.file.TransientLocalResourceFactoryProvider;
import edu.uci.ics.pregelix.api.graph.GlobalAggregator;
import edu.uci.ics.pregelix.api.graph.MessageCombiner;
+import edu.uci.ics.pregelix.api.graph.MsgList;
import edu.uci.ics.pregelix.api.graph.Vertex;
import edu.uci.ics.pregelix.api.graph.VertexPartitioner;
import edu.uci.ics.pregelix.api.io.VertexInputFormat;
+import edu.uci.ics.pregelix.api.io.internal.InternalVertexInputFormat;
+import edu.uci.ics.pregelix.api.io.internal.InternalVertexOutputFormat;
import edu.uci.ics.pregelix.api.job.PregelixJob;
import edu.uci.ics.pregelix.api.util.BspUtils;
import edu.uci.ics.pregelix.api.util.ReflectionUtils;
@@ -79,15 +97,24 @@
import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
import edu.uci.ics.pregelix.core.runtime.touchpoint.WritableComparingBinaryComparatorFactory;
import edu.uci.ics.pregelix.core.util.DataflowUtils;
+import edu.uci.ics.pregelix.dataflow.ClearStateOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.EmptySinkOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.EmptyTupleSourceOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.HDFSFileWriteOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.KeyValueParserFactory;
+import edu.uci.ics.pregelix.dataflow.MaterializingReadOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.MaterializingWriteOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.VertexFileScanOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.VertexFileWriteOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.VertexWriteOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
+import edu.uci.ics.pregelix.dataflow.std.RuntimeHookOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory;
import edu.uci.ics.pregelix.runtime.bootstrap.IndexLifeCycleManagerProvider;
import edu.uci.ics.pregelix.runtime.bootstrap.StorageManagerInterface;
import edu.uci.ics.pregelix.runtime.bootstrap.VirtualBufferCacheProvider;
+import edu.uci.ics.pregelix.runtime.touchpoint.RecoveryRuntimeHookFactory;
import edu.uci.ics.pregelix.runtime.touchpoint.RuntimeHookFactory;
import edu.uci.ics.pregelix.runtime.touchpoint.VertexIdPartitionComputerFactory;
import edu.uci.ics.pregelix.runtime.touchpoint.VertexPartitionComputerFactory;
@@ -99,8 +126,8 @@
protected static final float DEFAULT_BTREE_FILL_FACTOR = 1.00f;
protected static final int tableSize = 10485767;
protected static final String PRIMARY_INDEX = "primary";
- protected final Configuration conf;
- protected final PregelixJob giraphJob;
+ protected Configuration conf;
+ protected PregelixJob pregelixJob;
protected IIndexLifecycleManagerProvider lcManagerProvider = IndexLifeCycleManagerProvider.INSTANCE;
protected IStorageManagerInterface storageManagerInterface = StorageManagerInterface.INSTANCE;
protected String jobId = new UUID(System.currentTimeMillis(), System.nanoTime()).toString();
@@ -111,13 +138,15 @@
protected static final String SECONDARY_INDEX_EVEN = "secondary2";
public JobGen(PregelixJob job) {
- this.conf = job.getConfiguration();
- this.giraphJob = job;
- this.initJobConfiguration();
- job.setJobId(jobId);
+ init(job);
+ }
- // set the frame size to be the one user specified if the user did
- // specify.
+ private void init(PregelixJob job) {
+ conf = job.getConfiguration();
+ pregelixJob = job;
+ initJobConfiguration();
+ job.setJobId(jobId);
+ // set the frame size to be the one user specified if the user did specify.
int specifiedFrameSize = BspUtils.getFrameSize(job.getConfiguration());
if (specifiedFrameSize > 0) {
frameSize = specifiedFrameSize;
@@ -128,6 +157,10 @@
}
}
+ public void reset(PregelixJob job) {
+ init(job);
+ }
+
@SuppressWarnings({ "rawtypes", "unchecked" })
private void initJobConfiguration() {
Class vertexClass = conf.getClass(PregelixJob.VERTEX_CLASS, Vertex.class);
@@ -187,74 +220,6 @@
return spec;
}
- @SuppressWarnings({ "rawtypes", "unchecked" })
- @Override
- public JobSpecification generateLoadingJob() throws HyracksException {
- Class<? extends WritableComparable<?>> vertexIdClass = BspUtils.getVertexIndexClass(conf);
- Class<? extends Writable> vertexClass = BspUtils.getVertexClass(conf);
- JobSpecification spec = new JobSpecification();
- IFileSplitProvider fileSplitProvider = ClusterConfig.getFileSplitProvider(jobId, PRIMARY_INDEX);
-
- /**
- * the graph file scan operator and use count constraint first, will use
- * absolute constraint later
- */
- VertexInputFormat inputFormat = BspUtils.createVertexInputFormat(conf);
- List<InputSplit> splits = new ArrayList<InputSplit>();
- try {
- splits = inputFormat.getSplits(giraphJob, fileSplitProvider.getFileSplits().length);
- LOGGER.info("number of splits: " + splits.size());
- for (InputSplit split : splits)
- LOGGER.info(split.toString());
- } catch (Exception e) {
- throw new HyracksDataException(e);
- }
- RecordDescriptor recordDescriptor = DataflowUtils.getRecordDescriptorFromKeyValueClasses(
- vertexIdClass.getName(), vertexClass.getName());
- IConfigurationFactory confFactory = new ConfigurationFactory(conf);
- String[] readSchedule = ClusterConfig.getHdfsScheduler().getLocationConstraints(splits);
- VertexFileScanOperatorDescriptor scanner = new VertexFileScanOperatorDescriptor(spec, recordDescriptor, splits,
- readSchedule, confFactory);
- ClusterConfig.setLocationConstraint(spec, scanner);
-
- /**
- * construct sort operator
- */
- int[] sortFields = new int[1];
- sortFields[0] = 0;
- INormalizedKeyComputerFactory nkmFactory = JobGenUtil.getINormalizedKeyComputerFactory(conf);
- IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[1];
- comparatorFactories[0] = new WritableComparingBinaryComparatorFactory(WritableComparator.get(vertexIdClass)
- .getClass());
- ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, maxFrameNumber, sortFields,
- nkmFactory, comparatorFactories, recordDescriptor);
- ClusterConfig.setLocationConstraint(spec, sorter);
-
- /**
- * construct tree bulk load operator
- */
- int[] fieldPermutation = new int[2];
- fieldPermutation[0] = 0;
- fieldPermutation[1] = 1;
- ITypeTraits[] typeTraits = new ITypeTraits[2];
- typeTraits[0] = new TypeTraits(false);
- typeTraits[1] = new TypeTraits(false);
- TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
- storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories,
- sortFields, fieldPermutation, DEFAULT_BTREE_FILL_FACTOR, false, 0, false,
- getIndexDataflowHelperFactory(), NoOpOperationCallbackFactory.INSTANCE);
- ClusterConfig.setLocationConstraint(spec, btreeBulkLoad);
-
- /**
- * connect operator descriptors
- */
- ITuplePartitionComputerFactory hashPartitionComputerFactory = getVertexPartitionComputerFactory();
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 0, sorter, 0);
- spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
- spec.setFrameSize(frameSize);
- return spec;
- }
-
@Override
public JobSpecification generateJob(int iteration) throws HyracksException {
if (iteration <= 0)
@@ -280,7 +245,7 @@
VertexInputFormat inputFormat = BspUtils.createVertexInputFormat(conf);
List<InputSplit> splits = new ArrayList<InputSplit>();
try {
- splits = inputFormat.getSplits(giraphJob, fileSplitProvider.getFileSplits().length);
+ splits = inputFormat.getSplits(pregelixJob, fileSplitProvider.getFileSplits().length);
} catch (Exception e) {
throw new HyracksDataException(e);
}
@@ -398,8 +363,201 @@
return spec;
}
- @SuppressWarnings({ "rawtypes", "unchecked" })
public JobSpecification scanIndexWriteGraph() throws HyracksException {
+ JobSpecification spec = scanIndexWriteToHDFS(conf);
+ return spec;
+ }
+
+ @Override
+ public JobSpecification[] generateCheckpointing(int lastSuccessfulIteration) throws HyracksException {
+ try {
+ PregelixJob tmpJob = this.createCloneJob("Vertex checkpointing for job " + jobId, pregelixJob);
+ tmpJob.setVertexOutputFormatClass(InternalVertexOutputFormat.class);
+ FileOutputFormat.setOutputPath(tmpJob,
+ new Path(BspUtils.getVertexCheckpointPath(conf, lastSuccessfulIteration)));
+ tmpJob.setOutputKeyClass(NullWritable.class);
+ tmpJob.setOutputValueClass(BspUtils.getVertexClass(tmpJob.getConfiguration()));
+ FileSystem dfs = FileSystem.get(tmpJob.getConfiguration());
+
+ dfs.delete(new Path(BspUtils.getVertexCheckpointPath(conf, lastSuccessfulIteration)), true);
+ JobSpecification vertexCkpSpec = scanIndexWriteToHDFS(tmpJob.getConfiguration());
+
+ dfs.delete(new Path(BspUtils.getMessageCheckpointPath(conf, lastSuccessfulIteration)), true);
+ JobSpecification[] stateCkpSpecs = generateStateCheckpointing(lastSuccessfulIteration);
+ JobSpecification[] specs = new JobSpecification[1 + stateCkpSpecs.length];
+
+ specs[0] = vertexCkpSpec;
+ for (int i = 1; i < specs.length; i++) {
+ specs[i] = stateCkpSpecs[i - 1];
+ }
+ return specs;
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+
+ @Override
+ public JobSpecification generateLoadingJob() throws HyracksException {
+ JobSpecification spec = loadHDFSData(pregelixJob);
+ return spec;
+ }
+
+ public void clearCheckpoints() throws IOException {
+ FileSystem dfs = FileSystem.get(conf);
+ // clear the checkpoint directory
+ dfs.delete(new Path("/tmp/ckpoint/" + jobId), true);
+ }
+
+ @Override
+ public JobSpecification[] generateLoadingCheckpoint(int lastCheckpointedIteration) throws HyracksException {
+ try {
+ PregelixJob tmpJob = this.createCloneJob("Vertex checkpoint loading for job " + jobId, pregelixJob);
+ tmpJob.setVertexInputFormatClass(InternalVertexInputFormat.class);
+ FileInputFormat.setInputPaths(tmpJob,
+ new Path(BspUtils.getVertexCheckpointPath(conf, lastCheckpointedIteration)));
+ JobSpecification[] cleanVertices = generateCleanup();
+ JobSpecification createIndex = generateCreatingJob();
+ JobSpecification vertexLoadSpec = loadHDFSData(tmpJob);
+ JobSpecification[] stateLoadSpecs = generateStateCheckpointLoading(lastCheckpointedIteration, tmpJob);
+ JobSpecification[] specs = new JobSpecification[cleanVertices.length + 2 + stateLoadSpecs.length];
+
+ int i = 0;
+ for (; i < cleanVertices.length; i++) {
+ specs[i] = cleanVertices[i];
+ }
+ specs[i++] = createIndex;
+ specs[i++] = vertexLoadSpec;
+ for (; i < specs.length; i++) {
+ specs[i] = stateLoadSpecs[i - cleanVertices.length - 2];
+ }
+ return specs;
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+
+ /***
+ * generate a "clear state" job
+ */
+ public JobSpecification generateClearState() throws HyracksException {
+ JobSpecification spec = new JobSpecification();
+ ClearStateOperatorDescriptor clearState = new ClearStateOperatorDescriptor(spec, jobId);
+ ClusterConfig.setLocationConstraint(spec, clearState);
+ spec.addRoot(clearState);
+ return spec;
+ }
+
+ /***
+ * drop the index with the given name
+ *
+ * @return JobSpecification
+ * @throws HyracksException
+ */
+ protected JobSpecification dropIndex(String indexName) throws HyracksException {
+ JobSpecification spec = new JobSpecification();
+
+ IFileSplitProvider fileSplitProvider = ClusterConfig.getFileSplitProvider(jobId, indexName);
+ IndexDropOperatorDescriptor drop = new IndexDropOperatorDescriptor(spec, storageManagerInterface,
+ lcManagerProvider, fileSplitProvider, getIndexDataflowHelperFactory());
+
+ ClusterConfig.setLocationConstraint(spec, drop);
+ spec.addRoot(drop);
+ spec.setFrameSize(frameSize);
+ return spec;
+ }
+
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ protected ITuplePartitionComputerFactory getVertexPartitionComputerFactory() {
+ IConfigurationFactory confFactory = new ConfigurationFactory(conf);
+ Class<? extends VertexPartitioner> partitionerClazz = BspUtils.getVertexPartitionerClass(conf);
+ if (partitionerClazz != null) {
+ return new VertexPartitionComputerFactory(confFactory);
+ } else {
+ return new VertexIdPartitionComputerFactory(new WritableSerializerDeserializerFactory(
+ BspUtils.getVertexIndexClass(conf)));
+ }
+ }
+
+ protected IIndexDataflowHelperFactory getIndexDataflowHelperFactory() {
+ if (BspUtils.useLSM(conf)) {
+ return new LSMBTreeDataflowHelperFactory(new VirtualBufferCacheProvider(), new ConstantMergePolicyProvider(
+ 3), NoOpOperationTrackerProvider.INSTANCE, SynchronousSchedulerProvider.INSTANCE,
+ NoOpIOOperationCallback.INSTANCE, 0.01);
+ } else {
+ return new BTreeDataflowHelperFactory();
+ }
+ }
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ private JobSpecification loadHDFSData(PregelixJob job) throws HyracksException, HyracksDataException {
+ Configuration conf = job.getConfiguration();
+ Class<? extends WritableComparable<?>> vertexIdClass = BspUtils.getVertexIndexClass(conf);
+ Class<? extends Writable> vertexClass = BspUtils.getVertexClass(conf);
+ JobSpecification spec = new JobSpecification();
+ IFileSplitProvider fileSplitProvider = ClusterConfig.getFileSplitProvider(jobId, PRIMARY_INDEX);
+
+ /**
+ * the graph file scan operator and use count constraint first, will use
+ * absolute constraint later
+ */
+ VertexInputFormat inputFormat = BspUtils.createVertexInputFormat(conf);
+ List<InputSplit> splits = new ArrayList<InputSplit>();
+ try {
+ splits = inputFormat.getSplits(job, fileSplitProvider.getFileSplits().length);
+ LOGGER.info("number of splits: " + splits.size());
+ for (InputSplit split : splits)
+ LOGGER.info(split.toString());
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ RecordDescriptor recordDescriptor = DataflowUtils.getRecordDescriptorFromKeyValueClasses(
+ vertexIdClass.getName(), vertexClass.getName());
+ IConfigurationFactory confFactory = new ConfigurationFactory(conf);
+ String[] readSchedule = ClusterConfig.getHdfsScheduler().getLocationConstraints(splits);
+ VertexFileScanOperatorDescriptor scanner = new VertexFileScanOperatorDescriptor(spec, recordDescriptor, splits,
+ readSchedule, confFactory);
+ ClusterConfig.setLocationConstraint(spec, scanner);
+
+ /**
+ * construct sort operator
+ */
+ int[] sortFields = new int[1];
+ sortFields[0] = 0;
+ INormalizedKeyComputerFactory nkmFactory = JobGenUtil.getINormalizedKeyComputerFactory(conf);
+ IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[1];
+ comparatorFactories[0] = new WritableComparingBinaryComparatorFactory(WritableComparator.get(vertexIdClass)
+ .getClass());
+ ExternalSortOperatorDescriptor sorter = new ExternalSortOperatorDescriptor(spec, maxFrameNumber, sortFields,
+ nkmFactory, comparatorFactories, recordDescriptor);
+ ClusterConfig.setLocationConstraint(spec, sorter);
+
+ /**
+ * construct tree bulk load operator
+ */
+ int[] fieldPermutation = new int[2];
+ fieldPermutation[0] = 0;
+ fieldPermutation[1] = 1;
+ ITypeTraits[] typeTraits = new ITypeTraits[2];
+ typeTraits[0] = new TypeTraits(false);
+ typeTraits[1] = new TypeTraits(false);
+ TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
+ storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories,
+ sortFields, fieldPermutation, DEFAULT_BTREE_FILL_FACTOR, true, 0, false,
+ getIndexDataflowHelperFactory(), NoOpOperationCallbackFactory.INSTANCE);
+ ClusterConfig.setLocationConstraint(spec, btreeBulkLoad);
+
+ /**
+ * connect operator descriptors
+ */
+ ITuplePartitionComputerFactory hashPartitionComputerFactory = getVertexPartitionComputerFactory();
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 0, sorter, 0);
+ spec.connect(new OneToOneConnectorDescriptor(spec), sorter, 0, btreeBulkLoad, 0);
+ spec.setFrameSize(frameSize);
+ return spec;
+ }
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ private JobSpecification scanIndexWriteToHDFS(Configuration conf) throws HyracksDataException, HyracksException {
Class<? extends WritableComparable<?>> vertexIdClass = BspUtils.getVertexIndexClass(conf);
Class<? extends Writable> vertexClass = BspUtils.getVertexClass(conf);
JobSpecification spec = new JobSpecification();
@@ -444,7 +602,8 @@
*/
IRecordDescriptorFactory inputRdFactory = DataflowUtils.getWritableRecordDescriptorFactoryFromWritableClasses(
vertexIdClass.getName(), vertexClass.getName());
- HDFSFileWriteOperatorDescriptor writer = new HDFSFileWriteOperatorDescriptor(spec, confFactory, inputRdFactory);
+ VertexFileWriteOperatorDescriptor writer = new VertexFileWriteOperatorDescriptor(spec, confFactory,
+ inputRdFactory);
ClusterConfig.setLocationConstraint(spec, writer);
/**
@@ -456,45 +615,137 @@
return spec;
}
- /***
- * drop the sindex
- *
- * @return JobSpecification
- * @throws HyracksException
- */
- protected JobSpecification dropIndex(String indexName) throws HyracksException {
- JobSpecification spec = new JobSpecification();
-
- IFileSplitProvider fileSplitProvider = ClusterConfig.getFileSplitProvider(jobId, indexName);
- IndexDropOperatorDescriptor drop = new IndexDropOperatorDescriptor(spec, storageManagerInterface,
- lcManagerProvider, fileSplitProvider, getIndexDataflowHelperFactory());
-
- ClusterConfig.setLocationConstraint(spec, drop);
- spec.addRoot(drop);
- spec.setFrameSize(frameSize);
- return spec;
- }
-
- @SuppressWarnings({ "unchecked", "rawtypes" })
- protected ITuplePartitionComputerFactory getVertexPartitionComputerFactory() {
- IConfigurationFactory confFactory = new ConfigurationFactory(conf);
- Class<? extends VertexPartitioner> partitionerClazz = BspUtils.getVertexPartitionerClass(conf);
- if (partitionerClazz != null) {
- return new VertexPartitionComputerFactory(confFactory);
- } else {
- return new VertexIdPartitionComputerFactory(new WritableSerializerDeserializerFactory(
- BspUtils.getVertexIndexClass(conf)));
+ protected PregelixJob createCloneJob(String newJobName, PregelixJob oldJob) throws HyracksException {
+ try {
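+            // round-trip the old job's Configuration through a byte stream so that the
+            // clone gets an independent copy rather than a shared reference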
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ DataOutput dos = new DataOutputStream(bos);
+ oldJob.getConfiguration().write(dos);
+ PregelixJob newJob = new PregelixJob(newJobName);
+ DataInput dis = new DataInputStream(new ByteArrayInputStream(bos.toByteArray()));
+ newJob.getConfiguration().readFields(dis);
+ return newJob;
+ } catch (IOException e) {
+ throw new HyracksException(e);
}
}
- protected IIndexDataflowHelperFactory getIndexDataflowHelperFactory() {
- if (BspUtils.useLSM(conf)) {
- return new LSMBTreeDataflowHelperFactory(new VirtualBufferCacheProvider(), new ConstantMergePolicyProvider(
- 3), NoOpOperationTrackerProvider.INSTANCE, SynchronousSchedulerProvider.INSTANCE,
- NoOpIOOperationCallback.INSTANCE, 0.01);
- } else {
- return new BTreeDataflowHelperFactory();
- }
+    /** generate plan-specific state checkpointing */
+ protected JobSpecification[] generateStateCheckpointing(int lastSuccessfulIteration) throws HyracksException {
+ Class<? extends WritableComparable<?>> vertexIdClass = BspUtils.getVertexIndexClass(conf);
+ JobSpecification spec = new JobSpecification();
+
+ /**
+ * source aggregate
+ */
+ RecordDescriptor rdFinal = DataflowUtils.getRecordDescriptorFromKeyValueClasses(vertexIdClass.getName(),
+ MsgList.class.getName());
+
+ /**
+ * construct empty tuple operator
+ */
+ EmptyTupleSourceOperatorDescriptor emptyTupleSource = new EmptyTupleSourceOperatorDescriptor(spec);
+ ClusterConfig.setLocationConstraint(spec, emptyTupleSource);
+
+        /**
+         * construct the materializing read operator
+         */
+ MaterializingReadOperatorDescriptor materializeRead = new MaterializingReadOperatorDescriptor(spec, rdFinal,
+ false);
+ ClusterConfig.setLocationConstraint(spec, materializeRead);
+
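+        // write the materialized message lists for the last successful iteration to HDFS
+        // as a SequenceFile, keyed on vertex id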
+        String checkpointPath = BspUtils.getMessageCheckpointPath(conf, lastSuccessfulIteration);
+ PregelixJob tmpJob = createCloneJob("State checkpointing for job " + jobId, pregelixJob);
+ tmpJob.setOutputFormatClass(SequenceFileOutputFormat.class);
+ FileOutputFormat.setOutputPath(tmpJob, new Path(checkpointPath));
+ tmpJob.setOutputKeyClass(vertexIdClass);
+ tmpJob.setOutputValueClass(MsgList.class);
+
+ IRecordDescriptorFactory inputRdFactory = DataflowUtils.getWritableRecordDescriptorFactoryFromWritableClasses(
+ vertexIdClass.getName(), MsgList.class.getName());
+ HDFSFileWriteOperatorDescriptor hdfsWriter = new HDFSFileWriteOperatorDescriptor(spec, tmpJob, inputRdFactory);
+ ClusterConfig.setLocationConstraint(spec, hdfsWriter);
+
+ spec.connect(new OneToOneConnectorDescriptor(spec), emptyTupleSource, 0, materializeRead, 0);
+ spec.connect(new OneToOneConnectorDescriptor(spec), materializeRead, 0, hdfsWriter, 0);
+ spec.setFrameSize(frameSize);
+ return new JobSpecification[] { spec };
+ }
+
+    /** load plan-specific state checkpoints */
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ protected JobSpecification[] generateStateCheckpointLoading(int lastCheckpointedIteration, PregelixJob job)
+ throws HyracksException {
+ String checkpointPath = BspUtils.getMessageCheckpointPath(job.getConfiguration(), lastCheckpointedIteration);
+ PregelixJob tmpJob = createCloneJob("State checkpoint loading for job " + jobId, job);
+ tmpJob.setInputFormatClass(SequenceFileInputFormat.class);
+ try {
+ FileInputFormat.setInputPaths(tmpJob, checkpointPath);
+ } catch (IOException e) {
+ throw new HyracksException(e);
+ }
+ Configuration conf = tmpJob.getConfiguration();
+ Class vertexIdClass = BspUtils.getVertexIndexClass(conf);
+ JobSpecification spec = new JobSpecification();
+
+ /***
+ * HDFS read operator
+ */
+ List<InputSplit> splits = new ArrayList<InputSplit>();
+ try {
+ InputFormat inputFormat = org.apache.hadoop.util.ReflectionUtils.newInstance(job.getInputFormatClass(),
+ job.getConfiguration());
+ splits = inputFormat.getSplits(tmpJob);
+ LOGGER.info("number of splits: " + splits.size());
+ for (InputSplit split : splits)
+ LOGGER.info(split.toString());
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ RecordDescriptor recordDescriptor = DataflowUtils.getRecordDescriptorFromKeyValueClasses(
+ vertexIdClass.getName(), MsgList.class.getName());
+ String[] readSchedule = ClusterConfig.getHdfsScheduler().getLocationConstraints(splits);
+ HDFSReadOperatorDescriptor scanner = new HDFSReadOperatorDescriptor(spec, recordDescriptor, tmpJob, splits,
+ readSchedule, new KeyValueParserFactory());
+ ClusterConfig.setLocationConstraint(spec, scanner);
+
+ /** construct the sort operator to sort message states */
+ int[] keyFields = new int[] { 0 };
+ INormalizedKeyComputerFactory nkmFactory = JobGenUtil.getINormalizedKeyComputerFactory(conf);
+ IBinaryComparatorFactory[] sortCmpFactories = new IBinaryComparatorFactory[1];
+ sortCmpFactories[0] = JobGenUtil.getIBinaryComparatorFactory(lastCheckpointedIteration,
+ WritableComparator.get(vertexIdClass).getClass());
+ ExternalSortOperatorDescriptor sort = new ExternalSortOperatorDescriptor(spec, maxFrameNumber, keyFields,
+ nkmFactory, sortCmpFactories, recordDescriptor);
+ ClusterConfig.setLocationConstraint(spec, sort);
+
+ /**
+ * construct the materializing write operator
+ */
+ MaterializingWriteOperatorDescriptor materialize = new MaterializingWriteOperatorDescriptor(spec,
+ recordDescriptor);
+ ClusterConfig.setLocationConstraint(spec, materialize);
+
+ /** construct runtime hook */
+ RuntimeHookOperatorDescriptor postSuperStep = new RuntimeHookOperatorDescriptor(spec,
+ new RecoveryRuntimeHookFactory(jobId, lastCheckpointedIteration, new ConfigurationFactory(
+ pregelixJob.getConfiguration())));
+ ClusterConfig.setLocationConstraint(spec, postSuperStep);
+
+ /** construct empty sink operator */
+ EmptySinkOperatorDescriptor emptySink = new EmptySinkOperatorDescriptor(spec);
+ ClusterConfig.setLocationConstraint(spec, emptySink);
+
+ /**
+ * connect operator descriptors
+ */
+ ITuplePartitionComputerFactory hashPartitionComputerFactory = getVertexPartitionComputerFactory();
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 0,
+ sort, 0);
+ spec.connect(new OneToOneConnectorDescriptor(spec), sort, 0, materialize, 0);
+ spec.connect(new OneToOneConnectorDescriptor(spec), materialize, 0, postSuperStep, 0);
+ spec.connect(new OneToOneConnectorDescriptor(spec), postSuperStep, 0, emptySink, 0);
+ spec.setFrameSize(frameSize);
+ return new JobSpecification[] { spec };
}
/** generate non-first iteration job */
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
index db6c2c8..9389f62 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
@@ -15,26 +15,44 @@
package edu.uci.ics.pregelix.core.jobgen;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import edu.uci.ics.hyracks.api.constraints.PartitionConstraintHelper;
import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.ITuplePartitionComputerFactory;
import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.misc.ConstantTupleSourceOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs2.dataflow.HDFSReadOperatorDescriptor;
+import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
@@ -50,10 +68,14 @@
import edu.uci.ics.pregelix.dataflow.EmptySinkOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.EmptyTupleSourceOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.FinalAggregateOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.HDFSFileWriteOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.KeyValueParserFactory;
import edu.uci.ics.pregelix.dataflow.MaterializingReadOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.MaterializingWriteOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.TerminationStateWriterOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
+import edu.uci.ics.pregelix.dataflow.group.ClusteredGroupOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.group.IClusteredAggregatorDescriptorFactory;
import edu.uci.ics.pregelix.dataflow.std.IndexNestedLoopJoinFunctionUpdateOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.std.IndexNestedLoopJoinOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.std.RuntimeHookOperatorDescriptor;
@@ -69,6 +91,7 @@
import edu.uci.ics.pregelix.runtime.touchpoint.RuntimeHookFactory;
public class JobGenInnerJoin extends JobGen {
+ private static final Logger LOGGER = Logger.getLogger(JobGen.class.getName());
public JobGenInnerJoin(PregelixJob job) {
super(job);
@@ -178,18 +201,18 @@
/**
* construct local pre-clustered group-by operator
*/
- IAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf, false,
- false);
- PreclusteredGroupOperatorDescriptor localGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf,
+ false, false);
+ ClusteredGroupOperatorDescriptor localGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactory, rdUnnestedMessage);
ClusterConfig.setLocationConstraint(spec, localGby);
/**
* construct global group-by operator
*/
- IAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(conf,
- true, true);
- PreclusteredGroupOperatorDescriptor globalGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(
+ conf, true, true);
+ ClusteredGroupOperatorDescriptor globalGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactoryFinal, rdFinal);
ClusterConfig.setLocationConstraint(spec, globalGby);
@@ -318,7 +341,8 @@
/**
* construct the materializing write operator
*/
- MaterializingReadOperatorDescriptor materializeRead = new MaterializingReadOperatorDescriptor(spec, rdFinal);
+ MaterializingReadOperatorDescriptor materializeRead = new MaterializingReadOperatorDescriptor(spec, rdFinal,
+ true);
ClusterConfig.setLocationConstraint(spec, materializeRead);
/**
@@ -383,18 +407,18 @@
/**
* construct local pre-clustered group-by operator
*/
- IAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf, false,
- false);
- PreclusteredGroupOperatorDescriptor localGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf,
+ false, false);
+ ClusteredGroupOperatorDescriptor localGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactory, rdUnnestedMessage);
ClusterConfig.setLocationConstraint(spec, localGby);
/**
* construct global group-by operator
*/
- IAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(conf,
- true, true);
- PreclusteredGroupOperatorDescriptor globalGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(
+ conf, true, true);
+ ClusteredGroupOperatorDescriptor globalGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactoryFinal, rdFinal);
ClusterConfig.setLocationConstraint(spec, globalGby);
@@ -496,6 +520,184 @@
return spec;
}
+    /** generate plan-specific state checkpointing */
+    @Override
+    protected JobSpecification[] generateStateCheckpointing(int lastSuccessfulIteration) throws HyracksException {
+ JobSpecification[] msgCkpSpecs = super.generateStateCheckpointing(lastSuccessfulIteration);
+
+ /** generate secondary index checkpoint */
+ PregelixJob tmpJob = this.createCloneJob("Secondary index checkpointing for job " + jobId, pregelixJob);
+
+ JobSpecification secondaryBTreeCkp = generateSecondaryBTreeCheckpoint(lastSuccessfulIteration, tmpJob);
+
+ JobSpecification[] specs = new JobSpecification[msgCkpSpecs.length + 1];
+ for (int i = 0; i < msgCkpSpecs.length; i++) {
+ specs[i] = msgCkpSpecs[i];
+ }
+ specs[specs.length - 1] = secondaryBTreeCkp;
+ return specs;
+ }
+
+ /**
+     * generate plan-specific checkpoint loading
+ */
+ @Override
+ protected JobSpecification[] generateStateCheckpointLoading(int lastSuccessfulIteration, PregelixJob job)
+ throws HyracksException {
+ /** generate message checkpoint load */
+ JobSpecification[] msgCkpSpecs = super.generateStateCheckpointLoading(lastSuccessfulIteration, job);
+
+ /** generate secondary index checkpoint load */
+ PregelixJob tmpJob = this.createCloneJob("Secondary index checkpoint loading for job " + jobId, pregelixJob);
+ tmpJob.setOutputFormatClass(SequenceFileOutputFormat.class);
+ JobSpecification secondaryBTreeCkpLoad = generateSecondaryBTreeCheckpointLoad(lastSuccessfulIteration, tmpJob);
+ JobSpecification[] specs = new JobSpecification[msgCkpSpecs.length + 1];
+ for (int i = 0; i < msgCkpSpecs.length; i++) {
+ specs[i] = msgCkpSpecs[i];
+ }
+ specs[specs.length - 1] = secondaryBTreeCkpLoad;
+ return specs;
+ }
+
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ private JobSpecification generateSecondaryBTreeCheckpointLoad(int lastSuccessfulIteration, PregelixJob job)
+ throws HyracksException {
+ Class<? extends WritableComparable<?>> vertexIdClass = BspUtils.getVertexIndexClass(job.getConfiguration());
+ JobSpecification spec = new JobSpecification();
+
+ String checkpointPath = BspUtils.getSecondaryIndexCheckpointPath(conf, lastSuccessfulIteration);
+ PregelixJob tmpJob = createCloneJob("State checkpoint loading for job " + jobId, job);
+ tmpJob.setInputFormatClass(SequenceFileInputFormat.class);
+ try {
+ FileInputFormat.setInputPaths(tmpJob, checkpointPath);
+ } catch (IOException e) {
+ throw new HyracksException(e);
+ }
+
+ /***
+ * HDFS read operator
+ */
+ List<InputSplit> splits = new ArrayList<InputSplit>();
+ try {
+ InputFormat inputFormat = org.apache.hadoop.util.ReflectionUtils.newInstance(job.getInputFormatClass(),
+ job.getConfiguration());
+ splits = inputFormat.getSplits(tmpJob);
+ LOGGER.info("number of splits: " + splits.size());
+ for (InputSplit split : splits)
+ LOGGER.info(split.toString());
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ RecordDescriptor recordDescriptor = DataflowUtils.getRecordDescriptorFromKeyValueClasses(
+ vertexIdClass.getName(), MsgList.class.getName());
+ String[] readSchedule = ClusterConfig.getHdfsScheduler().getLocationConstraints(splits);
+ HDFSReadOperatorDescriptor scanner = new HDFSReadOperatorDescriptor(spec, recordDescriptor, tmpJob, splits,
+ readSchedule, new KeyValueParserFactory());
+ ClusterConfig.setLocationConstraint(spec, scanner);
+
+ /** construct the sort operator to sort message states */
+ int[] keyFields = new int[] { 0 };
+ INormalizedKeyComputerFactory nkmFactory = JobGenUtil.getINormalizedKeyComputerFactory(conf);
+ IBinaryComparatorFactory[] sortCmpFactories = new IBinaryComparatorFactory[1];
+ sortCmpFactories[0] = JobGenUtil.getIBinaryComparatorFactory(lastSuccessfulIteration,
+ WritableComparator.get(vertexIdClass).getClass());
+ ExternalSortOperatorDescriptor sort = new ExternalSortOperatorDescriptor(spec, maxFrameNumber, keyFields,
+ nkmFactory, sortCmpFactories, recordDescriptor);
+ ClusterConfig.setLocationConstraint(spec, sort);
+
+ /**
+ * construct bulk-load index operator
+ */
+ ITypeTraits[] typeTraits = new ITypeTraits[2];
+ typeTraits[0] = new TypeTraits(false);
+ typeTraits[1] = new TypeTraits(false);
+        int[] fieldPermutation = new int[] { 0, 1 };
+ IBinaryComparatorFactory[] indexCmpFactories = new IBinaryComparatorFactory[1];
+ indexCmpFactories[0] = JobGenUtil.getIBinaryComparatorFactory(lastSuccessfulIteration + 1, WritableComparator
+ .get(vertexIdClass).getClass());
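+        // the secondary index is double-buffered across iterations: even iterations
+        // write the "even" copy and odd iterations write the "odd" one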
+ String writeFile = lastSuccessfulIteration % 2 == 0 ? SECONDARY_INDEX_EVEN : SECONDARY_INDEX_ODD;
+ IFileSplitProvider secondaryFileSplitProviderWrite = ClusterConfig.getFileSplitProvider(jobId, writeFile);
+ TreeIndexBulkReLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkReLoadOperatorDescriptor(spec,
+ storageManagerInterface, lcManagerProvider, secondaryFileSplitProviderWrite, typeTraits,
+ indexCmpFactories, fieldPermutation, new int[] { 0 }, DEFAULT_BTREE_FILL_FACTOR,
+ getIndexDataflowHelperFactory());
+ ClusterConfig.setLocationConstraint(spec, btreeBulkLoad);
+
+ /**
+ * connect operator descriptors
+ */
+ ITuplePartitionComputerFactory hashPartitionComputerFactory = getVertexPartitionComputerFactory();
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 0, sort, 0);
+ spec.connect(new OneToOneConnectorDescriptor(spec), sort, 0, btreeBulkLoad, 0);
+ spec.setFrameSize(frameSize);
+
+ return spec;
+ }
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ private JobSpecification generateSecondaryBTreeCheckpoint(int lastSuccessfulIteration, PregelixJob job)
+ throws HyracksException {
+ job.setOutputFormatClass(SequenceFileOutputFormat.class);
+ String checkpointPath = BspUtils.getSecondaryIndexCheckpointPath(conf, lastSuccessfulIteration);
+ FileOutputFormat.setOutputPath(job, new Path(checkpointPath));
+ job.setOutputKeyClass(BspUtils.getVertexIndexClass(job.getConfiguration()));
+ job.setOutputValueClass(MsgList.class);
+
+ Class<? extends WritableComparable<?>> vertexIdClass = BspUtils.getVertexIndexClass(job.getConfiguration());
+ Class<? extends Writable> msgListClass = MsgList.class;
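+        // read from whichever secondary index copy (even or odd) the last successful
+        // iteration wrote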
+ String readFile = lastSuccessfulIteration % 2 == 0 ? SECONDARY_INDEX_EVEN : SECONDARY_INDEX_ODD;
+ IFileSplitProvider secondaryFileSplitProviderRead = ClusterConfig.getFileSplitProvider(jobId, readFile);
+ JobSpecification spec = new JobSpecification();
+        /**
+         * construct the constant tuple source operator, which emits a single dummy tuple
+         */
+ ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
+ DataOutput dos = tb.getDataOutput();
+ tb.reset();
+ UTF8StringSerializerDeserializer.INSTANCE.serialize("0", dos);
+ tb.addFieldEndOffset();
+ ISerializerDeserializer[] keyRecDescSers = { UTF8StringSerializerDeserializer.INSTANCE,
+ UTF8StringSerializerDeserializer.INSTANCE };
+ RecordDescriptor keyRecDesc = new RecordDescriptor(keyRecDescSers);
+ ConstantTupleSourceOperatorDescriptor emptyTupleSource = new ConstantTupleSourceOperatorDescriptor(spec,
+ keyRecDesc, tb.getFieldEndOffsets(), tb.getByteArray(), tb.getSize());
+ ClusterConfig.setLocationConstraint(spec, emptyTupleSource);
+
+ /**
+ * construct btree search operator
+ */
+ RecordDescriptor recordDescriptor = DataflowUtils.getRecordDescriptorFromKeyValueClasses(
+ vertexIdClass.getName(), msgListClass.getName());
+ IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[1];
+ comparatorFactories[0] = new WritableComparingBinaryComparatorFactory(WritableComparator.get(vertexIdClass)
+ .getClass());
+
+ ITypeTraits[] typeTraits = new ITypeTraits[2];
+ typeTraits[0] = new TypeTraits(false);
+ typeTraits[1] = new TypeTraits(false);
+
+ BTreeSearchOperatorDescriptor scanner = new BTreeSearchOperatorDescriptor(spec, recordDescriptor,
+ storageManagerInterface, lcManagerProvider, secondaryFileSplitProviderRead, typeTraits,
+ comparatorFactories, null, null, null, true, true, getIndexDataflowHelperFactory(), false,
+ NoOpOperationCallbackFactory.INSTANCE);
+ ClusterConfig.setLocationConstraint(spec, scanner);
+
+ /**
+ * construct write file operator
+ */
+ IRecordDescriptorFactory inputRdFactory = DataflowUtils.getWritableRecordDescriptorFactoryFromWritableClasses(
+ vertexIdClass.getName(), MsgList.class.getName());
+ HDFSFileWriteOperatorDescriptor writer = new HDFSFileWriteOperatorDescriptor(spec, job, inputRdFactory);
+ ClusterConfig.setLocationConstraint(spec, writer);
+
+ /**
+ * connect operator descriptors
+ */
+ spec.connect(new OneToOneConnectorDescriptor(spec), emptyTupleSource, 0, scanner, 0);
+ spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, writer, 0);
+ spec.setFrameSize(frameSize);
+ return spec;
+ }
+
@Override
public JobSpecification[] generateCleanup() throws HyracksException {
JobSpecification[] cleanups = new JobSpecification[3];
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
index 3af8921..287b797 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
@@ -32,8 +32,6 @@
import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
@@ -54,6 +52,8 @@
import edu.uci.ics.pregelix.dataflow.MaterializingWriteOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.TerminationStateWriterOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
+import edu.uci.ics.pregelix.dataflow.group.ClusteredGroupOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.group.IClusteredAggregatorDescriptorFactory;
import edu.uci.ics.pregelix.dataflow.std.IndexNestedLoopJoinFunctionUpdateOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.std.RuntimeHookOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.std.TreeSearchFunctionUpdateOperatorDescriptor;
@@ -144,9 +144,9 @@
/**
* construct local pre-clustered group-by operator
*/
- IAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf, false,
- false);
- PreclusteredGroupOperatorDescriptor localGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf,
+ false, false);
+ ClusteredGroupOperatorDescriptor localGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactory, rdUnnestedMessage);
ClusterConfig.setLocationConstraint(spec, localGby);
@@ -155,9 +155,9 @@
*/
RecordDescriptor rdFinal = DataflowUtils.getRecordDescriptorFromKeyValueClasses(vertexIdClass.getName(),
MsgList.class.getName());
- IAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(conf,
- true, true);
- PreclusteredGroupOperatorDescriptor globalGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(
+ conf, true, true);
+ ClusteredGroupOperatorDescriptor globalGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactoryFinal, rdFinal);
ClusterConfig.setLocationConstraint(spec, globalGby);
@@ -198,8 +198,8 @@
int[] fieldPermutation = new int[] { 0, 1 };
TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
- NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(),
+ null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, insertOp);
/**
@@ -208,8 +208,8 @@
int[] fieldPermutationDelete = new int[] { 0 };
TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
- null, NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
+ getIndexDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, deleteOp);
/** construct empty sink operator */
@@ -299,7 +299,8 @@
/**
* construct the materializing write operator
*/
- MaterializingReadOperatorDescriptor materializeRead = new MaterializingReadOperatorDescriptor(spec, rdFinal);
+ MaterializingReadOperatorDescriptor materializeRead = new MaterializingReadOperatorDescriptor(spec, rdFinal,
+ true);
ClusterConfig.setLocationConstraint(spec, materializeRead);
/**
@@ -323,9 +324,10 @@
IndexNestedLoopJoinFunctionUpdateOperatorDescriptor join = new IndexNestedLoopJoinFunctionUpdateOperatorDescriptor(
spec, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories,
- JobGenUtil.getForwardScan(iteration), keyFields, keyFields, true, true, getIndexDataflowHelperFactory(), true,
- nullWriterFactories, inputRdFactory, 5, new ComputeUpdateFunctionFactory(confFactory), preHookFactory,
- null, rdUnnestedMessage, rdDummy, rdPartialAggregate, rdInsert, rdDelete);
+ JobGenUtil.getForwardScan(iteration), keyFields, keyFields, true, true,
+ getIndexDataflowHelperFactory(), true, nullWriterFactories, inputRdFactory, 5,
+ new ComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
+ rdPartialAggregate, rdInsert, rdDelete);
ClusterConfig.setLocationConstraint(spec, join);
/**
@@ -342,18 +344,18 @@
/**
* construct local pre-clustered group-by operator
*/
- IAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf, false,
- false);
- PreclusteredGroupOperatorDescriptor localGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf,
+ false, false);
+ ClusteredGroupOperatorDescriptor localGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactory, rdUnnestedMessage);
ClusterConfig.setLocationConstraint(spec, localGby);
/**
* construct global group-by operator
*/
- IAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(conf,
- true, true);
- PreclusteredGroupOperatorDescriptor globalGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(
+ conf, true, true);
+ ClusteredGroupOperatorDescriptor globalGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactoryFinal, rdFinal);
ClusterConfig.setLocationConstraint(spec, globalGby);
@@ -394,8 +396,8 @@
int[] fieldPermutation = new int[] { 0, 1 };
TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
- NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(),
+ null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, insertOp);
/**
@@ -404,8 +406,8 @@
int[] fieldPermutationDelete = new int[] { 0 };
TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
- null, NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
+ getIndexDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, deleteOp);
/** construct empty sink operator */
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
index 50949aa..3b3c9e7 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
@@ -31,8 +31,6 @@
import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
@@ -53,6 +51,8 @@
import edu.uci.ics.pregelix.dataflow.MaterializingWriteOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.TerminationStateWriterOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
+import edu.uci.ics.pregelix.dataflow.group.ClusteredGroupOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.group.IClusteredAggregatorDescriptorFactory;
import edu.uci.ics.pregelix.dataflow.std.IndexNestedLoopJoinFunctionUpdateOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.std.RuntimeHookOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.std.TreeSearchFunctionUpdateOperatorDescriptor;
@@ -148,9 +148,9 @@
*/
RecordDescriptor rdFinal = DataflowUtils.getRecordDescriptorFromKeyValueClasses(vertexIdClass.getName(),
MsgList.class.getName());
- IAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(conf,
- true, false);
- PreclusteredGroupOperatorDescriptor globalGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(
+ conf, true, false);
+ ClusteredGroupOperatorDescriptor globalGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactoryFinal, rdFinal);
ClusterConfig.setLocationConstraint(spec, globalGby);
@@ -190,8 +190,8 @@
int[] fieldPermutation = new int[] { 0, 1 };
TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
- NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(),
+ null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, insertOp);
/**
@@ -200,8 +200,8 @@
int[] fieldPermutationDelete = new int[] { 0 };
TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
- null, NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
+ getIndexDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, deleteOp);
/** construct empty sink operator */
@@ -288,7 +288,8 @@
/**
* construct the materializing write operator
*/
- MaterializingReadOperatorDescriptor materializeRead = new MaterializingReadOperatorDescriptor(spec, rdFinal);
+ MaterializingReadOperatorDescriptor materializeRead = new MaterializingReadOperatorDescriptor(spec, rdFinal,
+ true);
ClusterConfig.setLocationConstraint(spec, materializeRead);
/**
@@ -312,9 +313,10 @@
IndexNestedLoopJoinFunctionUpdateOperatorDescriptor join = new IndexNestedLoopJoinFunctionUpdateOperatorDescriptor(
spec, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories,
- JobGenUtil.getForwardScan(iteration), keyFields, keyFields, true, true, getIndexDataflowHelperFactory(), true,
- nullWriterFactories, inputRdFactory, 5, new ComputeUpdateFunctionFactory(confFactory), preHookFactory,
- null, rdUnnestedMessage, rdDummy, rdPartialAggregate, rdInsert, rdDelete);
+ JobGenUtil.getForwardScan(iteration), keyFields, keyFields, true, true,
+ getIndexDataflowHelperFactory(), true, nullWriterFactories, inputRdFactory, 5,
+ new ComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
+ rdPartialAggregate, rdInsert, rdDelete);
ClusterConfig.setLocationConstraint(spec, join);
/**
@@ -331,9 +333,9 @@
/**
* construct global group-by operator
*/
- IAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(conf,
- true, false);
- PreclusteredGroupOperatorDescriptor globalGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(
+ conf, true, false);
+ ClusteredGroupOperatorDescriptor globalGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactoryFinal, rdFinal);
ClusterConfig.setLocationConstraint(spec, globalGby);
@@ -371,8 +373,8 @@
int[] fieldPermutation = new int[] { 0, 1 };
TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
- NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(),
+ null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, insertOp);
/**
@@ -381,8 +383,8 @@
int[] fieldPermutationDelete = new int[] { 0 };
TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
- null, NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
+ getIndexDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, deleteOp);
/** construct empty sink operator */
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
index 362e413..e334095 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
@@ -31,8 +31,6 @@
import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
-import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor;
import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
@@ -53,6 +51,8 @@
import edu.uci.ics.pregelix.dataflow.MaterializingWriteOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.TerminationStateWriterOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
+import edu.uci.ics.pregelix.dataflow.group.ClusteredGroupOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.group.IClusteredAggregatorDescriptorFactory;
import edu.uci.ics.pregelix.dataflow.std.IndexNestedLoopJoinFunctionUpdateOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.std.RuntimeHookOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.std.TreeSearchFunctionUpdateOperatorDescriptor;
@@ -143,9 +143,9 @@
/**
* construct local pre-clustered group-by operator
*/
- IAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf, false,
- false);
- PreclusteredGroupOperatorDescriptor localGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf,
+ false, false);
+ ClusteredGroupOperatorDescriptor localGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactory, rdUnnestedMessage);
ClusterConfig.setLocationConstraint(spec, localGby);
@@ -161,9 +161,9 @@
*/
RecordDescriptor rdFinal = DataflowUtils.getRecordDescriptorFromKeyValueClasses(vertexIdClass.getName(),
MsgList.class.getName());
- IAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(conf,
- true, true);
- PreclusteredGroupOperatorDescriptor globalGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(
+ conf, true, true);
+ ClusteredGroupOperatorDescriptor globalGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactoryFinal, rdFinal);
ClusterConfig.setLocationConstraint(spec, globalGby);
@@ -204,8 +204,8 @@
int[] fieldPermutation = new int[] { 0, 1 };
TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
- NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(),
+ null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, insertOp);
/**
@@ -214,8 +214,8 @@
int[] fieldPermutationDelete = new int[] { 0 };
TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
- null, NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
+ getIndexDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, deleteOp);
/** construct empty sink operator */
@@ -302,7 +302,8 @@
/**
* construct the materializing write operator
*/
- MaterializingReadOperatorDescriptor materializeRead = new MaterializingReadOperatorDescriptor(spec, rdFinal);
+ MaterializingReadOperatorDescriptor materializeRead = new MaterializingReadOperatorDescriptor(spec, rdFinal,
+ true);
ClusterConfig.setLocationConstraint(spec, materializeRead);
/**
@@ -326,9 +327,10 @@
IndexNestedLoopJoinFunctionUpdateOperatorDescriptor join = new IndexNestedLoopJoinFunctionUpdateOperatorDescriptor(
spec, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories,
- JobGenUtil.getForwardScan(iteration), keyFields, keyFields, true, true, getIndexDataflowHelperFactory(), true,
- nullWriterFactories, inputRdFactory, 5, new ComputeUpdateFunctionFactory(confFactory), preHookFactory,
- null, rdUnnestedMessage, rdDummy, rdPartialAggregate, rdInsert, rdDelete);
+ JobGenUtil.getForwardScan(iteration), keyFields, keyFields, true, true,
+ getIndexDataflowHelperFactory(), true, nullWriterFactories, inputRdFactory, 5,
+ new ComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
+ rdPartialAggregate, rdInsert, rdDelete);
ClusterConfig.setLocationConstraint(spec, join);
/**
@@ -345,9 +347,9 @@
/**
* construct local pre-clustered group-by operator
*/
- IAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf, false,
- false);
- PreclusteredGroupOperatorDescriptor localGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactory = DataflowUtils.getAccumulatingAggregatorFactory(conf,
+ false, false);
+ ClusteredGroupOperatorDescriptor localGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactory, rdUnnestedMessage);
ClusterConfig.setLocationConstraint(spec, localGby);
@@ -361,9 +363,9 @@
/**
* construct global group-by operator
*/
- IAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(conf,
- true, true);
- PreclusteredGroupOperatorDescriptor globalGby = new PreclusteredGroupOperatorDescriptor(spec, keyFields,
+ IClusteredAggregatorDescriptorFactory aggregatorFactoryFinal = DataflowUtils.getAccumulatingAggregatorFactory(
+ conf, true, true);
+ ClusteredGroupOperatorDescriptor globalGby = new ClusteredGroupOperatorDescriptor(spec, keyFields,
sortCmpFactories, aggregatorFactoryFinal, rdFinal);
ClusterConfig.setLocationConstraint(spec, globalGby);
@@ -404,8 +406,8 @@
int[] fieldPermutation = new int[] { 0, 1 };
TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
- NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(),
+ null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, insertOp);
/**
@@ -414,8 +416,8 @@
int[] fieldPermutationDelete = new int[] { 0 };
TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
- comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
- null, NoOpOperationCallbackFactory.INSTANCE);
+ comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
+ getIndexDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
ClusterConfig.setLocationConstraint(spec, deleteOp);
/** construct empty sink operator */
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/clusterconfig/ClusterConfig.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/clusterconfig/ClusterConfig.java
index 49309ec..89fbdcd 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/clusterconfig/ClusterConfig.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/clusterconfig/ClusterConfig.java
@@ -20,12 +20,16 @@
import java.net.InetAddress;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;
+import java.util.Set;
+import java.util.TreeMap;
import org.apache.hadoop.mapreduce.InputSplit;
@@ -51,6 +55,8 @@
private static Map<String, List<String>> ipToNcMapping;
private static String[] stores;
private static Scheduler hdfsScheduler;
+ private static Set<String> blackListNodes = new HashSet<String>();
+ private static IHyracksClientConnection hcc;
/**
* let tests set config path to be whatever
@@ -196,8 +202,19 @@
public static void loadClusterConfig(String ipAddress, int port) throws HyracksException {
try {
- IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);
- Map<String, NodeControllerInfo> ncNameToNcInfos = hcc.getNodeControllerInfos();
+ if (hcc == null) {
+ hcc = new HyracksConnection(ipAddress, port);
+ }
+ Map<String, NodeControllerInfo> ncNameToNcInfos = new TreeMap<String, NodeControllerInfo>();
+ ncNameToNcInfos.putAll(hcc.getNodeControllerInfos());
+
+ /**
+ * remove black list nodes -- which had disk failures
+ */
+ for (String blackListNode : blackListNodes) {
+ ncNameToNcInfos.remove(blackListNode);
+ }
+
NCs = new String[ncNameToNcInfos.size()];
ipToNcMapping = new HashMap<String, List<String>>();
int i = 0;
@@ -214,7 +231,7 @@
i++;
}
- hdfsScheduler = new Scheduler(ipAddress, port);
+ hdfsScheduler = new Scheduler(hcc.getNodeControllerInfos(), hcc.getClusterTopology());
} catch (Exception e) {
throw new IllegalStateException(e);
}
@@ -226,4 +243,20 @@
public static Scheduler getHdfsScheduler() {
return hdfsScheduler;
}
+
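+    /**
+     * @return one location entry per (node controller, store directory) pair, i.e.,
+     *         one partition for each io device in the cluster
+     */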
+ public static String[] getLocationConstraint() throws HyracksException {
+ int count = 0;
+ String[] locations = new String[NCs.length * stores.length];
+ for (String nc : NCs) {
+ for (int i = 0; i < stores.length; i++) {
+ locations[count] = nc;
+ count++;
+ }
+ }
+ return locations;
+ }
+
+ public static void addToBlackListNodes(Collection<String> nodes) {
+ blackListNodes.addAll(nodes);
+ }
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/DataflowUtils.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/DataflowUtils.java
index 0876893..3e01109 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/DataflowUtils.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/DataflowUtils.java
@@ -21,9 +21,9 @@
import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
import edu.uci.ics.pregelix.core.hadoop.config.ConfigurationFactory;
import edu.uci.ics.pregelix.core.runtime.touchpoint.WritableRecordDescriptorFactory;
+import edu.uci.ics.pregelix.dataflow.group.IClusteredAggregatorDescriptorFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IAggregateFunctionFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
import edu.uci.ics.pregelix.runtime.simpleagg.AccumulatingAggregatorFactory;
@@ -75,11 +75,11 @@
return rdFactory;
}
- public static IAggregatorDescriptorFactory getAccumulatingAggregatorFactory(Configuration conf, boolean isFinal,
- boolean partialAggAsInput) {
+ public static IClusteredAggregatorDescriptorFactory getAccumulatingAggregatorFactory(Configuration conf,
+ boolean isFinal, boolean partialAggAsInput) {
IAggregateFunctionFactory aggFuncFactory = new AggregationFunctionFactory(new ConfigurationFactory(conf),
isFinal, partialAggAsInput);
- IAggregatorDescriptorFactory aggregatorFactory = new AccumulatingAggregatorFactory(
+ IClusteredAggregatorDescriptorFactory aggregatorFactory = new AccumulatingAggregatorFactory(
new IAggregateFunctionFactory[] { aggFuncFactory });
return aggregatorFactory;
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/ExceptionUtilities.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/ExceptionUtilities.java
new file mode 100644
index 0000000..a4c4501
--- /dev/null
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/ExceptionUtilities.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.core.util;
+
+import java.io.IOException;
+import java.util.Set;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * Utilities for analyzing exceptions
+ *
+ * @author yingyib
+ */
+public class ExceptionUtilities {
+
+    /**
+     * Check whether an exception is recoverable
+     *
+     * @param exception
+     *            the exception to analyze
+     * @param blackListNodes
+     *            collects the ids of nodes that should be black-listed
+     * @return true if the job can recover from the exception; false otherwise
+     */
+    public static boolean recoverable(Exception exception, Set<String> blackListNodes) {
+        // guard against exceptions that carry no message
+        String message = exception.getMessage() == null ? "" : exception.getMessage();
+
+ /***
+ * check interrupted exception
+ */
+ if (exception instanceof InterruptedException || (message.contains("Node") && message.contains("not live"))
+ || message.contains("Failure occurred on input")) {
+ return true;
+ }
+ Throwable cause = exception;
+ while ((cause = cause.getCause()) != null) {
+ if (cause instanceof InterruptedException) {
+ return true;
+ }
+ }
+
+ /***
+ * check io exception
+ */
+ cause = exception;
+ String blackListNode = null;
+ if (cause instanceof HyracksDataException) {
+ blackListNode = ((HyracksDataException) cause).getNodeId();
+ }
+ while ((cause = cause.getCause()) != null) {
+ if (cause instanceof IOException) {
+ if (containsIOManager(cause)) {
+ if (blackListNode != null) {
+ blackListNodes.add(blackListNode);
+ }
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+    /**
+     * Check if the exception's stack trace contains the IOManager, which indicates disk failures
+     *
+     * @param cause
+     *            the throwable whose stack trace is inspected
+     * @return true if IOManager is in the trace; false otherwise.
+     */
+ private static boolean containsIOManager(Throwable cause) {
+ StackTraceElement[] traces = cause.getStackTrace();
+ for (StackTraceElement e : traces) {
+ if (e.getClassName().endsWith("IOManager")) {
+ return true;
+ }
+ }
+ return false;
+ }
+}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
index 73b053f..aabd4ba 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/util/PregelixHyracksIntegrationUtil.java
@@ -14,8 +14,11 @@
*/
package edu.uci.ics.pregelix.core.util;
+import java.io.File;
import java.util.EnumSet;
+import org.apache.commons.io.FileUtils;
+
import edu.uci.ics.hyracks.api.client.HyracksConnection;
import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
import edu.uci.ics.hyracks.api.job.JobFlag;
@@ -46,6 +49,10 @@
private static IHyracksClientConnection hcc;
public static void init() throws Exception {
+ FileUtils.forceMkdir(new File("dev1"));
+ FileUtils.forceMkdir(new File("dev2"));
+ FileUtils.forceMkdir(new File("dev3"));
+ FileUtils.forceMkdir(new File("dev4"));
CCConfig ccConfig = new CCConfig();
ccConfig.clientNetIpAddress = CC_HOST;
ccConfig.clusterNetIpAddress = CC_HOST;
@@ -54,6 +61,8 @@
ccConfig.defaultMaxJobAttempts = 0;
ccConfig.jobHistorySize = 1;
ccConfig.profileDumpPeriod = -1;
+ ccConfig.heartbeatPeriod = 50;
+ ccConfig.maxHeartbeatLapsePeriods = 15;
// cluster controller
cc = new ClusterControllerService(ccConfig);
@@ -67,7 +76,7 @@
ncConfig1.dataIPAddress = "127.0.0.1";
ncConfig1.datasetIPAddress = "127.0.0.1";
ncConfig1.nodeId = NC1_ID;
- ncConfig1.ioDevices="dev1,dev2";
+ ncConfig1.ioDevices = "dev1,dev2";
ncConfig1.appNCMainClass = NCApplicationEntryPoint.class.getName();
nc1 = new NodeControllerService(ncConfig1);
nc1.start();
@@ -80,7 +89,7 @@
ncConfig2.datasetIPAddress = "127.0.0.1";
ncConfig2.nodeId = NC2_ID;
ncConfig2.appNCMainClass = NCApplicationEntryPoint.class.getName();
- ncConfig2.ioDevices="dev1,dev2";
+ ncConfig2.ioDevices = "dev3,dev4";
nc2 = new NodeControllerService(ncConfig2);
nc2.start();
@@ -89,6 +98,22 @@
ClusterConfig.loadClusterConfig(CC_HOST, TEST_HYRACKS_CC_CLIENT_PORT);
}
+ public static void startNC1() throws Exception {
+ nc1.start();
+ }
+
+ public static void shutdownNC1() throws Exception {
+ nc1.stop();
+ }
+
+ public static void shutdownNC2() throws Exception {
+ nc2.stop();
+ }
+
+ public static void shutdownCC() throws Exception {
+ cc.stop();
+ }
+
public static void deinit() throws Exception {
nc2.stop();
nc1.stop();
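The new heartbeat settings and the per-NC start/stop hooks above make it possible to simulate node failures in tests. A sketch, assuming a hypothetical submitJobAsync() helper:

    PregelixHyracksIntegrationUtil.init();
    submitJobAsync(); // hypothetical asynchronous job submission
    PregelixHyracksIntegrationUtil.shutdownNC2(); // simulate a crashed node controller
    // with heartbeatPeriod = 50 and maxHeartbeatLapsePeriods = 15, the cluster
    // controller declares NC2 dead after roughly 50 ms * 15 = 750 ms, so the
    // recovery path can be exercised quickly
    PregelixHyracksIntegrationUtil.deinit();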
diff --git a/pregelix/pregelix-core/src/main/resources/scripts/pregelix b/pregelix/pregelix-core/src/main/resources/scripts/pregelix
deleted file mode 100644
index 7232ccc..0000000
--- a/pregelix/pregelix-core/src/main/resources/scripts/pregelix
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/bin/sh
-#
-#------------------------------------------------------------------------
-# Copyright 2009-2013 by The Regents of the University of California
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# you may obtain a copy of the License from
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ------------------------------------------------------------------------
-#
-
-
-# resolve links - $0 may be a softlink
-PRG="$0"
-
-while [ -h "$PRG" ]; do
- ls=`ls -ld "$PRG"`
- link=`expr "$ls" : '.*-> \(.*\)$'`
- if expr "$link" : '/.*' > /dev/null; then
- PRG="$link"
- else
- PRG=`dirname "$PRG"`/"$link"
- fi
-done
-
-PRGDIR=`dirname "$PRG"`
-BASEDIR=`cd "$PRGDIR/.." >/dev/null; pwd`
-
-
-
-# OS specific support. $var _must_ be set to either true or false.
-cygwin=false;
-darwin=false;
-case "`uname`" in
- CYGWIN*) cygwin=true ;;
- Darwin*) darwin=true
- if [ -z "$JAVA_VERSION" ] ; then
- JAVA_VERSION="CurrentJDK"
- else
- echo "Using Java version: $JAVA_VERSION"
- fi
- if [ -z "$JAVA_HOME" ] ; then
- JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/${JAVA_VERSION}/Home
- fi
- ;;
-esac
-
-if [ -z "$JAVA_HOME" ] ; then
- if [ -r /etc/gentoo-release ] ; then
- JAVA_HOME=`java-config --jre-home`
- fi
-fi
-
-# For Cygwin, ensure paths are in UNIX format before anything is touched
-if $cygwin ; then
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
- [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
-fi
-
-# If a specific java binary isn't specified search for the standard 'java' binary
-if [ -z "$JAVACMD" ] ; then
- if [ -n "$JAVA_HOME" ] ; then
- if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
- # IBM's JDK on AIX uses strange locations for the executables
- JAVACMD="$JAVA_HOME/jre/sh/java"
- else
- JAVACMD="$JAVA_HOME/bin/java"
- fi
- else
- JAVACMD=`which java`
- fi
-fi
-
-if [ ! -x "$JAVACMD" ] ; then
- echo "Error: JAVA_HOME is not defined correctly." 1>&2
- echo " We cannot execute $JAVACMD" 1>&2
- exit 1
-fi
-
-if [ -z "$REPO" ]
-then
- REPO="$BASEDIR"/lib
-fi
-
-CLASSPATH=$CLASSPATH_PREFIX:"$HADOOP_HOME"/conf:/etc/hadoop/conf:"$BASEDIR"/etc:$1
-
-# For Cygwin, switch paths to Windows format before running java
-if $cygwin; then
- [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
- [ -n "$HOME" ] && HOME=`cygpath --path --windows "$HOME"`
- [ -n "$BASEDIR" ] && BASEDIR=`cygpath --path --windows "$BASEDIR"`
- [ -n "$REPO" ] && REPO=`cygpath --path --windows "$REPO"`
-fi
-
-exec "$JAVACMD" $JAVA_OPTS \
- -classpath "$CLASSPATH" \
- -Dapp.name="pregelix" \
- -Dapp.pid="$$" \
- -Dapp.repo="$REPO" \
- -Dapp.home="$BASEDIR" \
- -Dbasedir="$BASEDIR" \
- org.apache.hadoop.util.RunJar \
- "$@"
diff --git a/pregelix/pregelix-core/src/main/resources/scripts/pregelix.bat b/pregelix/pregelix-core/src/main/resources/scripts/pregelix.bat
deleted file mode 100644
index fe53029..0000000
--- a/pregelix/pregelix-core/src/main/resources/scripts/pregelix.bat
+++ /dev/null
@@ -1,124 +0,0 @@
-@rem /*
-@rem Copyright 2009-2013 by The Regents of the University of California
-@rem Licensed under the Apache License, Version 2.0 (the "License");
-@rem you may not use this file except in compliance with the License.
-@rem you may obtain a copy of the License from
-@rem
-@rem http://www.apache.org/licenses/LICENSE-2.0
-@rem
-@rem Unless required by applicable law or agreed to in writing, software
-@rem distributed under the License is distributed on an "AS IS" BASIS,
-@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@rem See the License for the specific language governing permissions and
-@rem limitations under the License.
-@rem */
-@REM ----------------------------------------------------------------------------
-@REM Copyright 2001-2006 The Apache Software Foundation.
-@REM
-@REM Licensed under the Apache License, Version 2.0 (the "License");
-@REM you may not use this file except in compliance with the License.
-@REM You may obtain a copy of the License at
-@REM
-@REM http://www.apache.org/licenses/LICENSE-2.0
-@REM
-@REM Unless required by applicable law or agreed to in writing, software
-@REM distributed under the License is distributed on an "AS IS" BASIS,
-@REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@REM See the License for the specific language governing permissions and
-@REM limitations under the License.
-@REM ----------------------------------------------------------------------------
-@REM
-@REM Copyright (c) 2001-2006 The Apache Software Foundation. All rights
-@REM reserved.
-
-@echo off
-
-set ERROR_CODE=0
-
-:init
-@REM Decide how to startup depending on the version of windows
-
-@REM -- Win98ME
-if NOT "%OS%"=="Windows_NT" goto Win9xArg
-
-@REM set local scope for the variables with windows NT shell
-if "%OS%"=="Windows_NT" @setlocal
-
-@REM -- 4NT shell
-if "%eval[2+2]" == "4" goto 4NTArgs
-
-@REM -- Regular WinNT shell
-set CMD_LINE_ARGS=%*
-goto WinNTGetScriptDir
-
-@REM The 4NT Shell from jp software
-:4NTArgs
-set CMD_LINE_ARGS=%$
-goto WinNTGetScriptDir
-
-:Win9xArg
-@REM Slurp the command line arguments. This loop allows for an unlimited number
-@REM of arguments (up to the command line limit, anyway).
-set CMD_LINE_ARGS=
-:Win9xApp
-if %1a==a goto Win9xGetScriptDir
-set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1
-shift
-goto Win9xApp
-
-:Win9xGetScriptDir
-set SAVEDIR=%CD%
-%0\
-cd %0\..\..
-set BASEDIR=%CD%
-cd %SAVEDIR%
-set SAVE_DIR=
-goto repoSetup
-
-:WinNTGetScriptDir
-set BASEDIR=%~dp0\..
-
-:repoSetup
-
-
-if "%JAVACMD%"=="" set JAVACMD=java
-
-if "%REPO%"=="" set REPO=%BASEDIR%\lib
-
-cp $BASEDIR"\..\a-hadoop-patch.jar "$REPO"\
-
-set CLASSPATH="%BASEDIR%"\etc;"%REPO%"\a-hadoop-patch.jar;"%REPO%"\pregelix-api-0.0.1-SNAPSHOT.jar;"%REPO%"\hyracks-dataflow-common-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-api-0.2.2-SNAPSHOT.jar;"%REPO%"\json-20090211.jar;"%REPO%"\httpclient-4.1-alpha2.jar;"%REPO%"\httpcore-4.1-beta1.jar;"%REPO%"\commons-logging-1.1.1.jar;"%REPO%"\commons-codec-1.3.jar;"%REPO%"\args4j-2.0.12.jar;"%REPO%"\hyracks-ipc-0.2.2-SNAPSHOT.jar;"%REPO%"\commons-lang3-3.1.jar;"%REPO%"\hyracks-data-std-0.2.2-SNAPSHOT.jar;"%REPO%"\hadoop-core-0.20.2.jar;"%REPO%"\commons-cli-1.2.jar;"%REPO%"\xmlenc-0.52.jar;"%REPO%"\commons-httpclient-3.0.1.jar;"%REPO%"\commons-net-1.4.1.jar;"%REPO%"\oro-2.0.8.jar;"%REPO%"\jetty-6.1.14.jar;"%REPO%"\jetty-util-6.1.14.jar;"%REPO%"\servlet-api-2.5-6.1.14.jar;"%REPO%"\jasper-runtime-5.5.12.jar;"%REPO%"\jasper-compiler-5.5.12.jar;"%REPO%"\jsp-api-2.1-6.1.14.jar;"%REPO%"\jsp-2.1-6.1.14.jar;"%REPO%"\core-3.1.1.jar;"%REPO%"\ant-1.6.5.jar;"%REPO%"\commons-el-1.0.jar;"%REPO%"\jets3t-0.7.1.jar;"%REPO%"\kfs-0.3.jar;"%REPO%"\hsqldb-1.8.0.10.jar;"%REPO%"\pregelix-dataflow-std-0.0.1-SNAPSHOT.jar;"%REPO%"\pregelix-dataflow-std-base-0.0.1-SNAPSHOT.jar;"%REPO%"\hyracks-dataflow-std-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-dataflow-hadoop-0.2.2-SNAPSHOT.jar;"%REPO%"\dcache-client-0.0.1.jar;"%REPO%"\jetty-client-8.0.0.M0.jar;"%REPO%"\jetty-http-8.0.0.RC0.jar;"%REPO%"\jetty-io-8.0.0.RC0.jar;"%REPO%"\jetty-util-8.0.0.RC0.jar;"%REPO%"\hyracks-storage-am-common-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-storage-common-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-storage-am-btree-0.2.2-SNAPSHOT.jar;"%REPO%"\btreehelper-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-control-cc-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-control-common-0.2.2-SNAPSHOT.jar;"%REPO%"\commons-io-1.3.1.jar;"%REPO%"\jetty-server-8.0.0.RC0.jar;"%REPO%"\servlet-api-3.0.20100224.jar;"%REPO%"\jetty-continuation-8.0.0.RC0.jar;"%REPO%"\jetty-webapp-8.0.0.RC0.jar;"%REPO%"\jetty-xml-8.0.0.RC0.jar;"%REPO%"\jetty-servlet-8.0.0.RC0.jar;"%REPO%"\jetty-security-8.0.0.RC0.jar;"%REPO%"\wicket-core-1.5.2.jar;"%REPO%"\wicket-util-1.5.2.jar;"%REPO%"\slf4j-api-1.6.1.jar;"%REPO%"\wicket-request-1.5.2.jar;"%REPO%"\slf4j-jcl-1.6.3.jar;"%REPO%"\hyracks-control-nc-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-net-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-hadoop-compat-0.2.2-SNAPSHOT.jar;"%REPO%"\pregelix-dataflow-0.0.1-SNAPSHOT.jar;"%REPO%"\pregelix-runtime-0.0.1-SNAPSHOT.jar;"%REPO%"\hadoop-test-0.20.2.jar;"%REPO%"\ftplet-api-1.0.0.jar;"%REPO%"\mina-core-2.0.0-M5.jar;"%REPO%"\ftpserver-core-1.0.0.jar;"%REPO%"\ftpserver-deprecated-1.0.0-M2.jar;"%REPO%"\javax.servlet-api-3.0.1.jar;"%REPO%"\pregelix-core-0.0.1-SNAPSHOT.jar
-goto endInit
-
-@REM Reaching here means variables are defined and arguments have been captured
-:endInit
-
-%JAVACMD% %JAVA_OPTS% -classpath %CLASSPATH_PREFIX%;%CLASSPATH% -Dapp.name="pregelix" -Dapp.repo="%REPO%" -Dapp.home="%BASEDIR%" -Dbasedir="%BASEDIR%" org.apache.hadoop.util.RunJar %CMD_LINE_ARGS%
-if ERRORLEVEL 1 goto error
-goto end
-
-:error
-if "%OS%"=="Windows_NT" @endlocal
-set ERROR_CODE=%ERRORLEVEL%
-
-:end
-@REM set local scope for the variables with windows NT shell
-if "%OS%"=="Windows_NT" goto endNT
-
-@REM For old DOS remove the set variables from ENV - we assume they were not set
-@REM before we started - at least we don't leave any baggage around
-set CMD_LINE_ARGS=
-goto postExec
-
-:endNT
-@REM If error code is set to 1 then the endlocal was done already in :error.
-if %ERROR_CODE% EQU 0 @endlocal
-
-
-:postExec
-
-if "%FORCE_EXIT_ON_ERROR%" == "on" (
- if %ERROR_CODE% NEQ 0 exit %ERROR_CODE%
-)
-
-exit /B %ERROR_CODE%
\ No newline at end of file
diff --git a/pregelix/pregelix-core/src/main/resources/scripts/pregelixcc b/pregelix/pregelix-core/src/main/resources/scripts/pregelixcc
deleted file mode 100755
index c1ee3f2..0000000
--- a/pregelix/pregelix-core/src/main/resources/scripts/pregelixcc
+++ /dev/null
@@ -1,114 +0,0 @@
-#!/bin/sh
-#
-#------------------------------------------------------------------------
-# Copyright 2009-2013 by The Regents of the University of California
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# you may obtain a copy of the License from
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ------------------------------------------------------------------------
-#
-
-# resolve links - $0 may be a softlink
-PRG="$0"
-
-while [ -h "$PRG" ]; do
- ls=`ls -ld "$PRG"`
- link=`expr "$ls" : '.*-> \(.*\)$'`
- if expr "$link" : '/.*' > /dev/null; then
- PRG="$link"
- else
- PRG=`dirname "$PRG"`/"$link"
- fi
-done
-
-PRGDIR=`dirname "$PRG"`
-BASEDIR=`cd "$PRGDIR/.." >/dev/null; pwd`
-
-
-
-# OS specific support. $var _must_ be set to either true or false.
-cygwin=false;
-darwin=false;
-case "`uname`" in
- CYGWIN*) cygwin=true ;;
- Darwin*) darwin=true
- if [ -z "$JAVA_VERSION" ] ; then
- JAVA_VERSION="CurrentJDK"
- else
- echo "Using Java version: $JAVA_VERSION"
- fi
- if [ -z "$JAVA_HOME" ] ; then
- JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/${JAVA_VERSION}/Home
- fi
- ;;
-esac
-
-if [ -z "$JAVA_HOME" ] ; then
- if [ -r /etc/gentoo-release ] ; then
- JAVA_HOME=`java-config --jre-home`
- fi
-fi
-
-# For Cygwin, ensure paths are in UNIX format before anything is touched
-if $cygwin ; then
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
- [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
-fi
-
-# If a specific java binary isn't specified search for the standard 'java' binary
-if [ -z "$JAVACMD" ] ; then
- if [ -n "$JAVA_HOME" ] ; then
- if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
- # IBM's JDK on AIX uses strange locations for the executables
- JAVACMD="$JAVA_HOME/jre/sh/java"
- else
- JAVACMD="$JAVA_HOME/bin/java"
- fi
- else
- JAVACMD=`which java`
- fi
-fi
-
-if [ ! -x "$JAVACMD" ] ; then
- echo "Error: JAVA_HOME is not defined correctly." 1>&2
- echo " We cannot execute $JAVACMD" 1>&2
- exit 1
-fi
-
-if [ -z "$REPO" ]
-then
- REPO="$BASEDIR"/lib
-fi
-
-CLASSPATH=$CLASSPATH_PREFIX:"$HADOOP_HOME"/conf:/etc/hadoop/conf:"$BASEDIR"/etc:$1
-
-for f in ${REPO}/*.jar; do
- CLASSPATH=${CLASSPATH}:$f;
-done
-
-# For Cygwin, switch paths to Windows format before running java
-if $cygwin; then
- [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
- [ -n "$HOME" ] && HOME=`cygpath --path --windows "$HOME"`
- [ -n "$BASEDIR" ] && BASEDIR=`cygpath --path --windows "$BASEDIR"`
- [ -n "$REPO" ] && REPO=`cygpath --path --windows "$REPO"`
-fi
-
-exec "$JAVACMD" $JAVA_OPTS \
- -classpath "$CLASSPATH" \
- -Dapp.name="pregelixcc" \
- -Dapp.pid="$$" \
- -Dapp.repo="$REPO" \
- -Dapp.home="$BASEDIR" \
- -Dbasedir="$BASEDIR" \
- edu.uci.ics.hyracks.control.cc.CCDriver \
- "$@"
diff --git a/pregelix/pregelix-core/src/main/resources/scripts/pregelixnc b/pregelix/pregelix-core/src/main/resources/scripts/pregelixnc
deleted file mode 100755
index c01b4b4..0000000
--- a/pregelix/pregelix-core/src/main/resources/scripts/pregelixnc
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/bin/sh
-#
-#------------------------------------------------------------------------
-# Copyright 2009-2013 by The Regents of the University of California
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# you may obtain a copy of the License from
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ------------------------------------------------------------------------
-#
-
-# resolve links - $0 may be a softlink
-PRG="$0"
-
-while [ -h "$PRG" ]; do
- ls=`ls -ld "$PRG"`
- link=`expr "$ls" : '.*-> \(.*\)$'`
- if expr "$link" : '/.*' > /dev/null; then
- PRG="$link"
- else
- PRG=`dirname "$PRG"`/"$link"
- fi
-done
-
-PRGDIR=`dirname "$PRG"`
-BASEDIR=`cd "$PRGDIR/.." >/dev/null; pwd`
-
-
-
-# OS specific support. $var _must_ be set to either true or false.
-cygwin=false;
-darwin=false;
-case "`uname`" in
- CYGWIN*) cygwin=true ;;
- Darwin*) darwin=true
- if [ -z "$JAVA_VERSION" ] ; then
- JAVA_VERSION="CurrentJDK"
- else
- echo "Using Java version: $JAVA_VERSION"
- fi
- if [ -z "$JAVA_HOME" ] ; then
- JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/${JAVA_VERSION}/Home
- fi
- ;;
-esac
-
-if [ -z "$JAVA_HOME" ] ; then
- if [ -r /etc/gentoo-release ] ; then
- JAVA_HOME=`java-config --jre-home`
- fi
-fi
-
-# For Cygwin, ensure paths are in UNIX format before anything is touched
-if $cygwin ; then
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
- [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
-fi
-
-# If a specific java binary isn't specified search for the standard 'java' binary
-if [ -z "$JAVACMD" ] ; then
- if [ -n "$JAVA_HOME" ] ; then
- if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
- # IBM's JDK on AIX uses strange locations for the executables
- JAVACMD="$JAVA_HOME/jre/sh/java"
- else
- JAVACMD="$JAVA_HOME/bin/java"
- fi
- else
- JAVACMD=`which java`
- fi
-fi
-
-if [ ! -x "$JAVACMD" ] ; then
- echo "Error: JAVA_HOME is not defined correctly." 1>&2
- echo " We cannot execute $JAVACMD" 1>&2
- exit 1
-fi
-
-if [ -z "$REPO" ]
-then
- REPO="$BASEDIR"/lib
-fi
-
-CLASSPATH=$CLASSPATH_PREFIX:"$HADOOP_HOME"/conf:/etc/hadoop/conf:"$BASEDIR"/etc:$1
-
-for f in ${REPO}/*.jar; do
- CLASSPATH=${CLASSPATH}:$f;
-done
-
-
-# For Cygwin, switch paths to Windows format before running java
-if $cygwin; then
- [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
- [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
- [ -n "$HOME" ] && HOME=`cygpath --path --windows "$HOME"`2
- [ -n "$BASEDIR" ] && BASEDIR=`cygpath --path --windows "$BASEDIR"`
- [ -n "$REPO" ] && REPO=`cygpath --path --windows "$REPO"`
-fi
-
-exec "$JAVACMD" $JAVA_OPTS \
- -classpath "$CLASSPATH" \
- -Dapp.name="pregelixnc" \
- -Dapp.pid="$$" \
- -Dapp.repo="$REPO" \
- -Dapp.home="$BASEDIR" \
- -Dbasedir="$BASEDIR" \
- edu.uci.ics.hyracks.control.nc.NCDriver \
- -app-nc-main-class edu.uci.ics.pregelix.runtime.bootstrap.NCApplicationEntryPoint "$@"
diff --git a/pregelix/pregelix-core/src/main/resources/scripts/startcc.sh b/pregelix/pregelix-core/src/main/resources/scripts/startcc.sh
index 2a6cab2..d7a0ead 100644
--- a/pregelix/pregelix-core/src/main/resources/scripts/startcc.sh
+++ b/pregelix/pregelix-core/src/main/resources/scripts/startcc.sh
@@ -58,8 +58,8 @@
if [ -f "conf/topology.xml" ]; then
#Launch hyracks cc script with topology
-${PREGELIX_HOME}/bin/pregelixcc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 0 -cluster-topology "conf/topology.xml" &> $CCLOGS_DIR/cc.log &
+${PREGELIX_HOME}/bin/pregelixcc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -heartbeat-period 5000 -max-heartbeat-lapse-periods 4 -default-max-job-attempts 0 -job-history-size 0 -cluster-topology "conf/topology.xml" &> $CCLOGS_DIR/cc.log &
else
#Launch hyracks cc script without topology
-${PREGELIX_HOME}/bin/pregelixcc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 0 &> $CCLOGS_DIR/cc.log &
+${PREGELIX_HOME}/bin/pregelixcc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -heartbeat-period 5000 -max-heartbeat-lapse-periods 4 -default-max-job-attempts 0 -job-history-size 0 &> $CCLOGS_DIR/cc.log &
fi
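With the new flags, the cluster controller declares a node controller dead after 4 missed heartbeats at a 5000 ms period, i.e. after roughly 5000 ms * 4 = 20 s, instead of the previous effectively-infinite 999999 lapse periods; bounded failure detection is what lets failed jobs be retried promptly.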
diff --git a/pregelix/pregelix-dataflow-std-base/pom.xml b/pregelix/pregelix-dataflow-std-base/pom.xml
index 35a6c91..d4c0ee6 100644
--- a/pregelix/pregelix-dataflow-std-base/pom.xml
+++ b/pregelix/pregelix-dataflow-std-base/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
@@ -87,15 +87,15 @@
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>pregelix-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <artifactId>hyracks-dataflow-common</artifactId>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/pregelix/pregelix-dataflow-std-base/src/main/java/edu/uci/ics/pregelix/dataflow/std/base/IAggregateFunction.java b/pregelix/pregelix-dataflow-std-base/src/main/java/edu/uci/ics/pregelix/dataflow/std/base/IAggregateFunction.java
index 97db63f..c544b31 100644
--- a/pregelix/pregelix-dataflow-std-base/src/main/java/edu/uci/ics/pregelix/dataflow/std/base/IAggregateFunction.java
+++ b/pregelix/pregelix-dataflow-std-base/src/main/java/edu/uci/ics/pregelix/dataflow/std/base/IAggregateFunction.java
@@ -24,4 +24,10 @@
public void step(IFrameTupleReference tuple) throws HyracksDataException;
public void finish() throws HyracksDataException;
+
+ public void initAll() throws HyracksDataException;
+
+ public void finishAll() throws HyracksDataException;
+
+ public int estimateStep(IFrameTupleReference tuple) throws HyracksDataException;
}
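A minimal sketch of the extended contract, assuming a running-count aggregate; the class name and the per-group init() method are assumptions for illustration. initAll()/finishAll() bracket an entire run across all groups, while estimateStep() lets the caller bound the output size of the next step before applying it:

    public class CountAggregateFunction implements IAggregateFunction {
        private long count;

        public void initAll() throws HyracksDataException { /* once, before any group */ }

        public void init() throws HyracksDataException { // per group (assumed base method)
            count = 0;
        }

        public void step(IFrameTupleReference tuple) throws HyracksDataException {
            count++;
        }

        public void finish() throws HyracksDataException { /* emit count for the group */ }

        public void finishAll() throws HyracksDataException { /* flush once, after all groups */ }

        public int estimateStep(IFrameTupleReference tuple) throws HyracksDataException {
            return 8; // a long counter never grows the partial result beyond 8 bytes
        }
    }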
diff --git a/pregelix/pregelix-dataflow-std-base/src/main/java/edu/uci/ics/pregelix/dataflow/std/base/IAggregateFunctionFactory.java b/pregelix/pregelix-dataflow-std-base/src/main/java/edu/uci/ics/pregelix/dataflow/std/base/IAggregateFunctionFactory.java
index 58795d1..d5364da 100644
--- a/pregelix/pregelix-dataflow-std-base/src/main/java/edu/uci/ics/pregelix/dataflow/std/base/IAggregateFunctionFactory.java
+++ b/pregelix/pregelix-dataflow-std-base/src/main/java/edu/uci/ics/pregelix/dataflow/std/base/IAggregateFunctionFactory.java
@@ -16,11 +16,12 @@
import java.io.Serializable;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
public interface IAggregateFunctionFactory extends Serializable {
- public IAggregateFunction createAggregateFunction(IHyracksTaskContext ctx,
- IDataOutputProvider provider) throws HyracksException;
+ public IAggregateFunction createAggregateFunction(IHyracksTaskContext ctx, IDataOutputProvider provider,
+ IFrameWriter writer) throws HyracksException;
}
\ No newline at end of file
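With the extra IFrameWriter parameter, an aggregate function can stream results (e.g., a global aggregate) directly to an output writer instead of only through the IDataOutputProvider. A hedged factory sketch, reusing the hypothetical CountAggregateFunction above with an assumed two-argument constructor:

    public class CountAggregateFunctionFactory implements IAggregateFunctionFactory {
        private static final long serialVersionUID = 1L;

        public IAggregateFunction createAggregateFunction(IHyracksTaskContext ctx,
                IDataOutputProvider provider, IFrameWriter writer) throws HyracksException {
            return new CountAggregateFunction(provider, writer); // hypothetical constructor
        }
    }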
diff --git a/pregelix/pregelix-dataflow-std/pom.xml b/pregelix/pregelix-dataflow-std/pom.xml
index 3604e57..9ec8e1d 100644
--- a/pregelix/pregelix-dataflow-std/pom.xml
+++ b/pregelix/pregelix-dataflow-std/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
@@ -88,84 +88,84 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-dataflow-std-base</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-hdfs-core</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-ipc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/ClusteredGroupOperatorDescriptor.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/ClusteredGroupOperatorDescriptor.java
new file mode 100644
index 0000000..bb41953
--- /dev/null
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/ClusteredGroupOperatorDescriptor.java
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.dataflow.group;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.IOperatorDescriptorRegistry;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+
+public class ClusteredGroupOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
+ private final int[] groupFields;
+ private final IBinaryComparatorFactory[] comparatorFactories;
+ private final IClusteredAggregatorDescriptorFactory aggregatorFactory;
+
+ private static final long serialVersionUID = 1L;
+
+ public ClusteredGroupOperatorDescriptor(IOperatorDescriptorRegistry spec, int[] groupFields,
+ IBinaryComparatorFactory[] comparatorFactories, IClusteredAggregatorDescriptorFactory aggregatorFactory,
+ RecordDescriptor recordDescriptor) {
+ super(spec, 1, 1);
+ this.groupFields = groupFields;
+ this.comparatorFactories = comparatorFactories;
+ this.aggregatorFactory = aggregatorFactory;
+ recordDescriptors[0] = recordDescriptor;
+ }
+
+ @Override
+ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+ final IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
+ throws HyracksDataException {
+ return new ClusteredGroupOperatorNodePushable(ctx, groupFields, comparatorFactories, aggregatorFactory,
+ recordDescProvider.getInputRecordDescriptor(getActivityId(), 0), recordDescriptors[0]);
+ }
+}
\ No newline at end of file
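A hypothetical wiring sketch; spec, scanner, vertexIdComparatorFactory, aggregatorFactory, and outRecordDesc are assumed to exist in the surrounding job-building code. Because the operator requires its input to arrive already clustered on the group fields, it needs neither a hash table nor a sort:

    int[] keyFields = new int[] { 0 };
    IBinaryComparatorFactory[] cmps = new IBinaryComparatorFactory[] { vertexIdComparatorFactory };
    ClusteredGroupOperatorDescriptor group = new ClusteredGroupOperatorDescriptor(spec, keyFields,
            cmps, aggregatorFactory, outRecordDesc);
    spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, group, 0);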
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/ClusteredGroupOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/ClusteredGroupOperatorNodePushable.java
new file mode 100644
index 0000000..a95a46e
--- /dev/null
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/ClusteredGroupOperatorNodePushable.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.dataflow.group;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputUnaryOutputOperatorNodePushable;
+
+class ClusteredGroupOperatorNodePushable extends AbstractUnaryInputUnaryOutputOperatorNodePushable {
+ private final IHyracksTaskContext ctx;
+ private final int[] groupFields;
+ private final IBinaryComparatorFactory[] comparatorFactories;
+ private final IClusteredAggregatorDescriptorFactory aggregatorFactory;
+ private final RecordDescriptor inRecordDescriptor;
+ private final RecordDescriptor outRecordDescriptor;
+ private ClusteredGroupWriter pgw;
+
+ ClusteredGroupOperatorNodePushable(IHyracksTaskContext ctx, int[] groupFields,
+ IBinaryComparatorFactory[] comparatorFactories, IClusteredAggregatorDescriptorFactory aggregatorFactory,
+ RecordDescriptor inRecordDescriptor, RecordDescriptor outRecordDescriptor) {
+ this.ctx = ctx;
+ this.groupFields = groupFields;
+ this.comparatorFactories = comparatorFactories;
+ this.aggregatorFactory = aggregatorFactory;
+ this.inRecordDescriptor = inRecordDescriptor;
+ this.outRecordDescriptor = outRecordDescriptor;
+ }
+
+ @Override
+ public void open() throws HyracksDataException {
+ final IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
+ for (int i = 0; i < comparatorFactories.length; ++i) {
+ comparators[i] = comparatorFactories[i].createBinaryComparator();
+ }
+ final ByteBuffer copyFrame = ctx.allocateFrame();
+ final FrameTupleAccessor copyFrameAccessor = new FrameTupleAccessor(ctx.getFrameSize(), inRecordDescriptor);
+ copyFrameAccessor.reset(copyFrame);
+ ByteBuffer outFrame = ctx.allocateFrame();
+ final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize());
+ appender.reset(outFrame, true);
+ pgw = new ClusteredGroupWriter(ctx, groupFields, comparators, aggregatorFactory, inRecordDescriptor,
+ outRecordDescriptor, writer);
+ pgw.open();
+ }
+
+ @Override
+ public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+ pgw.nextFrame(buffer);
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+ pgw.fail();
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ pgw.close();
+ }
+}
\ No newline at end of file
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/ClusteredGroupWriter.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/ClusteredGroupWriter.java
new file mode 100644
index 0000000..4b4a1c3
--- /dev/null
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/ClusteredGroupWriter.java
@@ -0,0 +1,165 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.dataflow.group;
+
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+
+public class ClusteredGroupWriter implements IFrameWriter {
+ private final int[] groupFields;
+ private final IBinaryComparator[] comparators;
+ private final IAggregatorDescriptor aggregator;
+ private final AggregateState aggregateState;
+ private final IFrameWriter writer;
+ private final ByteBuffer copyFrame;
+ private final FrameTupleAccessor inFrameAccessor;
+ private final FrameTupleAccessor copyFrameAccessor;
+
+ private final ByteBuffer outFrame;
+ private final FrameTupleAppender appender;
+ private final ArrayTupleBuilder tupleBuilder;
+
+ private boolean first;
+
+ public ClusteredGroupWriter(IHyracksTaskContext ctx, int[] groupFields, IBinaryComparator[] comparators,
+ IClusteredAggregatorDescriptorFactory aggregatorFactory, RecordDescriptor inRecordDesc,
+ RecordDescriptor outRecordDesc, IFrameWriter writer) throws HyracksDataException {
+ this.groupFields = groupFields;
+ this.comparators = comparators;
+ this.writer = writer;
+ copyFrame = ctx.allocateFrame();
+ inFrameAccessor = new FrameTupleAccessor(ctx.getFrameSize(), inRecordDesc);
+ copyFrameAccessor = new FrameTupleAccessor(ctx.getFrameSize(), inRecordDesc);
+ copyFrameAccessor.reset(copyFrame);
+
+ outFrame = ctx.allocateFrame();
+ appender = new FrameTupleAppender(ctx.getFrameSize());
+ appender.reset(outFrame, true);
+
+ tupleBuilder = new ArrayTupleBuilder(outRecordDesc.getFields().length);
+ this.aggregator = aggregatorFactory.createAggregator(ctx, inRecordDesc, outRecordDesc, groupFields, groupFields, writer, outFrame, appender);
+ this.aggregateState = aggregator.createAggregateStates();
+ }
+
+ @Override
+ public void open() throws HyracksDataException {
+ writer.open();
+ first = true;
+ }
+
+ @Override
+ public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+ inFrameAccessor.reset(buffer);
+ int nTuples = inFrameAccessor.getTupleCount();
+ for (int i = 0; i < nTuples; ++i) {
+ if (first) {
+
+ tupleBuilder.reset();
+ for (int j = 0; j < groupFields.length; j++) {
+ tupleBuilder.addField(inFrameAccessor, i, groupFields[j]);
+ }
+ aggregator.init(tupleBuilder, inFrameAccessor, i, aggregateState);
+
+ first = false;
+
+ } else {
+ if (i == 0) {
+ switchGroupIfRequired(copyFrameAccessor, copyFrameAccessor.getTupleCount() - 1, inFrameAccessor, i);
+ } else {
+ switchGroupIfRequired(inFrameAccessor, i - 1, inFrameAccessor, i);
+ }
+
+ }
+ }
+ FrameUtils.copy(buffer, copyFrame);
+ }
+
+ private void switchGroupIfRequired(FrameTupleAccessor prevTupleAccessor, int prevTupleIndex,
+ FrameTupleAccessor currTupleAccessor, int currTupleIndex) throws HyracksDataException {
+ if (!sameGroup(prevTupleAccessor, prevTupleIndex, currTupleAccessor, currTupleIndex)) {
+ writeOutput(prevTupleAccessor, prevTupleIndex);
+
+ tupleBuilder.reset();
+ for (int j = 0; j < groupFields.length; j++) {
+ tupleBuilder.addField(currTupleAccessor, currTupleIndex, groupFields[j]);
+ }
+ aggregator.init(tupleBuilder, currTupleAccessor, currTupleIndex, aggregateState);
+ } else {
+ aggregator.aggregate(currTupleAccessor, currTupleIndex, null, 0, aggregateState);
+ }
+ }
+
+ private void writeOutput(final FrameTupleAccessor lastTupleAccessor, int lastTupleIndex)
+ throws HyracksDataException {
+ tupleBuilder.reset();
+ for (int j = 0; j < groupFields.length; j++) {
+ tupleBuilder.addField(lastTupleAccessor, lastTupleIndex, groupFields[j]);
+ }
+ aggregator.outputFinalResult(tupleBuilder, lastTupleAccessor, lastTupleIndex, aggregateState);
+ if (!appender.appendSkipEmptyField(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
+ tupleBuilder.getSize())) {
+ FrameUtils.flushFrame(outFrame, writer);
+ appender.reset(outFrame, true);
+ if (!appender.appendSkipEmptyField(tupleBuilder.getFieldEndOffsets(), tupleBuilder.getByteArray(), 0,
+ tupleBuilder.getSize())) {
+ throw new HyracksDataException("The output of size " + tupleBuilder.getSize()
+ + " cannot be fit into a frame of size " + outFrame.array().length);
+ }
+ }
+
+ }
+
+ private boolean sameGroup(FrameTupleAccessor a1, int t1Idx, FrameTupleAccessor a2, int t2Idx) {
+ for (int i = 0; i < comparators.length; ++i) {
+ int fIdx = groupFields[i];
+ int s1 = a1.getTupleStartOffset(t1Idx) + a1.getFieldSlotsLength() + a1.getFieldStartOffset(t1Idx, fIdx);
+ int l1 = a1.getFieldLength(t1Idx, fIdx);
+ int s2 = a2.getTupleStartOffset(t2Idx) + a2.getFieldSlotsLength() + a2.getFieldStartOffset(t2Idx, fIdx);
+ int l2 = a2.getFieldLength(t2Idx, fIdx);
+ if (comparators[i].compare(a1.getBuffer().array(), s1, l1, a2.getBuffer().array(), s2, l2) != 0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+ writer.fail();
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ if (!first) {
+ writeOutput(copyFrameAccessor, copyFrameAccessor.getTupleCount() - 1);
+ if (appender.getTupleCount() > 0) {
+ FrameUtils.flushFrame(outFrame, writer);
+ }
+ }
+ aggregateState.close();
+ writer.close();
+ }
+}
\ No newline at end of file
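The writer makes a single pass over pre-sorted input: each tuple is compared with its predecessor (the last tuple of the previous frame is kept in copyFrame), aggregating on a key match and emitting plus re-initializing on a key change, with close() flushing the final group. A self-contained illustration of the same boundary logic on plain arrays:

    int[] keys = { 1, 1, 2, 2, 2, 5 };
    int[] vals = { 3, 4, 1, 1, 1, 9 };
    long sum = vals[0]; // init for the first group
    for (int i = 1; i < keys.length; i++) {
        if (keys[i] == keys[i - 1]) {
            sum += vals[i]; // same group: aggregate
        } else {
            System.out.println(keys[i - 1] + " -> " + sum); // boundary: emit finished group
            sum = vals[i]; // re-init for the new group
        }
    }
    System.out.println(keys[keys.length - 1] + " -> " + sum); // final group, as in close()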
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/IClusteredAggregatorDescriptorFactory.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/IClusteredAggregatorDescriptorFactory.java
new file mode 100644
index 0000000..3256f08
--- /dev/null
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/group/IClusteredAggregatorDescriptorFactory.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.dataflow.group;
+
+import java.io.Serializable;
+import java.nio.ByteBuffer;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
+
+/**
+ * A factory that creates IAggregatorDescriptor instances for clustered (pre-sorted) group-by operators.
+ */
+public interface IClusteredAggregatorDescriptorFactory extends Serializable {
+
+ IAggregatorDescriptor createAggregator(IHyracksTaskContext ctx, RecordDescriptor inRecordDescriptor,
+ RecordDescriptor outRecordDescriptor, int[] keyFields, final int[] keyFieldsInPartialResults,
+ IFrameWriter resultWriter, ByteBuffer outputFrame, FrameTupleAppender appender) throws HyracksDataException;
+
+}
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorNodePushable.java
index 5156dbf..b22e468 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorNodePushable.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorNodePushable.java
@@ -157,16 +157,34 @@
ITupleReference tupleRef = cursor.getTuple();
/**
+ * merge with updated tuple
+ */
+ ITupleReference indexEntryTuple = tupleRef;
+ ITupleReference cachedUpdatedLastTuple = updateBuffer.getLastTuple();
+ if (cachedUpdatedLastTuple != null) {
+ if (compare(cachedUpdatedLastTuple, tupleRef) == 0) {
+ indexEntryTuple = cachedUpdatedLastTuple;
+ }
+ }
+
+ /**
* call the update function
*/
- functionProxy.functionCall(leftAccessor, tIndex, tupleRef, cloneUpdateTb);
+ functionProxy.functionCall(leftAccessor, tIndex, indexEntryTuple, cloneUpdateTb);
- //doing copy update
- CopyUpdateUtil.copyUpdate(tempTupleReference, tupleRef, updateBuffer, cloneUpdateTb, indexAccessor, cursor,
- rangePred);
+ /**
+ * doing copy update
+ */
+ CopyUpdateUtil.copyUpdate(tempTupleReference, indexEntryTuple, updateBuffer, cloneUpdateTb, indexAccessor,
+ cursor, rangePred);
}
}
+ /** compare tuples */
+ private int compare(ITupleReference left, ITupleReference right) throws Exception {
+ return lowKeySearchCmp.compare(left, right);
+ }
+
@Override
public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
accessor.reset(buffer);
@@ -212,8 +230,16 @@
@Override
public void fail() throws HyracksDataException {
- for (IFrameWriter writer : writers)
+ try {
+ cursor.close();
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ } finally {
+ treeIndexOpHelper.close();
+ }
+ for (IFrameWriter writer : writers) {
writer.fail();
+ }
}
@Override
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinFunctionUpdateOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinFunctionUpdateOperatorNodePushable.java
index 4ca7533..0ecfd03 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinFunctionUpdateOperatorNodePushable.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinFunctionUpdateOperatorNodePushable.java
@@ -195,8 +195,10 @@
// TODO: currently use low key only, check what they mean
int cmp = compare(lowKey, currentTopTuple);
if (cmp <= 0) {
- if (cmp == 0)
+ if (cmp == 0) {
outputMatch(i);
+ currentTopTuple = cursor.getTuple();
+ }
i++;
} else {
moveTreeCursor();
@@ -252,8 +254,16 @@
@Override
public void fail() throws HyracksDataException {
- for (IFrameWriter writer : writers)
+ try {
+ cursor.close();
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ } finally {
+ treeIndexOpHelper.close();
+ }
+ for (IFrameWriter writer : writers) {
writer.fail();
+ }
}
/** compare tuples */
@@ -262,16 +272,28 @@
}
//for the join match cases
- private void writeResults(IFrameTupleAccessor leftAccessor, int tIndex, ITupleReference frameTuple)
+ private void writeResults(IFrameTupleAccessor leftAccessor, int tIndex, ITupleReference indexTuple)
throws Exception {
/**
+ * merge with the cached tuple, if any
+ */
+ ITupleReference indexEntryTuple = indexTuple;
+ ITupleReference cachedUpdatedLastTuple = updateBuffer.getLastTuple();
+ if (cachedUpdatedLastTuple != null) {
+ if (compare(cachedUpdatedLastTuple, indexTuple) == 0) {
+ indexEntryTuple = cachedUpdatedLastTuple;
+ }
+ }
+ /**
* function call
*/
- functionProxy.functionCall(leftAccessor, tIndex, frameTuple, cloneUpdateTb);
+ functionProxy.functionCall(leftAccessor, tIndex, indexEntryTuple, cloneUpdateTb);
- //doing clone update
- CopyUpdateUtil.copyUpdate(tempTupleReference, frameTuple, updateBuffer, cloneUpdateTb, indexAccessor, cursor,
- rangePred);
+ /**
+ * doing clone update
+ */
+ CopyUpdateUtil.copyUpdate(tempTupleReference, indexEntryTuple, updateBuffer, cloneUpdateTb, indexAccessor,
+ cursor, rangePred);
}
/** write result for outer case */
@@ -290,4 +312,4 @@
public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc) {
writers[index] = writer;
}
-}
\ No newline at end of file
+}
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinOperatorNodePushable.java
index dd6ee3c..e64e9cc 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinOperatorNodePushable.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopRightOuterJoinOperatorNodePushable.java
@@ -245,6 +245,13 @@
@Override
public void fail() throws HyracksDataException {
+ try {
+ cursor.close();
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ } finally {
+ treeIndexOpHelper.close();
+ }
writer.fail();
}
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionFunctionUpdateOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionFunctionUpdateOperatorNodePushable.java
index 3cebfb8..a9c787f 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionFunctionUpdateOperatorNodePushable.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionFunctionUpdateOperatorNodePushable.java
@@ -219,8 +219,16 @@
@Override
public void fail() throws HyracksDataException {
- for (IFrameWriter writer : writers)
+ try {
+ cursor.close();
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ } finally {
+ treeIndexOpHelper.close();
+ }
+ for (IFrameWriter writer : writers) {
writer.fail();
+ }
}
/** compare tuples */
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionOperatorNodePushable.java
index bbe2764..86a211f 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionOperatorNodePushable.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopSetUnionOperatorNodePushable.java
@@ -205,6 +205,13 @@
@Override
public void fail() throws HyracksDataException {
+ try {
+ cursor.close();
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ } finally {
+ treeIndexOpHelper.close();
+ }
writer.fail();
}
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/TreeIndexBulkReLoadOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/TreeIndexBulkReLoadOperatorNodePushable.java
index c4890e1..c985f64 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/TreeIndexBulkReLoadOperatorNodePushable.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/TreeIndexBulkReLoadOperatorNodePushable.java
@@ -100,6 +100,11 @@
@Override
public void fail() throws HyracksDataException {
-
+ try {
+ bulkLoader.end();
+ } catch (IndexException e) {
+ treeIndexOpHelper.close();
+ throw new HyracksDataException(e);
+ }
}
}
\ No newline at end of file
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/TreeSearchFunctionUpdateOperatorNodePushable.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/TreeSearchFunctionUpdateOperatorNodePushable.java
index bd85e3e..de87909 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/TreeSearchFunctionUpdateOperatorNodePushable.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/TreeSearchFunctionUpdateOperatorNodePushable.java
@@ -224,8 +224,16 @@
@Override
public void fail() throws HyracksDataException {
- for (IFrameWriter writer : writers)
+ try {
+ cursor.close();
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ } finally {
+ treeIndexHelper.close();
+ }
+ for (IFrameWriter writer : writers) {
writer.fail();
+ }
}
@Override
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/CopyUpdateUtil.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/CopyUpdateUtil.java
index 8709301..392f728 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/CopyUpdateUtil.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/CopyUpdateUtil.java
@@ -34,7 +34,7 @@
int srcLen = fieldEndOffsets[1] - fieldEndOffsets[0]; // the updated vertex size
int frSize = frameTuple.getFieldLength(1); // the vertex binary size in the leaf page
if (srcLen <= frSize) {
- //doing in-place update if possible, save the "real update" overhead
+ //do an in-place update if the new vertex size is not larger than the original size, saving the "real update" overhead
System.arraycopy(cloneUpdateTb.getByteArray(), srcStart, frameTuple.getFieldData(1),
frameTuple.getFieldStart(1), srcLen);
cloneUpdateTb.reset();
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/ResetableByteArrayInputStream.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/ResetableByteArrayInputStream.java
index b697466..5be9ffc 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/ResetableByteArrayInputStream.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/ResetableByteArrayInputStream.java
@@ -15,11 +15,8 @@
package edu.uci.ics.pregelix.dataflow.util;
import java.io.InputStream;
-import java.util.logging.Level;
-import java.util.logging.Logger;
public class ResetableByteArrayInputStream extends InputStream {
- private static final Logger LOGGER = Logger.getLogger(ResetableByteArrayInputStream.class.getName());
private byte[] data;
private int position;
@@ -36,19 +33,12 @@
public int read() {
int remaining = data.length - position;
int value = remaining > 0 ? (data[position++] & 0xff) : -1;
- if (LOGGER.isLoggable(Level.FINEST)) {
- LOGGER.finest("read(): value: " + value + " remaining: " + remaining + " position: " + position);
- }
return value;
}
@Override
public int read(byte[] bytes, int offset, int length) {
int remaining = data.length - position;
- if (LOGGER.isLoggable(Level.FINEST)) {
- LOGGER.finest("read(bytes[], int, int): remaining: " + remaining + " offset: " + offset + " length: "
- + length + " position: " + position);
- }
if (remaining == 0) {
return -1;
}
@@ -57,4 +47,9 @@
position += l;
return l;
}
+
+ @Override
+ public int available() {
+ return data.length - position;
+ }
}
\ No newline at end of file
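The new available() method lets callers measure exactly how many bytes a deserializer consumed, which is how TupleDeserializer (below) detects Writable implementations that read past their field boundary. The pattern, sketched with serde, di, data, offset, and fieldLength assumed from the surrounding code:

    bbis.setByteArray(data, offset);
    int before = bbis.available();
    Object value = serde.deserialize(di); // user-provided deserializer backed by readFields()
    int consumed = before - bbis.available();
    if (consumed > fieldLength) {
        throw new IllegalStateException("Writable read past its field boundary");
    }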
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/TupleDeserializer.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/TupleDeserializer.java
index 150bd8b..2fa1a4b 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/TupleDeserializer.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/TupleDeserializer.java
@@ -17,19 +17,15 @@
import java.io.DataInputStream;
import java.io.IOException;
-import java.util.logging.Level;
-import java.util.logging.Logger;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameConstants;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
public class TupleDeserializer {
- private static final Logger LOGGER = Logger.getLogger(TupleDeserializer.class.getName());
-
+ private static final String ERROR_MSG = "Out-of-bounds read in your Writable implementation of the vertex id, vertex value, edge value or message types; check your readFields and write implementations";
private Object[] record;
private RecordDescriptor recordDescriptor;
private ResetableByteArrayInputStream bbis;
@@ -43,132 +39,120 @@
}
public Object[] deserializeRecord(ITupleReference tupleRef) throws HyracksDataException {
- for (int i = 0; i < tupleRef.getFieldCount(); ++i) {
- byte[] data = tupleRef.getFieldData(i);
- int offset = tupleRef.getFieldStart(i);
- bbis.setByteArray(data, offset);
+ try {
+ for (int i = 0; i < tupleRef.getFieldCount(); ++i) {
+ byte[] data = tupleRef.getFieldData(i);
+ int offset = tupleRef.getFieldStart(i);
+ int len = tupleRef.getFieldLength(i);
+ bbis.setByteArray(data, offset);
- Object instance = recordDescriptor.getFields()[i].deserialize(di);
- if (LOGGER.isLoggable(Level.FINEST)) {
- LOGGER.finest(i + " " + instance);
- }
- record[i] = instance;
- if (FrameConstants.DEBUG_FRAME_IO) {
- try {
- if (di.readInt() != FrameConstants.FRAME_FIELD_MAGIC) {
- throw new HyracksDataException("Field magic mismatch");
- }
- } catch (IOException e) {
- e.printStackTrace();
+ int availableBefore = bbis.available();
+ Object instance = recordDescriptor.getFields()[i].deserialize(di);
+ int availableAfter = bbis.available();
+ if (availableBefore - availableAfter > len) {
+ throw new IllegalStateException(ERROR_MSG);
}
+
+ record[i] = instance;
}
+ return record;
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
}
- return record;
}
public Object[] deserializeRecord(IFrameTupleAccessor left, int tIndex, ITupleReference right)
throws HyracksDataException {
- byte[] data = left.getBuffer().array();
- int tStart = left.getTupleStartOffset(tIndex) + left.getFieldSlotsLength();
- int leftFieldCount = left.getFieldCount();
- int fStart = tStart;
- for (int i = 0; i < leftFieldCount; ++i) {
- /**
- * reset the input
- */
- fStart = tStart + left.getFieldStartOffset(tIndex, i);
- bbis.setByteArray(data, fStart);
+ try {
+ /** skip vertex id field in deserialization */
+ byte[] data = left.getBuffer().array();
+ int tStart = left.getTupleStartOffset(tIndex) + left.getFieldSlotsLength();
+ int leftFieldCount = left.getFieldCount();
+ int fStart = tStart;
+ for (int i = 1; i < leftFieldCount; ++i) {
+ /**
+ * reset the input
+ */
+ fStart = tStart + left.getFieldStartOffset(tIndex, i);
+ int fieldLength = left.getFieldLength(tIndex, i);
+ bbis.setByteArray(data, fStart);
- /**
- * do deserialization
- */
- Object instance = recordDescriptor.getFields()[i].deserialize(di);
- if (LOGGER.isLoggable(Level.FINEST)) {
- LOGGER.finest(i + " " + instance);
- }
- record[i] = instance;
- if (FrameConstants.DEBUG_FRAME_IO) {
- try {
- if (di.readInt() != FrameConstants.FRAME_FIELD_MAGIC) {
- throw new HyracksDataException("Field magic mismatch");
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- for (int i = leftFieldCount; i < record.length; ++i) {
- byte[] rightData = right.getFieldData(i - leftFieldCount);
- int rightOffset = right.getFieldStart(i - leftFieldCount);
- bbis.setByteArray(rightData, rightOffset);
+ /**
+ * do deserialization
+ */
+ int availableBefore = bbis.available();
+ Object instance = recordDescriptor.getFields()[i].deserialize(di);
+ int availableAfter = bbis.available();
+ if (availableBefore - availableAfter > fieldLength) {
+ throw new IllegalStateException(ERROR_MSG);
- Object instance = recordDescriptor.getFields()[i].deserialize(di);
- if (LOGGER.isLoggable(Level.FINEST)) {
- LOGGER.finest(i + " " + instance);
- }
- record[i] = instance;
- if (FrameConstants.DEBUG_FRAME_IO) {
- try {
- if (di.readInt() != FrameConstants.FRAME_FIELD_MAGIC) {
- throw new HyracksDataException("Field magic mismatch");
- }
- } catch (IOException e) {
- e.printStackTrace();
}
+ record[i] = instance;
}
+ /** skip vertex id field in deserialization */
+ for (int i = leftFieldCount + 1; i < record.length; ++i) {
+ byte[] rightData = right.getFieldData(i - leftFieldCount);
+ int rightOffset = right.getFieldStart(i - leftFieldCount);
+ int len = right.getFieldLength(i - leftFieldCount);
+ bbis.setByteArray(rightData, rightOffset);
+
+ int availableBefore = bbis.available();
+ Object instance = recordDescriptor.getFields()[i].deserialize(di);
+ int availableAfter = bbis.available();
+ if (availableBefore - availableAfter > len) {
+ throw new IllegalStateException(ERROR_MSG);
+ }
+ record[i] = instance;
+ }
+ return record;
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
}
- return record;
}
public Object[] deserializeRecord(ArrayTupleBuilder tb, ITupleReference right) throws HyracksDataException {
- byte[] data = tb.getByteArray();
- int[] offset = tb.getFieldEndOffsets();
- int start = 0;
- for (int i = 0; i < offset.length; ++i) {
- /**
- * reset the input
- */
- bbis.setByteArray(data, start);
- start = offset[i];
+ try {
+ byte[] data = tb.getByteArray();
+ int[] offset = tb.getFieldEndOffsets();
+ int start = 0;
+ /** skip vertex id fields in deserialization */
+ for (int i = 1; i < offset.length; ++i) {
+ /**
+ * reset the input
+ */
+ start = offset[i - 1];
+ bbis.setByteArray(data, start);
+ int fieldLength = i == 0 ? offset[0] : offset[i] - offset[i - 1];
- /**
- * do deserialization
- */
- Object instance = recordDescriptor.getFields()[i].deserialize(di);
- if (LOGGER.isLoggable(Level.FINEST)) {
- LOGGER.finest(i + " " + instance);
- }
- record[i] = instance;
- if (FrameConstants.DEBUG_FRAME_IO) {
- try {
- if (di.readInt() != FrameConstants.FRAME_FIELD_MAGIC) {
- throw new HyracksDataException("Field magic mismatch");
- }
- } catch (IOException e) {
- e.printStackTrace();
+ /**
+ * do deserialization
+ */
+ int availableBefore = bbis.available();
+ Object instance = recordDescriptor.getFields()[i].deserialize(di);
+ int availableAfter = bbis.available();
+ if (availableBefore - availableAfter > fieldLength) {
+ throw new IllegalStateException(ERROR_MSG);
}
+ record[i] = instance;
}
- }
- for (int i = offset.length; i < record.length; ++i) {
- byte[] rightData = right.getFieldData(i - offset.length);
- int rightOffset = right.getFieldStart(i - offset.length);
- bbis.setByteArray(rightData, rightOffset);
+ /** skip vertex id fields in deserialization */
+ for (int i = offset.length + 1; i < record.length; ++i) {
+ byte[] rightData = right.getFieldData(i - offset.length);
+ int rightOffset = right.getFieldStart(i - offset.length);
+ bbis.setByteArray(rightData, rightOffset);
+ int fieldLength = right.getFieldLength(i - offset.length);
- Object instance = recordDescriptor.getFields()[i].deserialize(di);
- if (LOGGER.isLoggable(Level.FINEST)) {
- LOGGER.finest(i + " " + instance);
- }
- record[i] = instance;
- if (FrameConstants.DEBUG_FRAME_IO) {
- try {
- if (di.readInt() != FrameConstants.FRAME_FIELD_MAGIC) {
- throw new HyracksDataException("Field magic mismatch");
- }
- } catch (IOException e) {
- e.printStackTrace();
+ int availableBefore = bbis.available();
+ Object instance = recordDescriptor.getFields()[i].deserialize(di);
+ int availableAfter = bbis.available();
+ if (availableBefore - availableAfter > fieldLength) {
+ throw new IllegalStateException(ERROR_MSG);
}
+ record[i] = instance;
}
+ return record;
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
}
- return record;
}
}
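Taken together with the stream change above, every deserialization loop now follows one guard pattern: snapshot available() before and after the field's readFields runs, and if the delta exceeds the field's declared length, a Writable implementation read past its field boundary. A self-contained sketch of that pattern, using Hadoop's IntWritable as a stand-in for any vertex or message type (class and variable names here are illustrative):

    import java.io.DataInput;
    import java.io.DataInputStream;
    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;

    import edu.uci.ics.pregelix.api.util.ResetableByteArrayInputStream;

    public class OverReadGuardSketch {
        public static void main(String[] args) throws IOException {
            byte[] frame = new byte[] { 0, 0, 0, 7 };  // one serialized int field
            int fieldStart = 0;
            int fieldLength = 4;                       // declared field length

            ResetableByteArrayInputStream bbis = new ResetableByteArrayInputStream();
            DataInput di = new DataInputStream(bbis);
            bbis.setByteArray(frame, fieldStart);

            int availableBefore = bbis.available();
            IntWritable value = new IntWritable();
            value.readFields(di);                      // consumes exactly 4 bytes
            int availableAfter = bbis.available();

            if (availableBefore - availableAfter > fieldLength) {
                // readFields consumed more bytes than the field holds
                throw new IllegalStateException("out-of-bounds read in readFields");
            }
            System.out.println(value.get());           // prints 7
        }
    }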
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/UpdateBuffer.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/UpdateBuffer.java
index b2be366..4421695 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/UpdateBuffer.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/util/UpdateBuffer.java
@@ -25,8 +25,10 @@
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;
/**
* The buffer to hold updates.
@@ -41,6 +43,7 @@
private final FrameTupleAppender appender;
private final IHyracksTaskContext ctx;
private final FrameTupleReference tuple = new FrameTupleReference();
+ private final FrameTupleReference lastTuple = new FrameTupleReference();
private final int frameSize;
private IFrameTupleAccessor fta;
@@ -94,7 +97,12 @@
fta.reset(buffer);
for (int j = 0; j < fta.getTupleCount(); j++) {
tuple.reset(fta, j);
- bta.update(tuple);
+ try {
+ bta.update(tuple);
+ } catch (TreeIndexNonExistentKeyException e) {
+ // key is not in the index yet: fall back to an insert
+ bta.insert(tuple);
+ }
}
}
@@ -104,6 +112,21 @@
appender.reset(buffer, true);
}
+ /**
+ * Returns the last tuple written to the update buffer, or null if the buffer is empty.
+ *
+ * @throws HyracksDataException
+ */
+ public ITupleReference getLastTuple() throws HyracksDataException {
+ fta.reset(buffers.get(currentInUse));
+ int tupleIndex = fta.getTupleCount() - 1;
+ if (tupleIndex < 0) {
+ return null;
+ }
+ lastTuple.reset(fta, tupleIndex);
+ return lastTuple;
+ }
+
private void allocate(int index) throws HyracksDataException {
if (index >= buffers.size()) {
buffers.add(ctx.allocateFrame());
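The update path above is now effectively an upsert: try the in-place B-tree update first, and if the key is not in the index yet, insert instead of failing the job. Condensed as a helper (a sketch only; the accessor calls mirror the ones in the hunk, but the helper itself is hypothetical):

    import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
    import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
    import edu.uci.ics.hyracks.storage.am.common.api.IIndexAccessor;
    import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
    import edu.uci.ics.hyracks.storage.am.common.exceptions.TreeIndexNonExistentKeyException;

    final class UpsertSketch {
        // Try the in-place update; fall back to insert when the key is absent.
        static void upsert(IIndexAccessor accessor, ITupleReference tuple)
                throws HyracksDataException, IndexException {
            try {
                accessor.update(tuple);
            } catch (TreeIndexNonExistentKeyException e) {
                accessor.insert(tuple); // first write for this key
            }
        }
    }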
diff --git a/pregelix/pregelix-dataflow/pom.xml b/pregelix/pregelix-dataflow/pom.xml
index 2828451..1df75ae 100644
--- a/pregelix/pregelix-dataflow/pom.xml
+++ b/pregelix/pregelix-dataflow/pom.xml
@@ -7,8 +7,7 @@
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
License for the specific language governing permissions and ! limitations
under the License. ! -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>pregelix-dataflow</artifactId>
<packaging>jar</packaging>
@@ -17,7 +16,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
@@ -84,75 +83,75 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-dataflow-std-base</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-ipc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/ClearStateOperatorDescriptor.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/ClearStateOperatorDescriptor.java
new file mode 100644
index 0000000..d86557b
--- /dev/null
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/ClearStateOperatorDescriptor.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.dataflow;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.context.RuntimeContext;
+
+/**
+ * Clear the state of the RuntimeContext in one slave
+ *
+ * @author yingyib
+ */
+public class ClearStateOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
+ private static final long serialVersionUID = 1L;
+ private String jobId;
+
+ public ClearStateOperatorDescriptor(JobSpecification spec, String jobId) {
+ super(spec, 0, 0);
+ this.jobId = jobId;
+ }
+
+ @Override
+ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+ IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) throws HyracksDataException {
+ return new IOperatorNodePushable() {
+
+ @Override
+ public void initialize() throws HyracksDataException {
+ RuntimeContext context = (RuntimeContext) ctx.getJobletContext().getApplicationContext()
+ .getApplicationObject();
+ context.clearState(jobId);
+ }
+
+ @Override
+ public void deinitialize() throws HyracksDataException {
+
+ }
+
+ @Override
+ public int getInputArity() {
+ return 0;
+ }
+
+ @Override
+ public void setOutputFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc)
+ throws HyracksDataException {
+
+ }
+
+ @Override
+ public IFrameWriter getInputFrameWriter(int index) {
+ return null;
+ }
+
+ @Override
+ public String getDisplayName() {
+ return "Clear State Operator";
+ }
+
+ };
+ }
+
+}
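ClearStateOperatorDescriptor is source-less and sink-less, so a cleanup job only needs one instance pinned to each slave. A hypothetical wiring sketch, assuming the standard Hyracks PartitionConstraintHelper API and illustrative node-controller names:

    import edu.uci.ics.hyracks.api.constraints.PartitionConstraintHelper;
    import edu.uci.ics.hyracks.api.job.JobSpecification;
    import edu.uci.ics.pregelix.dataflow.ClearStateOperatorDescriptor;

    public class ClearStateJobSketch {
        public static JobSpecification buildClearStateJob(String jobId, String[] ncNames) {
            JobSpecification spec = new JobSpecification();
            ClearStateOperatorDescriptor clearState = new ClearStateOperatorDescriptor(spec, jobId);
            // one partition per slave, so every RuntimeContext gets cleared
            PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, clearState, ncNames);
            spec.addRoot(clearState);
            return spec;
        }
    }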
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/FinalAggregateOperatorDescriptor.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/FinalAggregateOperatorDescriptor.java
index 3fed609..c0be9dd 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/FinalAggregateOperatorDescriptor.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/FinalAggregateOperatorDescriptor.java
@@ -17,9 +17,13 @@
import java.io.DataInput;
import java.io.DataInputStream;
+import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
@@ -32,6 +36,7 @@
import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
import edu.uci.ics.pregelix.api.graph.GlobalAggregator;
+import edu.uci.ics.pregelix.api.graph.Vertex;
import edu.uci.ics.pregelix.api.util.BspUtils;
import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
@@ -93,10 +98,28 @@
}
+ @SuppressWarnings("unchecked")
@Override
public void close() throws HyracksDataException {
- Writable finalAggregateValue = aggregator.finishFinal();
- IterationUtils.writeGlobalAggregateValue(conf, jobId, finalAggregateValue);
+ try {
+ // iterate over hdfs spilled aggregates
+ FileSystem dfs = FileSystem.get(conf);
+ String spillingDir = BspUtils.getGlobalAggregateSpillingDirName(conf, Vertex.getSuperstep());
+ FileStatus[] files = dfs.listStatus(new Path(spillingDir));
+ if (files != null) {
+ // goes into this branch only when there are spilled files
+ for (int i = 0; i < files.length; i++) {
+ FileStatus file = files[i];
+ DataInput dis = dfs.open(file.getPath());
+ partialAggregateValue.readFields(dis);
+ aggregator.step(partialAggregateValue);
+ }
+ }
+ Writable finalAggregateValue = aggregator.finishFinal();
+ IterationUtils.writeGlobalAggregateValue(conf, jobId, finalAggregateValue);
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
}
};
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java
index b74a5de..a1177c8 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java
@@ -19,42 +19,45 @@
import java.io.IOException;
import java.nio.ByteBuffer;
-import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.apache.hadoop.util.ReflectionUtils;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameDeserializer;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
import edu.uci.ics.hyracks.hdfs.ContextFactory;
-import edu.uci.ics.pregelix.api.graph.Vertex;
-import edu.uci.ics.pregelix.api.io.VertexOutputFormat;
-import edu.uci.ics.pregelix.api.io.VertexWriter;
-import edu.uci.ics.pregelix.api.util.BspUtils;
-import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
+import edu.uci.ics.hyracks.hdfs2.dataflow.ConfFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
public class HDFSFileWriteOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
private static final long serialVersionUID = 1L;
- private final IConfigurationFactory confFactory;
+ private final ConfFactory confFactory;
private final IRecordDescriptorFactory inputRdFactory;
- public HDFSFileWriteOperatorDescriptor(JobSpecification spec, IConfigurationFactory confFactory,
- IRecordDescriptorFactory inputRdFactory) {
+ public HDFSFileWriteOperatorDescriptor(JobSpecification spec, Job conf, IRecordDescriptorFactory inputRdFactory)
+ throws HyracksException {
super(spec, 1, 0);
- this.confFactory = confFactory;
- this.inputRdFactory = inputRdFactory;
+ try {
+ this.confFactory = new ConfFactory(conf);
+ this.inputRdFactory = inputRdFactory;
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
}
@SuppressWarnings("rawtypes")
@@ -65,12 +68,12 @@
return new AbstractUnaryInputSinkOperatorNodePushable() {
private RecordDescriptor rd0;
private FrameDeserializer frameDeserializer;
- private Configuration conf;
- private VertexWriter vertexWriter;
+ private Job job;
+ private RecordWriter recordWriter;
private TaskAttemptContext context;
+ private ContextFactory ctxFactory = new ContextFactory();
private String TEMP_DIR = "_temporary";
private ClassLoader ctxCL;
- private ContextFactory ctxFactory = new ContextFactory();
@Override
public void open() throws HyracksDataException {
@@ -79,16 +82,16 @@
frameDeserializer = new FrameDeserializer(ctx.getFrameSize(), rd0);
ctxCL = Thread.currentThread().getContextClassLoader();
Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
- conf = confFactory.createConfiguration(ctx);
-
- VertexOutputFormat outputFormat = BspUtils.createVertexOutputFormat(conf);
- context = ctxFactory.createContext(conf, partition);
- context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
+ job = confFactory.getConf();
try {
- vertexWriter = outputFormat.createVertexWriter(context);
+ OutputFormat outputFormat = ReflectionUtils.newInstance(job.getOutputFormatClass(),
+ job.getConfiguration());
+ context = ctxFactory.createContext(job.getConfiguration(), partition);
+ context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
+ recordWriter = outputFormat.getRecordWriter(context);
} catch (InterruptedException e) {
throw new HyracksDataException(e);
- } catch (IOException e) {
+ } catch (Exception e) {
throw new HyracksDataException(e);
}
}
@@ -100,8 +103,9 @@
try {
while (!frameDeserializer.done()) {
Object[] tuple = frameDeserializer.deserializeRecord();
- Vertex value = (Vertex) tuple[1];
- vertexWriter.writeVertex(value);
+ Object key = tuple[0];
+ Object value = tuple[1];
+ recordWriter.write(key, value);
}
} catch (InterruptedException e) {
throw new HyracksDataException(e);
@@ -118,7 +122,7 @@
@Override
public void close() throws HyracksDataException {
try {
- vertexWriter.close(context);
+ recordWriter.close(context);
moveFilesToFinalPath();
} catch (InterruptedException e) {
throw new HyracksDataException(e);
@@ -129,9 +133,8 @@
private void moveFilesToFinalPath() throws HyracksDataException {
try {
- JobContext job = ctxFactory.createJobContext(conf);
Path outputPath = FileOutputFormat.getOutputPath(job);
- FileSystem dfs = FileSystem.get(conf);
+ FileSystem dfs = FileSystem.get(job.getConfiguration());
Path filePath = new Path(outputPath, "part-" + new Integer(partition).toString());
FileStatus[] results = findPartitionPaths(outputPath, dfs);
if (results.length >= 1) {
@@ -161,14 +164,15 @@
FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() {
@Override
public boolean accept(Path dir) {
- return dir.getName().endsWith(TEMP_DIR);
+ return dir.getName().endsWith(TEMP_DIR) && dir.getName().indexOf(".crc") < 0;
}
});
Path tempDir = tempPaths[0].getPath();
FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() {
@Override
public boolean accept(Path dir) {
- return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0;
+ return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0
+ && dir.getName().indexOf(".crc") < 0;
}
});
return results;
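With this rewrite the writer is no longer tied to VertexOutputFormat: it instantiates whatever OutputFormat the Hadoop Job declares and streams key/value pairs to its RecordWriter. A hedged construction sketch (output classes and path are illustrative; passing null for inputRdFactory assumes the upstream record descriptor is used, as in the sibling vertex writer further below):

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

    import edu.uci.ics.hyracks.api.exceptions.HyracksException;
    import edu.uci.ics.hyracks.api.job.JobSpecification;
    import edu.uci.ics.pregelix.dataflow.HDFSFileWriteOperatorDescriptor;

    public class HdfsWriterSketch {
        public static HDFSFileWriteOperatorDescriptor build(JobSpecification spec)
                throws IOException, HyracksException {
            Job job = new Job(new Configuration());
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setOutputKeyClass(LongWritable.class);
            job.setOutputValueClass(Text.class);
            FileOutputFormat.setOutputPath(job, new Path("/pregelix/out")); // illustrative path
            return new HDFSFileWriteOperatorDescriptor(spec, job, null);
        }
    }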
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/KeyValueParserFactory.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/KeyValueParserFactory.java
new file mode 100644
index 0000000..a4a53e1
--- /dev/null
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/KeyValueParserFactory.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.dataflow;
+
+import java.io.DataOutput;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.io.Writable;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParser;
+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParserFactory;
+
+/**
+ * @author yingyib
+ */
+public class KeyValueParserFactory<K extends Writable, V extends Writable> implements IKeyValueParserFactory<K, V> {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public IKeyValueParser<K, V> createKeyValueParser(IHyracksTaskContext ctx) throws HyracksDataException {
+ final ArrayTupleBuilder tb = new ArrayTupleBuilder(2);
+ final DataOutput dos = tb.getDataOutput();
+ final ByteBuffer buffer = ctx.allocateFrame();
+ final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize());
+ appender.reset(buffer, true);
+
+ return new IKeyValueParser<K, V>() {
+
+ @Override
+ public void open(IFrameWriter writer) throws HyracksDataException {
+
+ }
+
+ @Override
+ public void parse(K key, V value, IFrameWriter writer, String fileString) throws HyracksDataException {
+ try {
+ tb.reset();
+ key.write(dos);
+ tb.addFieldEndOffset();
+ value.write(dos);
+ tb.addFieldEndOffset();
+ if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+ FrameUtils.flushFrame(buffer, writer);
+ appender.reset(buffer, true);
+ if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+ throw new HyracksDataException("tuple cannot be appended into the frame");
+ }
+ }
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void close(IFrameWriter writer) throws HyracksDataException {
+ FrameUtils.flushFrame(buffer, writer);
+ }
+
+ };
+ }
+}
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/KeyValueWriterFactory.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/KeyValueWriterFactory.java
new file mode 100644
index 0000000..fd407be
--- /dev/null
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/KeyValueWriterFactory.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.dataflow;
+
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.util.ReflectionUtils;
+import edu.uci.ics.hyracks.hdfs.ContextFactory;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
+import edu.uci.ics.hyracks.hdfs.api.ITupleWriterFactory;
+import edu.uci.ics.hyracks.hdfs2.dataflow.ConfFactory;
+import edu.uci.ics.pregelix.api.util.ResetableByteArrayInputStream;
+
+/**
+ * @author yingyib
+ */
+@SuppressWarnings("rawtypes")
+public class KeyValueWriterFactory implements ITupleWriterFactory {
+ private static final long serialVersionUID = 1L;
+ private ConfFactory confFactory;
+
+ public KeyValueWriterFactory(ConfFactory confFactory) {
+ this.confFactory = confFactory;
+ }
+
+ @Override
+ public ITupleWriter getTupleWriter(IHyracksTaskContext ctx, final int partition, final int nPartition)
+ throws HyracksDataException {
+ return new ITupleWriter() {
+ private SequenceFileOutputFormat sequenceOutputFormat = new SequenceFileOutputFormat();
+ private Writable key;
+ private Writable value;
+ private ResetableByteArrayInputStream bis = new ResetableByteArrayInputStream();
+ private DataInput dis = new DataInputStream(bis);
+ private RecordWriter recordWriter;
+ private ContextFactory ctxFactory = new ContextFactory();
+ private TaskAttemptContext context;
+
+ @Override
+ public void open(DataOutput output) throws HyracksDataException {
+ try {
+ Job job = confFactory.getConf();
+ context = ctxFactory.createContext(job.getConfiguration(), partition);
+ recordWriter = sequenceOutputFormat.getRecordWriter(context);
+ Class<?> keyClass = context.getOutputKeyClass();
+ Class<?> valClass = context.getOutputValueClass();
+ key = (Writable) ReflectionUtils.createInstance(keyClass);
+ value = (Writable) ReflectionUtils.createInstance(valClass);
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
+ try {
+ byte[] data = tuple.getFieldData(0);
+ int fieldStart = tuple.getFieldStart(0);
+ bis.setByteArray(data, fieldStart);
+ key.readFields(dis);
+ data = tuple.getFieldData(1);
+ fieldStart = tuple.getFieldStart(1);
+ bis.setByteArray(data, fieldStart);
+ value.readFields(dis);
+ recordWriter.write(key, value);
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void close(DataOutput output) throws HyracksDataException {
+ try {
+ recordWriter.close(context);
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ };
+ }
+}
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/MaterializingReadOperatorDescriptor.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/MaterializingReadOperatorDescriptor.java
index ca8f190..b44b643 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/MaterializingReadOperatorDescriptor.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/MaterializingReadOperatorDescriptor.java
@@ -30,9 +30,12 @@
public class MaterializingReadOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
private static final long serialVersionUID = 1L;
+ private final boolean removeIterationState;
- public MaterializingReadOperatorDescriptor(JobSpecification spec, RecordDescriptor recordDescriptor) {
+ public MaterializingReadOperatorDescriptor(JobSpecification spec, RecordDescriptor recordDescriptor,
+ boolean removeIterationState) {
super(spec, 1, 1);
+ this.removeIterationState = removeIterationState;
recordDescriptors[0] = recordDescriptor;
}
@@ -73,7 +76,7 @@
@Override
public void fail() throws HyracksDataException {
-
+ writer.fail();
}
@Override
@@ -81,7 +84,9 @@
/**
* remove last iteration's state
*/
- IterationUtils.removeIterationState(ctx, partition);
+ if (removeIterationState) {
+ IterationUtils.removeIterationState(ctx, partition);
+ }
writer.close();
complete = true;
}
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/MaterializingWriteOperatorDescriptor.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/MaterializingWriteOperatorDescriptor.java
index 48ed806..00dcbd1 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/MaterializingWriteOperatorDescriptor.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/MaterializingWriteOperatorDescriptor.java
@@ -68,6 +68,8 @@
@Override
public void open() throws HyracksDataException {
+ /** remove last iteration's state */
+ IterationUtils.removeIterationState(ctx, partition);
state = new MaterializerTaskState(ctx.getJobletContext().getJobId(), new TaskId(getActivityId(),
partition));
INCApplicationContext appContext = ctx.getJobletContext().getApplicationContext();
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileWriteOperatorDescriptor.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileWriteOperatorDescriptor.java
new file mode 100644
index 0000000..f3ec40e
--- /dev/null
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileWriteOperatorDescriptor.java
@@ -0,0 +1,192 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.dataflow;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
+import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameDeserializer;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
+import edu.uci.ics.hyracks.hdfs.ContextFactory;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexOutputFormat;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
+import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
+
+public class VertexFileWriteOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
+ private static final long serialVersionUID = 1L;
+ private final IConfigurationFactory confFactory;
+ private final IRecordDescriptorFactory inputRdFactory;
+
+ public VertexFileWriteOperatorDescriptor(JobSpecification spec, IConfigurationFactory confFactory,
+ IRecordDescriptorFactory inputRdFactory) {
+ super(spec, 1, 0);
+ this.confFactory = confFactory;
+ this.inputRdFactory = inputRdFactory;
+ }
+
+ @SuppressWarnings("rawtypes")
+ @Override
+ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+ final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
+ throws HyracksDataException {
+ return new AbstractUnaryInputSinkOperatorNodePushable() {
+ private RecordDescriptor rd0;
+ private FrameDeserializer frameDeserializer;
+ private Configuration conf;
+ private VertexWriter vertexWriter;
+ private TaskAttemptContext context;
+ private String TEMP_DIR = "_temporary";
+ private ClassLoader ctxCL;
+ private ContextFactory ctxFactory = new ContextFactory();
+
+ @Override
+ public void open() throws HyracksDataException {
+ rd0 = inputRdFactory == null ? recordDescProvider.getInputRecordDescriptor(getActivityId(), 0)
+ : inputRdFactory.createRecordDescriptor(ctx);
+ frameDeserializer = new FrameDeserializer(ctx.getFrameSize(), rd0);
+ ctxCL = Thread.currentThread().getContextClassLoader();
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+ conf = confFactory.createConfiguration(ctx);
+
+ VertexOutputFormat outputFormat = BspUtils.createVertexOutputFormat(conf);
+ context = ctxFactory.createContext(conf, partition);
+ context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
+ try {
+ vertexWriter = outputFormat.createVertexWriter(context);
+ } catch (InterruptedException e) {
+ throw new HyracksDataException(e);
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void nextFrame(ByteBuffer frame) throws HyracksDataException {
+ frameDeserializer.reset(frame);
+ try {
+ while (!frameDeserializer.done()) {
+ Object[] tuple = frameDeserializer.deserializeRecord();
+ Vertex value = (Vertex) tuple[1];
+ vertexWriter.writeVertex(value);
+ }
+ } catch (InterruptedException e) {
+ throw new HyracksDataException(e);
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+ Thread.currentThread().setContextClassLoader(ctxCL);
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ try {
+ vertexWriter.close(context);
+ moveFilesToFinalPath();
+ } catch (InterruptedException e) {
+ throw new HyracksDataException(e);
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ private void moveFilesToFinalPath() throws HyracksDataException {
+ try {
+ JobContext job = ctxFactory.createJobContext(conf);
+ Path outputPath = FileOutputFormat.getOutputPath(job);
+ FileSystem dfs = FileSystem.get(conf);
+ Path filePath = new Path(outputPath, "part-" + new Integer(partition).toString());
+ FileStatus[] results = findPartitionPaths(outputPath, dfs);
+ if (results.length >= 1) {
+ /**
+ * for Hadoop-0.20.2
+ */
+ renameFile(dfs, filePath, results);
+ } else {
+ /**
+ * for Hadoop-0.23.1
+ */
+ int jobId = job.getJobID().getId();
+ outputPath = new Path(outputPath.toString() + File.separator + TEMP_DIR + File.separator
+ + jobId);
+ results = findPartitionPaths(outputPath, dfs);
+ renameFile(dfs, filePath, results);
+ }
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ } finally {
+ Thread.currentThread().setContextClassLoader(ctxCL);
+ }
+ }
+
+ private FileStatus[] findPartitionPaths(Path outputPath, FileSystem dfs) throws FileNotFoundException,
+ IOException {
+ FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() {
+ @Override
+ public boolean accept(Path dir) {
+ return dir.getName().endsWith(TEMP_DIR) && dir.getName().indexOf(".crc") < 0;
+ }
+ });
+ Path tempDir = tempPaths[0].getPath();
+ FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() {
+ @Override
+ public boolean accept(Path dir) {
+ return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0
+ && dir.getName().indexOf(".crc") < 0;
+ }
+ });
+ return results;
+ }
+
+ private void renameFile(FileSystem dfs, Path filePath, FileStatus[] results) throws IOException,
+ HyracksDataException, FileNotFoundException {
+ Path srcDir = results[0].getPath();
+ if (!dfs.exists(srcDir))
+ throw new HyracksDataException("file " + srcDir.toString() + " does not exist!");
+
+ FileStatus[] srcFiles = dfs.listStatus(srcDir);
+ Path srcFile = srcFiles[0].getPath();
+ dfs.delete(filePath, true);
+ dfs.rename(srcFile, filePath);
+ }
+
+ };
+ }
+}
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java
index 24a0a9e..f3f7513 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java
@@ -54,13 +54,14 @@
private final ILocalResourceRepository localResourceRepository;
private final ResourceIdFactory resourceIdFactory;
private final IBufferCache bufferCache;
- private final IVirtualBufferCache vBufferCache;
+ private final List<IVirtualBufferCache> vbcs;
private final IFileMapManager fileMapManager;
- private final Map<StateKey, IStateObject> appStateMap = new ConcurrentHashMap<StateKey, IStateObject>();
- private final Map<String, Long> giraphJobIdToSuperStep = new ConcurrentHashMap<String, Long>();
- private final Map<String, Boolean> giraphJobIdToMove = new ConcurrentHashMap<String, Boolean>();
private final IOManager ioManager;
private final Map<Long, List<FileReference>> iterationToFiles = new ConcurrentHashMap<Long, List<FileReference>>();
+ private final Map<StateKey, IStateObject> appStateMap = new ConcurrentHashMap<StateKey, IStateObject>();
+ private final Map<String, Long> jobIdToSuperStep = new ConcurrentHashMap<String, Long>();
+ private final Map<String, Boolean> jobIdToMove = new ConcurrentHashMap<String, Boolean>();
+
private final ThreadFactory threadFactory = new ThreadFactory() {
public Thread newThread(Runnable r) {
return new Thread(r);
@@ -79,9 +80,11 @@
bufferCache = new BufferCache(appCtx.getRootContext().getIOManager(), allocator, prs,
new PreDelayPageCleanerPolicy(Long.MAX_VALUE), fileMapManager, pageSize, numPages, 1000000,
threadFactory);
- int numPagesInMemComponents = numPages / 4;
- vBufferCache = new MultitenantVirtualBufferCache(new VirtualBufferCache(new HeapBufferAllocator(), pageSize,
- numPagesInMemComponents));
+ int numPagesInMemComponents = numPages / 8;
+ vbcs = new ArrayList<IVirtualBufferCache>();
+ IVirtualBufferCache vBufferCache = new MultitenantVirtualBufferCache(new VirtualBufferCache(
+ new HeapBufferAllocator(), pageSize, numPagesInMemComponents));
+ vbcs.add(vBufferCache);
ioManager = (IOManager) appCtx.getRootContext().getIOManager();
lcManager = new NoBudgetIndexLifecycleManager();
localResourceRepository = new TransientLocalResourceRepository();
@@ -100,6 +103,18 @@
System.gc();
}
+ public void clearState(String jobId) throws HyracksDataException {
+ for (Entry<Long, List<FileReference>> entry : iterationToFiles.entrySet())
+ for (FileReference fileRef : entry.getValue())
+ fileRef.delete();
+
+ iterationToFiles.clear();
+ appStateMap.clear();
+ jobIdToMove.remove(jobId);
+ jobIdToSuperStep.remove(jobId);
+ System.gc();
+ }
+
public ILocalResourceRepository getLocalResourceRepository() {
return localResourceRepository;
}
@@ -116,8 +131,8 @@
return bufferCache;
}
- public IVirtualBufferCache getVirtualBufferCache() {
- return vBufferCache;
+ public List<IVirtualBufferCache> getVirtualBufferCaches() {
+ return vbcs;
}
public IFileMapProvider getFileMapManager() {
@@ -132,32 +147,69 @@
return (RuntimeContext) ctx.getJobletContext().getApplicationContext().getApplicationObject();
}
- public synchronized void setVertexProperties(String giraphJobId, long numVertices, long numEdges) {
- Boolean toMove = giraphJobIdToMove.get(giraphJobId);
+ public synchronized void setVertexProperties(String jobId, long numVertices, long numEdges, long currentIteration) {
+ Boolean toMove = jobIdToMove.get(jobId);
if (toMove == null || toMove == true) {
- if (giraphJobIdToSuperStep.get(giraphJobId) == null) {
- giraphJobIdToSuperStep.put(giraphJobId, 0L);
+ if (jobIdToSuperStep.get(jobId) == null) {
+ if (currentIteration <= 0) {
+ jobIdToSuperStep.put(jobId, 0L);
+ } else {
+ jobIdToSuperStep.put(jobId, currentIteration);
+ }
}
- long superStep = giraphJobIdToSuperStep.get(giraphJobId);
+ long superStep = jobIdToSuperStep.get(jobId);
List<FileReference> files = iterationToFiles.remove(superStep - 1);
if (files != null) {
for (FileReference fileRef : files)
fileRef.delete();
}
- Vertex.setSuperstep(++superStep);
+ if (currentIteration > 0) {
+ Vertex.setSuperstep(currentIteration);
+ } else {
+ Vertex.setSuperstep(++superStep);
+ }
Vertex.setNumVertices(numVertices);
Vertex.setNumEdges(numEdges);
- giraphJobIdToSuperStep.put(giraphJobId, superStep);
- giraphJobIdToMove.put(giraphJobId, false);
+ jobIdToSuperStep.put(jobId, superStep);
+ jobIdToMove.put(jobId, false);
LOGGER.info("start iteration " + Vertex.getSuperstep());
}
System.gc();
}
- public synchronized void endSuperStep(String giraphJobId) {
- giraphJobIdToMove.put(giraphJobId, true);
+ public synchronized void recoverVertexProperties(String jobId, long numVertices, long numEdges,
+ long currentIteration) {
+ if (jobIdToSuperStep.get(jobId) == null) {
+ if (currentIteration <= 0) {
+ jobIdToSuperStep.put(jobId, 0L);
+ } else {
+ jobIdToSuperStep.put(jobId, currentIteration);
+ }
+ }
+
+ long superStep = jobIdToSuperStep.get(jobId);
+ List<FileReference> files = iterationToFiles.remove(superStep - 1);
+ if (files != null) {
+ for (FileReference fileRef : files)
+ fileRef.delete();
+ }
+
+ if (currentIteration > 0) {
+ Vertex.setSuperstep(currentIteration);
+ } else {
+ Vertex.setSuperstep(++superStep);
+ }
+ Vertex.setNumVertices(numVertices);
+ Vertex.setNumEdges(numEdges);
+ jobIdToSuperStep.put(jobId, superStep);
+ jobIdToMove.put(jobId, true);
+ LOGGER.info("recovered iteration " + Vertex.getSuperstep());
+ }
+
+ public synchronized void endSuperStep(String pregelixJobId) {
+ jobIdToMove.put(pregelixJobId, true);
LOGGER.info("end iteration " + Vertex.getSuperstep());
}
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/util/IterationUtils.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/util/IterationUtils.java
index 1d7c979..02097bf 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/util/IterationUtils.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/util/IterationUtils.java
@@ -31,6 +31,7 @@
import edu.uci.ics.hyracks.api.job.JobId;
import edu.uci.ics.pregelix.api.job.PregelixJob;
import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.pregelix.api.util.JobStateUtils;
import edu.uci.ics.pregelix.dataflow.context.RuntimeContext;
import edu.uci.ics.pregelix.dataflow.context.StateKey;
@@ -51,6 +52,11 @@
RuntimeContext context = (RuntimeContext) appContext.getApplicationObject();
Map<StateKey, IStateObject> map = context.getAppStateStore();
IStateObject state = map.get(new StateKey(lastId, partition));
+ while (state == null) {
+ /** in case the last job is a checkpointing job */
+ lastId = new JobId(lastId.getId() - 1);
+ state = map.get(new StateKey(lastId, partition));
+ }
return state;
}
@@ -69,11 +75,19 @@
context.endSuperStep(giraphJobId);
}
- public static void setProperties(String giraphJobId, IHyracksTaskContext ctx, Configuration conf) {
+ public static void setProperties(String jobId, IHyracksTaskContext ctx, Configuration conf, long currentIteration) {
INCApplicationContext appContext = ctx.getJobletContext().getApplicationContext();
RuntimeContext context = (RuntimeContext) appContext.getApplicationObject();
- context.setVertexProperties(giraphJobId, conf.getLong(PregelixJob.NUM_VERTICE, -1),
- conf.getLong(PregelixJob.NUM_EDGES, -1));
+ context.setVertexProperties(jobId, conf.getLong(PregelixJob.NUM_VERTICE, -1),
+ conf.getLong(PregelixJob.NUM_EDGES, -1), currentIteration);
+ }
+
+ public static void recoverProperties(String jobId, IHyracksTaskContext ctx, Configuration conf,
+ long currentIteration) {
+ INCApplicationContext appContext = ctx.getJobletContext().getApplicationContext();
+ RuntimeContext context = (RuntimeContext) appContext.getApplicationObject();
+ context.recoverVertexProperties(jobId, conf.getLong(PregelixJob.NUM_VERTICE, -1),
+ conf.getLong(PregelixJob.NUM_EDGES, -1), currentIteration);
}
public static void writeTerminationState(Configuration conf, String jobId, boolean terminate)
@@ -91,22 +105,6 @@
}
}
- public static void writeForceTerminationState(Configuration conf, String jobId) throws HyracksDataException {
- try {
- FileSystem dfs = FileSystem.get(conf);
- String pathStr = IterationUtils.TMP_DIR + jobId + "fterm";
- Path path = new Path(pathStr);
- if (!dfs.exists(path)) {
- FSDataOutputStream output = dfs.create(path, true);
- output.writeBoolean(true);
- output.flush();
- output.close();
- }
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
- }
-
public static void writeGlobalAggregateValue(Configuration conf, String jobId, Writable agg)
throws HyracksDataException {
try {
@@ -136,19 +134,12 @@
}
}
+ public static void writeForceTerminationState(Configuration conf, String jobId) throws HyracksDataException {
+ JobStateUtils.writeForceTerminationState(conf, jobId);
+ }
+
public static boolean readForceTerminationState(Configuration conf, String jobId) throws HyracksDataException {
- try {
- FileSystem dfs = FileSystem.get(conf);
- String pathStr = IterationUtils.TMP_DIR + jobId + "fterm";
- Path path = new Path(pathStr);
- if (dfs.exists(path)) {
- return true;
- } else {
- return false;
- }
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
+ return JobStateUtils.readForceTerminationState(conf, jobId);
}
public static Writable readGlobalAggregateValue(Configuration conf, String jobId) throws HyracksDataException {
diff --git a/pregelix/pregelix-dist/pom.xml b/pregelix/pregelix-dist/pom.xml
index a868ff2..cec6efe 100644
--- a/pregelix/pregelix-dist/pom.xml
+++ b/pregelix/pregelix-dist/pom.xml
@@ -1,24 +1,19 @@
<?xml version="1.0"?>
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<artifactId>pregelix-dist</artifactId>
<name>pregelix-dist</name>
@@ -38,35 +33,40 @@
</configuration>
</plugin>
<plugin>
- <artifactId>maven-assembly-plugin</artifactId>
- <version>2.2-beta-5</version>
- <executions>
- <execution>
- <configuration>
- <descriptors>
- <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
- </descriptors>
- </configuration>
- <phase>package</phase>
- <goals>
- <goal>attached</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>2.2-beta-5</version>
+ <executions>
+ <execution>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>attached</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-core</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-example</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>pregelix-benchmark</artifactId>
+ <version>0.2.10-SNAPSHOT</version>
+ </dependency>
</dependencies>
</project>
diff --git a/pregelix/pregelix-dist/src/main/assembly/binary-assembly.xml b/pregelix/pregelix-dist/src/main/assembly/binary-assembly.xml
index ab46338..a0fc2ab 100644
--- a/pregelix/pregelix-dist/src/main/assembly/binary-assembly.xml
+++ b/pregelix/pregelix-dist/src/main/assembly/binary-assembly.xml
@@ -1,17 +1,12 @@
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<assembly>
<id>binary-assembly</id>
<formats>
@@ -31,25 +26,25 @@
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
- <directory>../pregelix-core/target/appassembler/lib</directory>
- <outputDirectory>lib</outputDirectory>
- <includes>
- <include>*.jar</include>
- </includes>
- <fileMode>0755</fileMode>
- </fileSet>
- <fileSet>
- <directory>../pregelix-example/target</directory>
- <outputDirectory>examples</outputDirectory>
- <includes>
- <include>*with-dependencies.jar</include>
- </includes>
+ <directory>../pregelix-core/target/appassembler/lib</directory>
+ <outputDirectory>lib</outputDirectory>
+ <includes>
+ <include>*.jar</include>
+ </includes>
<fileMode>0755</fileMode>
- </fileSet>
+ </fileSet>
<fileSet>
- <directory>../pregelix-example/data</directory>
- <outputDirectory>data</outputDirectory>
- <fileMode>0755</fileMode>
- </fileSet>
+ <directory>../pregelix-example/target</directory>
+ <outputDirectory>examples</outputDirectory>
+ <includes>
+ <include>*with-dependencies.jar</include>
+ </includes>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>../pregelix-example/data</directory>
+ <outputDirectory>data</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
</fileSets>
</assembly>
diff --git a/pregelix/pregelix-dist/src/main/resources/scripts/copylog.sh b/pregelix/pregelix-dist/src/main/resources/scripts/copylog.sh
new file mode 100644
index 0000000..7767b2d
--- /dev/null
+++ b/pregelix/pregelix-dist/src/main/resources/scripts/copylog.sh
@@ -0,0 +1,7 @@
+. conf/cluster.properties
+
+NODEID=`hostname | cut -d '.' -f 1`
+#echo $NODEID
+
+#echo "rsync ${NCLOGS_DIR}/${NODEID}.log ${1}:${2}"
+rsync ${NCLOGS_DIR}/${NODEID}.log ${1}:${2}
diff --git a/pregelix/pregelix-dist/src/main/resources/scripts/dumpAll.sh b/pregelix/pregelix-dist/src/main/resources/scripts/dumpAll.sh
new file mode 100644
index 0000000..e7d45e8
--- /dev/null
+++ b/pregelix/pregelix-dist/src/main/resources/scripts/dumpAll.sh
@@ -0,0 +1,12 @@
+. conf/cluster.properties
+PREGELIX_PATH=`pwd`
+LOG_PATH=$PREGELIX_PATH/logs/
+rm -rf $LOG_PATH
+mkdir $LOG_PATH
+ccname=`hostname`
+
+for i in `cat conf/slaves`
+do
+ ssh $i "cd ${PREGELIX_PATH}; bin/dumptrace.sh; bin/copylog.sh ${ccname} ${LOG_PATH}"
+done
+
diff --git a/pregelix/pregelix-dist/src/main/resources/scripts/dumptrace.sh b/pregelix/pregelix-dist/src/main/resources/scripts/dumptrace.sh
new file mode 100644
index 0000000..9fe55f0
--- /dev/null
+++ b/pregelix/pregelix-dist/src/main/resources/scripts/dumptrace.sh
@@ -0,0 +1,15 @@
+echo `hostname`
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=pregelixnc'|awk '{print $2}'`
+
+if [ "$PID" == "" ]; then
+ PID=`ps -ef|grep ${USER}|grep java|grep 'hyracks'|awk '{print $2}'`
+fi
+
+if [ "$PID" == "" ]; then
+ USERID=`id | sed 's/^uid=//;s/(.*$//'`
+ PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=pregelixnc'|awk '{print $2}'`
+fi
+
+echo $PID
+kill -QUIT $PID
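The three scripts above form a small debugging workflow: dumpAll.sh ssh-es into every host in conf/slaves, dumptrace.sh locates the local node-controller JVM and sends it SIGQUIT (kill -QUIT), which makes a HotSpot JVM print a full thread dump into its log, and copylog.sh rsyncs that log back to the cluster-controller host. A minimal usage sketch, assuming the binary assembly above is installed with bin/, conf/cluster.properties, and conf/slaves in place:

    # run from the Pregelix install root on the CC host
    bin/dumpAll.sh
    ls logs/    # expect one <short-hostname>.log per entry in conf/slaves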
diff --git a/pregelix/pregelix-dist/src/main/resources/scripts/startcc.sh b/pregelix/pregelix-dist/src/main/resources/scripts/startcc.sh
index 2a6cab2..d7a0ead 100644
--- a/pregelix/pregelix-dist/src/main/resources/scripts/startcc.sh
+++ b/pregelix/pregelix-dist/src/main/resources/scripts/startcc.sh
@@ -58,8 +58,8 @@
if [ -f "conf/topology.xml" ]; then
#Launch hyracks cc script with topology
-${PREGELIX_HOME}/bin/pregelixcc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 0 -cluster-topology "conf/topology.xml" &> $CCLOGS_DIR/cc.log &
+${PREGELIX_HOME}/bin/pregelixcc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -heartbeat-period 5000 -max-heartbeat-lapse-periods 4 -default-max-job-attempts 0 -job-history-size 0 -cluster-topology "conf/topology.xml" &> $CCLOGS_DIR/cc.log &
else
#Launch hyracks cc script without topology
-${PREGELIX_HOME}/bin/pregelixcc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 0 &> $CCLOGS_DIR/cc.log &
+${PREGELIX_HOME}/bin/pregelixcc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -heartbeat-period 5000 -max-heartbeat-lapse-periods 4 -default-max-job-attempts 0 -job-history-size 0 &> $CCLOGS_DIR/cc.log &
fi
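This startcc.sh change swaps the effectively infinite heartbeat tolerance (999999 lapse periods) for an explicit 5000 ms heartbeat with 4 tolerated lapses, so the cluster controller can actually declare a node controller dead; the failure-recovery tests added later in this patch depend on that. A back-of-the-envelope sketch of the implied detection latency, assuming a node is declared dead after max-heartbeat-lapse-periods consecutive missed heartbeats:

    # hedged arithmetic only; the CC's exact bookkeeping may differ
    HEARTBEAT_PERIOD_MS=5000
    MAX_LAPSE_PERIODS=4
    echo "worst-case detection ~ $((HEARTBEAT_PERIOD_MS * MAX_LAPSE_PERIODS)) ms"   # 20000 ms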
diff --git a/pregelix/pregelix-example/pom.xml b/pregelix/pregelix-example/pom.xml
index c2538b1..9994c0e 100644
--- a/pregelix/pregelix-example/pom.xml
+++ b/pregelix/pregelix-example/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -94,7 +94,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-clean-plugin</artifactId>
- <version>2.4.1</version>
+ <version>2.5</version>
<configuration>
<filesets>
<fileset>
@@ -107,6 +107,7 @@
<include>expect*</include>
<include>ClusterController*</include>
<include>edu.uci.*</include>
+ <include>dev*</include>
</includes>
</fileset>
</filesets>
@@ -119,7 +120,7 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-core</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ConnectedComponentsVertex.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ConnectedComponentsVertex.java
index 07d2d57..a280c45 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ConnectedComponentsVertex.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ConnectedComponentsVertex.java
@@ -139,6 +139,7 @@
job.setVertexOutputFormatClass(SimpleConnectedComponentsVertexOutputFormat.class);
job.setMessageCombinerClass(ConnectedComponentsVertex.SimpleMinCombiner.class);
job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ job.setDynamicVertexValueSize(true);
Client.run(args, job);
}
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/EarlyTerminationVertex.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/EarlyTerminationVertex.java
new file mode 100644
index 0000000..e369d29
--- /dev/null
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/EarlyTerminationVertex.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.pregelix.example;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+import edu.uci.ics.pregelix.api.io.text.TextVertexOutputFormat;
+import edu.uci.ics.pregelix.api.io.text.TextVertexOutputFormat.TextVertexWriter;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.example.client.Client;
+import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
+import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.io.VLongWritable;
+
+/**
+ * Demonstrates early termination: a partition stops participating once terminatePartition() is called, while the remaining vertices proceed and eventually vote to halt.
+ */
+public class EarlyTerminationVertex extends Vertex<VLongWritable, VLongWritable, VLongWritable, VLongWritable> {
+ private VLongWritable tempValue = new VLongWritable();
+
+ @Override
+ public void compute(Iterator<VLongWritable> msgIterator) {
+ if (getSuperstep() == 1) {
+ if (getVertexId().get() % 4 == 2) {
+ terminatePartition();
+ } else {
+ tempValue.set(1);
+ setVertexValue(tempValue);
+ }
+ }
+ if (getSuperstep() == 2) {
+ if (getVertexId().get() % 4 == 3) {
+ terminatePartition();
+ } else {
+ tempValue.set(2);
+ setVertexValue(tempValue);
+ voteToHalt();
+ }
+ }
+ }
+
+ @Override
+ public String toString() {
+ return getVertexId() + " " + getVertexValue();
+ }
+
+ /**
+ * Simple VertexWriter that writes the vertex id and value as text.
+ */
+ public static class SimpleEarlyTerminattionVertexWriter extends
+ TextVertexWriter<VLongWritable, VLongWritable, VLongWritable> {
+ public SimpleEarlyTerminattionVertexWriter(RecordWriter<Text, Text> lineRecordWriter) {
+ super(lineRecordWriter);
+ }
+
+ @Override
+ public void writeVertex(Vertex<VLongWritable, VLongWritable, VLongWritable, ?> vertex) throws IOException,
+ InterruptedException {
+ getRecordWriter().write(new Text(vertex.getVertexId().toString()),
+ new Text(vertex.getVertexValue().toString()));
+ }
+ }
+
+ public static class SimpleEarlyTerminattionVertexOutputFormat extends
+ TextVertexOutputFormat<VLongWritable, VLongWritable, VLongWritable> {
+
+ @Override
+ public VertexWriter<VLongWritable, VLongWritable, VLongWritable> createVertexWriter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ RecordWriter<Text, Text> recordWriter = textOutputFormat.getRecordWriter(context);
+ return new SimpleEarlyTerminattionVertexWriter(recordWriter);
+ }
+
+ }
+
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(EarlyTerminationVertex.class.getSimpleName());
+ job.setVertexClass(EarlyTerminationVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleEarlyTerminattionVertexOutputFormat.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ job.setDynamicVertexValueSize(true);
+ Client.run(args, job);
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/GraphMutationVertex.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/GraphMutationVertex.java
index 7cf8408..7fae776 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/GraphMutationVertex.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/GraphMutationVertex.java
@@ -18,7 +18,6 @@
import java.io.IOException;
import java.util.Iterator;
-import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
@@ -32,6 +31,7 @@
import edu.uci.ics.pregelix.example.client.Client;
import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.io.DoubleWritable;
import edu.uci.ics.pregelix.example.io.VLongWritable;
/**
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/MessageOverflowFixedsizeVertex.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/MessageOverflowFixedsizeVertex.java
new file mode 100644
index 0000000..6c3c752
--- /dev/null
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/MessageOverflowFixedsizeVertex.java
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.pregelix.example;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Random;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+import edu.uci.ics.pregelix.api.io.text.TextVertexOutputFormat;
+import edu.uci.ics.pregelix.api.io.text.TextVertexOutputFormat.TextVertexWriter;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.example.client.Client;
+import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
+import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.io.LongWritable;
+import edu.uci.ics.pregelix.example.io.VLongWritable;
+
+/**
+ * Demonstrates handling of an overflowing message volume with a fixed-size message type: superstep 1 floods the edges, superstep 2 counts what arrives.
+ */
+public class MessageOverflowFixedsizeVertex extends Vertex<VLongWritable, LongWritable, VLongWritable, LongWritable> {
+
+ private LongWritable outputMsg = new LongWritable(1);
+ private Random rand = new Random(System.currentTimeMillis());
+ private LongWritable tmpVertexValue = new LongWritable(0);
+ private int numOfMsgClones = 10000;
+
+ @Override
+ public void compute(Iterator<LongWritable> msgIterator) {
+ if (getSuperstep() == 1) {
+ for (int i = 0; i < numOfMsgClones; i++) {
+ outputMsg.set(Math.abs(rand.nextLong()));
+ sendMsgToAllEdges(outputMsg);
+ }
+ tmpVertexValue.set(0);
+ setVertexValue(tmpVertexValue);
+ }
+ if (getSuperstep() == 2) {
+ long numOfMsg = getVertexValue().get();
+ while (msgIterator.hasNext()) {
+ msgIterator.next();
+ numOfMsg++;
+ }
+ tmpVertexValue.set(numOfMsg);
+ setVertexValue(tmpVertexValue);
+ voteToHalt();
+ }
+ }
+
+ @Override
+ public String toString() {
+ return getVertexId() + " " + getVertexValue();
+ }
+
+ /**
+ * Simple VertexWriter that writes the vertex id and value as text.
+ */
+ public static class SimpleMessageOverflowVertexWriter extends
+ TextVertexWriter<VLongWritable, LongWritable, VLongWritable> {
+ public SimpleMessageOverflowVertexWriter(RecordWriter<Text, Text> lineRecordWriter) {
+ super(lineRecordWriter);
+ }
+
+ @Override
+ public void writeVertex(Vertex<VLongWritable, LongWritable, VLongWritable, ?> vertex) throws IOException,
+ InterruptedException {
+ getRecordWriter().write(new Text(vertex.getVertexId().toString()),
+ new Text(vertex.getVertexValue().toString()));
+ }
+ }
+
+ public static class SimpleMessageOverflowVertexOutputFormat extends
+ TextVertexOutputFormat<VLongWritable, LongWritable, VLongWritable> {
+
+ @Override
+ public VertexWriter<VLongWritable, LongWritable, VLongWritable> createVertexWriter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ RecordWriter<Text, Text> recordWriter = textOutputFormat.getRecordWriter(context);
+ return new SimpleMessageOverflowVertexWriter(recordWriter);
+ }
+
+ }
+
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(MessageOverflowFixedsizeVertex.class.getSimpleName());
+ job.setVertexClass(MessageOverflowFixedsizeVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleMessageOverflowVertexOutputFormat.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ job.setDynamicVertexValueSize(true);
+ Client.run(args, job);
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/MessageOverflowVertex.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/MessageOverflowVertex.java
new file mode 100644
index 0000000..d0221bf
--- /dev/null
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/MessageOverflowVertex.java
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.pregelix.example;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Random;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+import edu.uci.ics.pregelix.api.io.text.TextVertexOutputFormat;
+import edu.uci.ics.pregelix.api.io.text.TextVertexOutputFormat.TextVertexWriter;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.example.client.Client;
+import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
+import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.io.VLongWritable;
+
+/**
+ * Demonstrates handling of an overflowing message volume with variable-size messages, accumulating the count across the open()/compute()/close() superstep hooks.
+ */
+public class MessageOverflowVertex extends Vertex<VLongWritable, VLongWritable, VLongWritable, VLongWritable> {
+
+ private VLongWritable outputMsg = new VLongWritable(1);
+ private Random rand = new Random(System.currentTimeMillis());
+ private VLongWritable tmpVertexValue = new VLongWritable(0);
+ private int numOfMsgClones = 10000;
+ private int numIncomingMsgs = 0;
+
+ @Override
+ public void open() {
+ if (getSuperstep() == 2) {
+ numIncomingMsgs = 0;
+ }
+ }
+
+ @Override
+ public void compute(Iterator<VLongWritable> msgIterator) {
+ if (getSuperstep() == 1) {
+ for (int i = 0; i < numOfMsgClones; i++) {
+ outputMsg.set(Math.abs(rand.nextLong()));
+ sendMsgToAllEdges(outputMsg);
+ }
+ tmpVertexValue.set(0);
+ setVertexValue(tmpVertexValue);
+ }
+ if (getSuperstep() == 2) {
+ while (msgIterator.hasNext()) {
+ msgIterator.next();
+ numIncomingMsgs++;
+ }
+ }
+ }
+
+ @Override
+ public void close() {
+ if (getSuperstep() == 2) {
+ tmpVertexValue.set(numIncomingMsgs);
+ setVertexValue(tmpVertexValue);
+ voteToHalt();
+ }
+ }
+
+ @Override
+ public String toString() {
+ return getVertexId() + " " + getVertexValue();
+ }
+
+ /**
+ * Simple VertexWriter that writes the vertex id and value as text.
+ */
+ public static class SimpleMessageOverflowVertexWriter extends
+ TextVertexWriter<VLongWritable, VLongWritable, VLongWritable> {
+ public SimpleMessageOverflowVertexWriter(RecordWriter<Text, Text> lineRecordWriter) {
+ super(lineRecordWriter);
+ }
+
+ @Override
+ public void writeVertex(Vertex<VLongWritable, VLongWritable, VLongWritable, ?> vertex) throws IOException,
+ InterruptedException {
+ getRecordWriter().write(new Text(vertex.getVertexId().toString()),
+ new Text(vertex.getVertexValue().toString()));
+ }
+ }
+
+ public static class SimpleMessageOverflowVertexOutputFormat extends
+ TextVertexOutputFormat<VLongWritable, VLongWritable, VLongWritable> {
+
+ @Override
+ public VertexWriter<VLongWritable, VLongWritable, VLongWritable> createVertexWriter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ RecordWriter<Text, Text> recordWriter = textOutputFormat.getRecordWriter(context);
+ return new SimpleMessageOverflowVertexWriter(recordWriter);
+ }
+
+ }
+
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(MessageOverflowVertex.class.getSimpleName());
+ job.setVertexClass(MessageOverflowVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleMessageOverflowVertexOutputFormat.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ job.setDynamicVertexValueSize(true);
+ Client.run(args, job);
+ }
+
+}
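MessageOverflowVertex moves the per-superstep bookkeeping out of compute() and into the open()/close() hooks. The semantics this example appears to assume: when a superstep's incoming messages overflow a single in-memory batch, the runtime may call compute() once per batch, with open() and close() bracketing the whole superstep, so state like numIncomingMsgs cannot be reset inside compute(). A self-contained mock of that lifecycle (all names hypothetical, plain Java, no Pregelix dependency):

    import java.util.Arrays;
    import java.util.Iterator;
    import java.util.List;

    public class SuperstepLifecycleMock {
        private static int numIncomingMsgs;

        private static void open() {
            numIncomingMsgs = 0; // reset exactly once per superstep
        }

        private static void compute(Iterator<Long> msgIterator) {
            while (msgIterator.hasNext()) { // may run once per overflowed batch
                msgIterator.next();
                numIncomingMsgs++;
            }
        }

        private static void close() {
            System.out.println("superstep total: " + numIncomingMsgs); // prints 3
        }

        public static void main(String[] args) {
            // two message batches delivered within one superstep
            List<List<Long>> batches = Arrays.asList(Arrays.asList(1L, 2L), Arrays.asList(3L));
            open();
            for (List<Long> batch : batches) {
                compute(batch.iterator());
            }
            close();
        }
    }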
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/PageRankVertex.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/PageRankVertex.java
index 8664667..a866c1c 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/PageRankVertex.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/PageRankVertex.java
@@ -21,7 +21,6 @@
import java.util.logging.Level;
import java.util.logging.Logger;
-import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
@@ -45,6 +44,7 @@
import edu.uci.ics.pregelix.example.client.Client;
import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.io.DoubleWritable;
import edu.uci.ics.pregelix.example.io.VLongWritable;
/**
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ReachabilityVertex.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ReachabilityVertex.java
index 6a42636..1bb33b8 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ReachabilityVertex.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ReachabilityVertex.java
@@ -22,7 +22,6 @@
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordWriter;
@@ -42,6 +41,7 @@
import edu.uci.ics.pregelix.example.client.Client;
import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
import edu.uci.ics.pregelix.example.inputformat.TextReachibilityVertexInputFormat;
+import edu.uci.ics.pregelix.example.io.ByteWritable;
import edu.uci.ics.pregelix.example.io.VLongWritable;
/**
@@ -116,7 +116,7 @@
}
@Override
- public void compute(Iterator<ByteWritable> msgIterator) {
+ public void compute(Iterator<ByteWritable> msgIterator) throws Exception {
if (sourceId < 0) {
sourceId = getContext().getConfiguration().getLong(SOURCE_ID, SOURCE_ID_DEFAULT);
}
@@ -171,13 +171,20 @@
return getVertexId() + " " + getVertexValue();
}
- private void signalTerminate() {
- Configuration conf = getContext().getConfiguration();
- try {
- IterationUtils.writeForceTerminationState(conf, BspUtils.getJobId(conf));
- writeReachibilityResult(conf, true);
- } catch (Exception e) {
- throw new IllegalStateException(e);
+ private void signalTerminate() throws Exception {
+ writeReachibilityResult(getContext().getConfiguration(), true);
+ terminateJob();
+ }
+
+ private void writeReachibilityResult(Configuration conf, boolean terminate) throws IOException {
+ FileSystem dfs = FileSystem.get(conf);
+ String pathStr = IterationUtils.TMP_DIR + BspUtils.getJobId(conf) + "reachibility";
+ Path path = new Path(pathStr);
+ if (!dfs.exists(path)) {
+ FSDataOutputStream output = dfs.create(path, true);
+ output.writeBoolean(terminate);
+ output.flush();
+ output.close();
}
}
@@ -187,22 +194,6 @@
}
}
- private void writeReachibilityResult(Configuration conf, boolean terminate) {
- try {
- FileSystem dfs = FileSystem.get(conf);
- String pathStr = IterationUtils.TMP_DIR + BspUtils.getJobId(conf) + "reachibility";
- Path path = new Path(pathStr);
- if (!dfs.exists(path)) {
- FSDataOutputStream output = dfs.create(path, true);
- output.writeBoolean(terminate);
- output.flush();
- output.close();
- }
- } catch (IOException e) {
- throw new IllegalStateException(e);
- }
- }
-
private static boolean readReachibilityResult(Configuration conf) {
try {
FileSystem dfs = FileSystem.get(conf);
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ShortestPathsVertex.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ShortestPathsVertex.java
index 41c26b1..648f168 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ShortestPathsVertex.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/ShortestPathsVertex.java
@@ -19,7 +19,6 @@
import java.util.logging.Level;
import java.util.logging.Logger;
-import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -32,6 +31,7 @@
import edu.uci.ics.pregelix.example.client.Client;
import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
import edu.uci.ics.pregelix.example.inputformat.TextShortestPathsInputFormat;
+import edu.uci.ics.pregelix.example.io.DoubleWritable;
import edu.uci.ics.pregelix.example.io.VLongWritable;
/**
@@ -127,7 +127,7 @@
}
voteToHalt();
}
-
+
@Override
public String toString() {
return getVertexId() + " " + getVertexValue();
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/client/Client.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/client/Client.java
index 6cb1f4a..393c8c9 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/client/Client.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/client/Client.java
@@ -16,6 +16,7 @@
package edu.uci.ics.pregelix.example.client;
import java.io.IOException;
+import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
@@ -44,7 +45,7 @@
public String ipAddress;
@Option(name = "-port", usage = "port of cluster controller", required = false)
- public int port;
+ public int port = 3099;
@Option(name = "-plan", usage = "query plan choice", required = false)
public Plan planChoice = Plan.OUTER_JOIN;
@@ -66,6 +67,9 @@
@Option(name = "-runtime-profiling", usage = "whether to do runtime profifling", required = false)
public String profiling = "false";
+
+ @Option(name = "-ckp-interval", usage = "checkpointing interval -- for fault-tolerance", required = false)
+ public int ckpInterval = -1;
}
public static void run(String[] args, PregelixJob job) throws Exception {
@@ -74,6 +78,13 @@
driver.runJob(job, options.planChoice, options.ipAddress, options.port, Boolean.parseBoolean(options.profiling));
}
+ public static void run(String[] args, List<PregelixJob> jobs) throws Exception {
+ Options options = prepareJobs(args, jobs);
+ Driver driver = new Driver(Client.class);
+ driver.runJobs(jobs, options.planChoice, options.ipAddress, options.port,
+ Boolean.parseBoolean(options.profiling));
+ }
+
private static Options prepareJob(String[] args, PregelixJob job) throws CmdLineException, IOException {
Options options = new Options();
CmdLineParser parser = new CmdLineParser(options);
@@ -82,8 +93,34 @@
String[] inputs = options.inputPaths.split(";");
FileInputFormat.setInputPaths(job, inputs[0]);
for (int i = 1; i < inputs.length; i++)
- FileInputFormat.addInputPaths(job, inputs[0]);
+ FileInputFormat.addInputPaths(job, inputs[i]);
FileOutputFormat.setOutputPath(job, new Path(options.outputPath));
+ setJobSpecificSettings(job, options);
+ return options;
+ }
+
+ private static Options prepareJobs(String[] args, List<PregelixJob> jobs) throws CmdLineException, IOException {
+ Options options = new Options();
+ CmdLineParser parser = new CmdLineParser(options);
+ parser.parseArgument(args);
+
+ for (int j = 0; j < jobs.size(); j++) {
+ PregelixJob job = jobs.get(j);
+ String[] inputs = options.inputPaths.split(";");
+ if (j == 0) {
+ FileInputFormat.setInputPaths(job, inputs[0]);
+ for (int i = 1; i < inputs.length; i++)
+ FileInputFormat.addInputPaths(job, inputs[i]);
+ }
+ if (j == jobs.size() - 1) {
+ FileOutputFormat.setOutputPath(job, new Path(options.outputPath));
+ }
+ setJobSpecificSettings(job, options);
+ }
+ return options;
+ }
+
+ private static void setJobSpecificSettings(PregelixJob job, Options options) {
job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, options.numVertices);
job.getConfiguration().setLong(PregelixJob.NUM_EDGES, options.numEdges);
job.getConfiguration().setLong(ShortestPathsVertex.SOURCE_ID, options.sourceId);
@@ -91,7 +128,7 @@
job.getConfiguration().setLong(ReachabilityVertex.DEST_ID, options.destId);
if (options.numIteration > 0)
job.getConfiguration().setLong(PageRankVertex.ITERATIONS, options.numIteration);
- return options;
+ job.setCheckpointingInterval(options.ckpInterval);
}
}
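The new Client.run(String[], List<PregelixJob>) overload wires the command-line input paths into the first job, the output path into the last, and applies the shared settings, including the new -ckp-interval checkpointing flag, to every job. A minimal usage sketch, assuming each job is still given its vertex and I/O format classes as in the example main() methods above:

    import java.util.ArrayList;
    import java.util.List;

    import edu.uci.ics.pregelix.api.job.PregelixJob;
    import edu.uci.ics.pregelix.example.client.Client;

    public class TwoStagePipeline {
        public static void main(String[] args) throws Exception {
            List<PregelixJob> jobs = new ArrayList<PregelixJob>();
            jobs.add(new PregelixJob("stage-1")); // receives the input paths
            jobs.add(new PregelixJob("stage-2")); // receives the output path
            // vertex/input/output classes must still be set on each job;
            // appending "-ckp-interval 2" to args checkpoints every 2 supersteps
            Client.run(args, jobs);
        }
    }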
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/data/VLongNormalizedKeyComputer.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/data/VLongNormalizedKeyComputer.java
index 7d824ea..44d23a4 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/data/VLongNormalizedKeyComputer.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/data/VLongNormalizedKeyComputer.java
@@ -15,7 +15,7 @@
package edu.uci.ics.pregelix.example.data;
import edu.uci.ics.pregelix.api.graph.NormalizedKeyComputer;
-import edu.uci.ics.pregelix.api.util.SerDeUtils;
+import edu.uci.ics.pregelix.example.utils.SerDeUtils;
/**
* @author yingyib
@@ -28,32 +28,36 @@
@Override
public int getNormalizedKey(byte[] bytes, int start, int length) {
- long value = SerDeUtils.readVLong(bytes, start, length);
- int highValue = (int) (value >> 32);
- if (highValue > 0) {
- /**
- * larger than Integer.MAX
- */
- int highNmk = getKey(highValue);
- highNmk >>= 2;
- highNmk |= POSTIVE_LONG_MASK;
- return highNmk;
- } else if (highValue == 0) {
- /**
- * smaller than Integer.MAX but >=0
- */
- int lowNmk = (int) value;
- lowNmk >>= 2;
- lowNmk |= NON_NEGATIVE_INT_MASK;
- return lowNmk;
- } else {
- /**
- * less than 0; TODO: have not optimized for that
- */
- int highNmk = getKey(highValue);
- highNmk >>= 2;
- highNmk |= NEGATIVE_LONG_MASK;
- return highNmk;
+ try {
+ long value = SerDeUtils.readVLong(bytes, start, length);
+ int highValue = (int) (value >> 32);
+ if (highValue > 0) {
+ /**
+ * larger than Integer.MAX
+ */
+ int highNmk = getKey(highValue);
+ highNmk >>= 2;
+ highNmk |= POSTIVE_LONG_MASK;
+ return highNmk;
+ } else if (highValue == 0) {
+ /**
+ * smaller than Integer.MAX but >=0
+ */
+ int lowNmk = (int) value;
+ lowNmk >>= 2;
+ lowNmk |= NON_NEGATIVE_INT_MASK;
+ return lowNmk;
+ } else {
+ /**
+ * less than 0; TODO: have not optimized for that
+ */
+ int highNmk = getKey(highValue);
+ highNmk >>= 2;
+ highNmk |= NEGATIVE_LONG_MASK;
+ return highNmk;
+ }
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
}
}
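The rewrite above keeps the usual normalized-key contract: the 32-bit key is an order-preserving prefix of the long value, so a key comparison may tie but must never contradict the full comparison, which the comparator then resolves. A self-contained sketch of that contract using a hypothetical prefix function (deliberately not the masks above):

    public class NormalizedKeyContract {
        // hypothetical order-preserving prefix: flip the sign bit, keep the
        // high 32 bits; the unsigned shift keeps the result in [0, 2^32)
        private static long prefixKey(long v) {
            return (v ^ Long.MIN_VALUE) >>> 32;
        }

        public static void main(String[] args) {
            long[] sorted = { Long.MIN_VALUE, -1L, 0L, 1L, Long.MAX_VALUE };
            for (int i = 0; i + 1 < sorted.length; i++) {
                if (prefixKey(sorted[i]) > prefixKey(sorted[i + 1])) {
                    throw new AssertionError("prefix key contradicts value order");
                }
            }
            System.out.println("prefix keys respect value order");
        }
    }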
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/inputformat/TextPageRankInputFormat.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/inputformat/TextPageRankInputFormat.java
index f46d9c3..67681d3 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/inputformat/TextPageRankInputFormat.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/inputformat/TextPageRankInputFormat.java
@@ -18,7 +18,6 @@
import java.util.ArrayList;
import java.util.List;
-import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -31,6 +30,7 @@
import edu.uci.ics.pregelix.api.io.text.TextVertexInputFormat;
import edu.uci.ics.pregelix.api.io.text.TextVertexInputFormat.TextVertexReader;
import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.pregelix.example.io.DoubleWritable;
import edu.uci.ics.pregelix.example.io.VLongWritable;
public class TextPageRankInputFormat extends
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/inputformat/TextShortestPathsInputFormat.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/inputformat/TextShortestPathsInputFormat.java
index 013a063..3ea4a9f 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/inputformat/TextShortestPathsInputFormat.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/inputformat/TextShortestPathsInputFormat.java
@@ -18,7 +18,6 @@
import java.util.ArrayList;
import java.util.List;
-import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@@ -31,6 +30,7 @@
import edu.uci.ics.pregelix.api.io.text.TextVertexInputFormat;
import edu.uci.ics.pregelix.api.io.text.TextVertexInputFormat.TextVertexReader;
import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.pregelix.example.io.DoubleWritable;
import edu.uci.ics.pregelix.example.io.VLongWritable;
public class TextShortestPathsInputFormat extends
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/BooleanWritable.java
similarity index 61%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/BooleanWritable.java
index cde5022..c943288 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/BooleanWritable.java
@@ -12,17 +12,25 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package edu.uci.ics.pregelix.example.io;
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+/**
+ * Writable for Boolean values.
+ */
+public class BooleanWritable extends org.apache.hadoop.io.BooleanWritable implements WritableSizable {
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
+ public BooleanWritable(boolean value) {
+ super(value);
}
- public BTreeDuplicateKeyException(String message) {
- super(message);
+ public BooleanWritable() {
+ super();
}
+
+ public int sizeInBytes() {
+ return 1;
+ }
+
}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/ByteWritable.java
similarity index 62%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/ByteWritable.java
index cde5022..2a1fd22 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/ByteWritable.java
@@ -12,17 +12,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package edu.uci.ics.pregelix.example.io;
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+/**
+ * Writable for Byte values.
+ */
+public class ByteWritable extends org.apache.hadoop.io.ByteWritable implements WritableSizable {
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
+ public ByteWritable(byte value) {
+ super(value);
}
- public BTreeDuplicateKeyException(String message) {
- super(message);
+ public ByteWritable() {
+ super();
+ }
+
+ public int sizeInBytes() {
+ return 1;
}
}
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/BytesWritable.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/BytesWritable.java
new file mode 100644
index 0000000..04a5549
--- /dev/null
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/BytesWritable.java
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example.io;
+
+import edu.uci.ics.pregelix.api.io.WritableSizable;
+
+/**
+ * Writable for Bytes values.
+ */
+public class BytesWritable extends org.apache.hadoop.io.BytesWritable implements WritableSizable {
+
+ public BytesWritable(byte[] value) {
+ super(value);
+ }
+
+ public BytesWritable() {
+ super();
+ }
+
+ @Override
+ public int sizeInBytes() {
+ return getLength() + 4; // add the integer size slot
+ }
+
+}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/DoubleWritable.java
similarity index 61%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/DoubleWritable.java
index cde5022..ebc7fe4 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/DoubleWritable.java
@@ -13,16 +13,25 @@
* limitations under the License.
*/
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+package edu.uci.ics.pregelix.example.io;
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
+/**
+ * Writable for Double values.
+ */
+public class DoubleWritable extends org.apache.hadoop.io.DoubleWritable implements WritableSizable {
+
+ public DoubleWritable(double value) {
+ super(value);
}
- public BTreeDuplicateKeyException(String message) {
- super(message);
+ public DoubleWritable() {
+ super();
}
+
+ public int sizeInBytes() {
+ return 8;
+ }
+
}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/FloatWritable.java
similarity index 61%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/FloatWritable.java
index cde5022..6772b0a 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/FloatWritable.java
@@ -13,16 +13,22 @@
* limitations under the License.
*/
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+package edu.uci.ics.pregelix.example.io;
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
+/** A WritableComparable for floats. */
+public class FloatWritable extends org.apache.hadoop.io.FloatWritable implements WritableSizable {
+
+ public FloatWritable(float value) {
+ super(value);
}
- public BTreeDuplicateKeyException(String message) {
- super(message);
+ public FloatWritable() {
+ super();
+ }
+
+ public int sizeInBytes() {
+ return 4;
}
}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/IntWritable.java
similarity index 62%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/IntWritable.java
index cde5022..4944232 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/IntWritable.java
@@ -13,16 +13,23 @@
* limitations under the License.
*/
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+package edu.uci.ics.pregelix.example.io;
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
+/** A WritableComparable for ints. */
+public class IntWritable extends org.apache.hadoop.io.IntWritable implements WritableSizable {
+
+ public IntWritable(int value) {
+ super(value);
}
- public BTreeDuplicateKeyException(String message) {
- super(message);
+ public IntWritable() {
+ super();
}
+
+ public int sizeInBytes() {
+ return 4;
+ }
+
}
diff --git a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/LongWritable.java
similarity index 62%
copy from hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
copy to pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/LongWritable.java
index cde5022..3ecab79 100644
--- a/hyracks/hyracks-storage-am-btree/src/main/java/edu/uci/ics/hyracks/storage/am/btree/exceptions/BTreeDuplicateKeyException.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/LongWritable.java
@@ -13,16 +13,23 @@
* limitations under the License.
*/
-package edu.uci.ics.hyracks.storage.am.btree.exceptions;
+package edu.uci.ics.pregelix.example.io;
-public class BTreeDuplicateKeyException extends BTreeException {
- private static final long serialVersionUID = 1L;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
- public BTreeDuplicateKeyException(Exception e) {
- super(e);
+/** A WritableComparable for longs. */
+public class LongWritable extends org.apache.hadoop.io.LongWritable implements WritableSizable {
+
+ public LongWritable(long value) {
+ super(value);
}
- public BTreeDuplicateKeyException(String message) {
- super(message);
+ public LongWritable() {
+ super();
}
+
+ public int sizeInBytes() {
+ return 8;
+ }
+
}
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/NullWritable.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/NullWritable.java
new file mode 100644
index 0000000..a2f184a
--- /dev/null
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/NullWritable.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.pregelix.example.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+
+import edu.uci.ics.pregelix.api.io.WritableSizable;
+
+/** Singleton Writable with no data. */
+@SuppressWarnings("rawtypes")
+public class NullWritable implements WritableComparable, WritableSizable {
+
+ private static final NullWritable THIS = new NullWritable();
+
+ private NullWritable() {
+ } // no public ctor
+
+ /** Returns the single instance of this class. */
+ public static NullWritable get() {
+ return THIS;
+ }
+
+ public String toString() {
+ return "(null)";
+ }
+
+ public int sizeInBytes() {
+ return 0;
+ }
+
+ public int hashCode() {
+ return 0;
+ }
+
+ public int compareTo(Object other) {
+ if (!(other instanceof NullWritable)) {
+ throw new ClassCastException("can't compare " + other.getClass().getName() + " to NullWritable");
+ }
+ return 0;
+ }
+
+ public boolean equals(Object other) {
+ return other instanceof NullWritable;
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ }
+
+ public void write(DataOutput out) throws IOException {
+ }
+
+ /** A Comparator "optimized" for NullWritable. */
+ public static class Comparator extends WritableComparator {
+ public Comparator() {
+ super(NullWritable.class);
+ }
+
+ /**
+ * Compare the buffers in serialized form.
+ */
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ assert 0 == l1;
+ assert 0 == l2;
+ return 0;
+ }
+ }
+
+ static { // register this comparator
+ WritableComparator.define(NullWritable.class, new Comparator());
+ }
+}
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/VIntWritable.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/VIntWritable.java
new file mode 100644
index 0000000..94df74f
--- /dev/null
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/VIntWritable.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.pregelix.example.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableUtils;
+
+import edu.uci.ics.pregelix.api.io.WritableSizable;
+
+/**
+ * A WritableComparable for integer values stored in variable-length format.
+ * Such values take between one and five bytes. Smaller values take fewer bytes.
+ *
+ * @see org.apache.hadoop.io.WritableUtils#readVInt(DataInput)
+ */
+@SuppressWarnings("rawtypes")
+public class VIntWritable implements WritableComparable, WritableSizable {
+ private int value;
+
+ public VIntWritable() {
+ }
+
+ public VIntWritable(int value) {
+ set(value);
+ }
+
+ public int sizeInBytes() {
+ return 5;
+ }
+
+ /** Set the value of this VIntWritable. */
+ public void set(int value) {
+ this.value = value;
+ }
+
+ /** Return the value of this VIntWritable. */
+ public int get() {
+ return value;
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ value = WritableUtils.readVInt(in);
+ }
+
+ public void write(DataOutput out) throws IOException {
+ WritableUtils.writeVInt(out, value);
+ }
+
+ /** Returns true iff <code>o</code> is a VIntWritable with the same value. */
+ public boolean equals(Object o) {
+ if (!(o instanceof VIntWritable))
+ return false;
+ VIntWritable other = (VIntWritable) o;
+ return this.value == other.value;
+ }
+
+ public int hashCode() {
+ return value;
+ }
+
+ /** Compares two VIntWritables. */
+ public int compareTo(Object o) {
+ int thisValue = this.value;
+ int thatValue = ((VIntWritable) o).value;
+ return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
+ }
+
+ public String toString() {
+ return Integer.toString(value);
+ }
+
+}
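VIntWritable.sizeInBytes() returns the worst case (5 bytes) rather than the exact encoded length, a safe over-estimate for message-buffer budgeting. A small check against Hadoop's own size helper, showing how much the actual encoding can undershoot the budget:

    import org.apache.hadoop.io.WritableUtils;

    public class VIntSizeCheck {
        public static void main(String[] args) {
            int[] samples = { 0, 127, 128, -1, Integer.MAX_VALUE };
            for (int v : samples) {
                System.out.println(v + ": actual " + WritableUtils.getVIntSize(v)
                        + " byte(s), budgeted 5");
            }
        }
    }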
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/VLongWritable.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/VLongWritable.java
index e12d930..ffbbff4 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/VLongWritable.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/io/VLongWritable.java
@@ -16,23 +16,21 @@
package edu.uci.ics.pregelix.example.io;
import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
-import edu.uci.ics.pregelix.api.util.SerDeUtils;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
+import edu.uci.ics.pregelix.example.utils.SerDeUtils;
/**
* A WritableComparable for longs in a variable-length format. Such values take
- * between one and five bytes. Smaller values take fewer bytes.
+ * between one and nine bytes. Smaller values take fewer bytes.
*
* @see org.apache.hadoop.io.WritableUtils#readVLong(DataInput)
*/
@SuppressWarnings("rawtypes")
-public class VLongWritable implements WritableComparable {
- private long value;
+public class VLongWritable extends org.apache.hadoop.io.VLongWritable implements WritableSizable {
public VLongWritable() {
}
@@ -41,57 +39,43 @@
set(value);
}
- /** Set the value of this LongWritable. */
- public void set(long value) {
- this.value = value;
- }
+ public int sizeInBytes() {
+ long i = get();
+ if (i >= -112 && i <= 127) {
+ return 1;
+ }
- /** Return the value of this LongWritable. */
- public long get() {
- return value;
- }
+ int len = -112;
+ if (i < 0) {
+ i ^= -1L; // take one's complement
+ len = -120;
+ }
- public void readFields(DataInput in) throws IOException {
- value = SerDeUtils.readVLong(in);
- }
+ long tmp = i;
+ while (tmp != 0) {
+ tmp = tmp >> 8;
+ len--;
+ }
- public void write(DataOutput out) throws IOException {
- SerDeUtils.writeVLong(out, value);
- }
-
- /** Returns true iff <code>o</code> is a VLongWritable with the same value. */
- public boolean equals(Object o) {
- if (!(o instanceof VLongWritable))
- return false;
- VLongWritable other = (VLongWritable) o;
- return this.value == other.value;
- }
-
- public int hashCode() {
- return (int) value;
- }
-
- /** Compares two VLongWritables. */
- public int compareTo(Object o) {
- long thisValue = this.value;
- long thatValue = ((VLongWritable) o).value;
- return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
- }
-
- public String toString() {
- return Long.toString(value);
+ len = (len < -120) ? -(len + 120) : -(len + 112);
+ return len + 1;
}
/** A Comparator optimized for LongWritable. */
public static class Comparator extends WritableComparator {
+
public Comparator() {
super(VLongWritable.class);
}
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
- long thisValue = SerDeUtils.readVLong(b1, s1, l1);
- long thatValue = SerDeUtils.readVLong(b2, s2, l2);
- return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
+ try {
+ long thisValue = SerDeUtils.readVLong(b1, s1, l1);
+ long thatValue = SerDeUtils.readVLong(b2, s2, l2);
+ return (thisValue < thatValue ? -1 : (thisValue == thatValue ? 0 : 1));
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
}
}
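The new sizeInBytes() reproduces the zero-compressed vlong length rule: one byte for values in [-112, 127], otherwise a length-tagged first byte plus up to eight magnitude bytes. A quick cross-check of that arithmetic against what Hadoop actually writes for the same wire format:

    import org.apache.hadoop.io.DataOutputBuffer;
    import org.apache.hadoop.io.WritableUtils;

    import edu.uci.ics.pregelix.example.io.VLongWritable;

    public class VLongSizeCheck {
        public static void main(String[] args) throws Exception {
            long[] samples = { 0L, 127L, -112L, -113L, 128L, 1L << 40, Long.MIN_VALUE };
            for (long v : samples) {
                DataOutputBuffer out = new DataOutputBuffer();
                WritableUtils.writeVLong(out, v);
                System.out.println(v + ": sizeInBytes() = " + new VLongWritable(v).sizeInBytes()
                        + ", encoded = " + out.getLength() + " byte(s)");
            }
        }
    }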
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/AdjacencyListWritable.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/AdjacencyListWritable.java
index 0a58c00..dd86a45 100644
--- a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/AdjacencyListWritable.java
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/maximalclique/AdjacencyListWritable.java
@@ -23,14 +23,13 @@
import java.util.Set;
import java.util.TreeSet;
-import org.apache.hadoop.io.Writable;
-
+import edu.uci.ics.pregelix.api.io.WritableSizable;
import edu.uci.ics.pregelix.example.io.VLongWritable;
/**
* The adjacency list contains <src, list-of-neighbors>
*/
-public class AdjacencyListWritable implements Writable {
+public class AdjacencyListWritable implements WritableSizable {
private VLongWritable sourceVertex = new VLongWritable();
private Set<VLongWritable> destinationVertexes = new TreeSet<VLongWritable>();
@@ -96,4 +95,13 @@
return destinationVertexes.contains(v);
}
+ @Override
+ public int sizeInBytes() {
+ int size = 4; // the size of list bytes
+ for (VLongWritable dest : destinationVertexes) {
+ size += dest.sizeInBytes();
+ }
+ return size;
+ }
+
}
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/utils/SerDeUtils.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/utils/SerDeUtils.java
new file mode 100644
index 0000000..2800187
--- /dev/null
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/utils/SerDeUtils.java
@@ -0,0 +1,56 @@
+package edu.uci.ics.pregelix.example.utils;
+
+import java.io.IOException;
+
+public class SerDeUtils {
+
+ /**
+ * Reads a zero-compressed encoded long from a byte array and returns it.
+ *
+ * @param data
+ * the byte array holding the encoded long at offsets [start, start + length)
+ * @throws java.io.IOException
+ * @return the deserialized long value.
+ */
+ public static long readVLong(byte[] data, int start, int length) throws IOException {
+ byte firstByte = data[start];
+ int len = decodeVIntSize(firstByte);
+ if (len == 1) {
+ return firstByte;
+ }
+ long i = 0;
+ for (int idx = 0; idx < len - 1; idx++) {
+ i = i << 8;
+ i = i | (data[++start] & 0xFF);
+ }
+ return (isNegativeVInt(firstByte) ? (i ^ -1L) : i);
+ }
+
+ /**
+ * Parse the first byte of a vint/vlong to determine the number of bytes
+ *
+ * @param value
+ * the first byte of the vint/vlong
+ * @return the total number of bytes (1 to 9)
+ */
+ public static int decodeVIntSize(byte value) {
+ if (value >= -112) {
+ return 1;
+ } else if (value < -120) {
+ return -119 - value;
+ }
+ return -111 - value;
+ }
+
+ /**
+ * Given the first byte of a vint/vlong, determine the sign
+ *
+ * @param value
+ * the first byte
+ * @return is the value negative
+ */
+ public static boolean isNegativeVInt(byte value) {
+ return value < -120 || (value >= -112 && value < 0);
+ }
+
+}
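Because the byte-array decoder above implements the same zero-compressed format as Hadoop's stream-based WritableUtils encoder, a round-trip makes a handy sanity test; a minimal sketch:

    import org.apache.hadoop.io.DataOutputBuffer;
    import org.apache.hadoop.io.WritableUtils;

    import edu.uci.ics.pregelix.example.utils.SerDeUtils;

    public class VLongRoundTrip {
        public static void main(String[] args) throws Exception {
            long[] samples = { 0L, 1L, -1L, 300L, -4096L, Long.MAX_VALUE, Long.MIN_VALUE };
            for (long v : samples) {
                DataOutputBuffer out = new DataOutputBuffer();
                WritableUtils.writeVLong(out, v);
                long decoded = SerDeUtils.readVLong(out.getData(), 0, out.getLength());
                if (decoded != v) {
                    throw new IllegalStateException(v + " decoded as " + decoded);
                }
            }
            System.out.println("all samples round-trip");
        }
    }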
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryConnectedComponentsTest.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryConnectedComponentsTest.java
new file mode 100644
index 0000000..efc7bcc
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryConnectedComponentsTest.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example;
+
+import java.io.File;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Test;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.ConservativeCheckpointHook;
+import edu.uci.ics.pregelix.api.util.DefaultVertexPartitioner;
+import edu.uci.ics.pregelix.core.driver.Driver;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+import edu.uci.ics.pregelix.example.ConnectedComponentsVertex.SimpleConnectedComponentsVertexOutputFormat;
+import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
+import edu.uci.ics.pregelix.example.inputformat.TextConnectedComponentsInputFormat;
+import edu.uci.ics.pregelix.example.util.TestCluster;
+import edu.uci.ics.pregelix.example.util.TestUtils;
+
+/**
+ * @author yingyib
+ */
+public class FailureRecoveryConnectedComponentsTest {
+ private static String INPUTPATH = "data/webmapcomplex";
+ private static String OUTPUTPAH = "actual/result";
+ private static String EXPECTEDPATH = "src/test/resources/expected/ConnectedComponentsRealComplex2";
+
+ @Test
+ public void test() throws Exception {
+ TestCluster testCluster = new TestCluster();
+ try {
+ PregelixJob job = new PregelixJob(ConnectedComponentsVertex.class.getName());
+ job.setVertexClass(ConnectedComponentsVertex.class);
+ job.setVertexInputFormatClass(TextConnectedComponentsInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleConnectedComponentsVertexOutputFormat.class);
+ job.setMessageCombinerClass(ConnectedComponentsVertex.SimpleMinCombiner.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ job.setVertexPartitionerClass(DefaultVertexPartitioner.class);
+ job.setDynamicVertexValueSize(true);
+ FileInputFormat.setInputPaths(job, INPUTPATH);
+ FileOutputFormat.setOutputPath(job, new Path(OUTPUTPAH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 23);
+ job.setCheckpointHook(ConservativeCheckpointHook.class);
+
+ testCluster.setUp();
+ Driver driver = new Driver(ConnectedComponentsVertex.class);
+ Thread thread = new Thread(new Runnable() {
+
+ @Override
+ public void run() {
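+ // Failure injection: poll until the job is past superstep 5, then
+ // kill node controller 1 so that the recovery path is exercised.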
+ try {
+ synchronized (this) {
+ while (Vertex.getSuperstep() <= 5) {
+ this.wait(200);
+ }
+ PregelixHyracksIntegrationUtil.shutdownNC1();
+ }
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+ });
+ thread.start();
+ driver.runJob(job, "127.0.0.1", PregelixHyracksIntegrationUtil.TEST_HYRACKS_CC_CLIENT_PORT);
+
+ TestUtils.compareWithResultDir(new File(EXPECTEDPATH), new File(OUTPUTPAH));
+ } catch (Exception e) {
+ PregelixHyracksIntegrationUtil.shutdownNC2();
+ testCluster.cleanupHDFS();
+ throw e;
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryInnerJoinTest.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryInnerJoinTest.java
new file mode 100644
index 0000000..ff1e29f
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryInnerJoinTest.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example;
+
+import java.io.File;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Test;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.ConservativeCheckpointHook;
+import edu.uci.ics.pregelix.core.base.IDriver.Plan;
+import edu.uci.ics.pregelix.core.driver.Driver;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+import edu.uci.ics.pregelix.example.PageRankVertex.SimplePageRankVertexOutputFormat;
+import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
+import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.util.TestCluster;
+import edu.uci.ics.pregelix.example.util.TestUtils;
+
+/**
+ * @author yingyib
+ */
+public class FailureRecoveryInnerJoinTest {
+ private static String INPUTPATH = "data/webmap";
+ private static String OUTPUTPAH = "actual/result";
+ private static String EXPECTEDPATH = "src/test/resources/expected/PageRankReal2";
+
+ @Test
+ public void test() throws Exception {
+ TestCluster testCluster = new TestCluster();
+
+ try {
+ PregelixJob job = new PregelixJob(PageRankVertex.class.getName());
+ job.setVertexClass(PageRankVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimplePageRankVertexOutputFormat.class);
+ job.setMessageCombinerClass(PageRankVertex.SimpleSumCombiner.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ FileInputFormat.setInputPaths(job, INPUTPATH);
+ FileOutputFormat.setOutputPath(job, new Path(OUTPUTPAH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job.setCheckpointHook(ConservativeCheckpointHook.class);
+
+ testCluster.setUp();
+ Driver driver = new Driver(PageRankVertex.class);
+ Thread thread = new Thread(new Runnable() {
+
+ @Override
+ public void run() {
+ try {
+ synchronized (this) {
+ while (Vertex.getSuperstep() <= 5) {
+ this.wait(200);
+ }
+ PregelixHyracksIntegrationUtil.shutdownNC1();
+ }
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+ });
+ thread.start();
+ driver.runJob(job, Plan.INNER_JOIN, "127.0.0.1",
+ PregelixHyracksIntegrationUtil.TEST_HYRACKS_CC_CLIENT_PORT, false);
+
+ TestUtils.compareWithResultDir(new File(EXPECTEDPATH), new File(OUTPUTPAH));
+ } catch (Exception e) {
+ PregelixHyracksIntegrationUtil.shutdownNC2();
+ testCluster.cleanupHDFS();
+ throw e;
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryTest.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryTest.java
new file mode 100644
index 0000000..3fdaf15
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryTest.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example;
+
+import java.io.File;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Test;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.ConservativeCheckpointHook;
+import edu.uci.ics.pregelix.core.driver.Driver;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+import edu.uci.ics.pregelix.example.PageRankVertex.SimplePageRankVertexOutputFormat;
+import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
+import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.util.TestCluster;
+import edu.uci.ics.pregelix.example.util.TestUtils;
+
+/**
+ * @author yingyib
+ */
+public class FailureRecoveryTest {
+ private static String INPUTPATH = "data/webmap";
+ private static String OUTPUTPAH = "actual/result";
+ private static String EXPECTEDPATH = "src/test/resources/expected/PageRankReal2";
+
+ @Test
+ public void test() throws Exception {
+ TestCluster testCluster = new TestCluster();
+
+ try {
+ PregelixJob job = new PregelixJob(PageRankVertex.class.getName());
+ job.setVertexClass(PageRankVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimplePageRankVertexOutputFormat.class);
+ job.setMessageCombinerClass(PageRankVertex.SimpleSumCombiner.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ FileInputFormat.setInputPaths(job, INPUTPATH);
+ FileOutputFormat.setOutputPath(job, new Path(OUTPUTPAH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job.setCheckpointHook(ConservativeCheckpointHook.class);
+
+ testCluster.setUp();
+ Driver driver = new Driver(PageRankVertex.class);
+ Thread thread = new Thread(new Runnable() {
+
+ @Override
+ public void run() {
+ try {
+ synchronized (this) {
+ while (Vertex.getSuperstep() <= 5) {
+ this.wait(200);
+ }
+ PregelixHyracksIntegrationUtil.shutdownNC1();
+ }
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+ });
+ thread.start();
+ driver.runJob(job, "127.0.0.1", PregelixHyracksIntegrationUtil.TEST_HYRACKS_CC_CLIENT_PORT);
+
+ TestUtils.compareWithResultDir(new File(EXPECTEDPATH), new File(OUTPUTPAH));
+ } catch (Exception e) {
+ PregelixHyracksIntegrationUtil.shutdownNC2();
+ testCluster.cleanupHDFS();
+ throw e;
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryWithoutCheckpointTest.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryWithoutCheckpointTest.java
new file mode 100644
index 0000000..e006ccd
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureRecoveryWithoutCheckpointTest.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example;
+
+import java.io.File;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Test;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.core.driver.Driver;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+import edu.uci.ics.pregelix.example.PageRankVertex.SimplePageRankVertexOutputFormat;
+import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
+import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.util.TestCluster;
+import edu.uci.ics.pregelix.example.util.TestUtils;
+
+/**
+ * @author yingyib
+ */
+public class FailureRecoveryWithoutCheckpointTest {
+ private static String INPUTPATH = "data/webmap";
+ private static String OUTPUTPAH = "actual/result";
+ private static String EXPECTEDPATH = "src/test/resources/expected/PageRankReal2";
+
+ @Test
+ public void test() throws Exception {
+ TestCluster testCluster = new TestCluster();
+
+ try {
+ PregelixJob job = new PregelixJob(PageRankVertex.class.getName());
+ job.setVertexClass(PageRankVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimplePageRankVertexOutputFormat.class);
+ job.setMessageCombinerClass(PageRankVertex.SimpleSumCombiner.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ FileInputFormat.setInputPaths(job, INPUTPATH);
+ FileOutputFormat.setOutputPath(job, new Path(OUTPUTPAH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+
+ testCluster.setUp();
+ Driver driver = new Driver(PageRankVertex.class);
+ Thread thread = new Thread(new Runnable() {
+
+ @Override
+ public void run() {
+ try {
+ synchronized (this) {
+ while (Vertex.getSuperstep() <= 5) {
+ this.wait(200);
+ }
+ PregelixHyracksIntegrationUtil.shutdownNC1();
+ }
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+ });
+ thread.start();
+ driver.runJob(job, "127.0.0.1", PregelixHyracksIntegrationUtil.TEST_HYRACKS_CC_CLIENT_PORT);
+
+ TestUtils.compareWithResultDir(new File(EXPECTEDPATH), new File(OUTPUTPAH));
+ } catch (Exception e) {
+ PregelixHyracksIntegrationUtil.shutdownNC2();
+ testCluster.cleanupHDFS();
+ throw e;
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureVertex.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureVertex.java
new file mode 100644
index 0000000..d2464c1
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureVertex.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example;
+
+import java.util.Iterator;
+
+import org.apache.hadoop.io.FloatWritable;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.example.io.VLongWritable;
+
+/**
+ * @author yingyib
+ */
+public class FailureVertex extends Vertex<VLongWritable, VLongWritable, FloatWritable, VLongWritable> {
+
+ @Override
+ public void compute(Iterator<VLongWritable> msgIterator) throws Exception {
+ if (getVertexId().get() == 10) {
+ throw new IllegalStateException("This job is going to fail");
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureVertexTest.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureVertexTest.java
new file mode 100644
index 0000000..a2d32c0
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/FailureVertexTest.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Test;
+
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.core.driver.Driver;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+import edu.uci.ics.pregelix.example.ConnectedComponentsVertex;
+import edu.uci.ics.pregelix.example.ConnectedComponentsVertex.SimpleConnectedComponentsVertexOutputFormat;
+import edu.uci.ics.pregelix.example.FailureVertex;
+import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
+import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.util.TestCluster;
+
+/**
+ * Verifies that an exception thrown in a vertex's compute() propagates back to the job client.
+ *
+ * @author yingyib
+ */
+public class FailureVertexTest {
+
+ private static String INPUT_PATH = "data/webmapcomplex";
+ private static String OUTPUT_PATH = "actual/resultcomplex";
+
+ @Test
+ public void test() throws Exception {
+ TestCluster testCluster = new TestCluster();
+ try {
+ PregelixJob job = new PregelixJob(FailureVertex.class.getSimpleName());
+ job.setVertexClass(FailureVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleConnectedComponentsVertexOutputFormat.class);
+ job.setMessageCombinerClass(ConnectedComponentsVertex.SimpleMinCombiner.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ job.setDynamicVertexValueSize(true);
+
+ FileInputFormat.setInputPaths(job, INPUT_PATH);
+ FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 23);
+
+ Driver driver = new Driver(FailureVertex.class);
+ testCluster.setUp();
+ driver.runJob(job, "127.0.0.1", PregelixHyracksIntegrationUtil.TEST_HYRACKS_CC_CLIENT_PORT);
+ Assert.fail("The job was expected to fail but completed successfully.");
+ } catch (Exception e) {
+ Assert.assertTrue(e.toString().contains("This job is going to fail"));
+ } finally {
+ testCluster.tearDown();
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/JobConcatenationTest.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/JobConcatenationTest.java
new file mode 100644
index 0000000..dc7a28d
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/JobConcatenationTest.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Test;
+
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.ConservativeCheckpointHook;
+import edu.uci.ics.pregelix.core.driver.Driver;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+import edu.uci.ics.pregelix.example.PageRankVertex.SimplePageRankVertexOutputFormat;
+import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
+import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.util.TestCluster;
+import edu.uci.ics.pregelix.example.util.TestUtils;
+
+/**
+ * @author yingyib
+ */
+public class JobConcatenationTest {
+
+ private static String INPUTPATH = "data/webmap";
+ private static String OUTPUTPAH = "actual/result";
+ private static String EXPECTEDPATH = "src/test/resources/expected/PageRankReal";
+
+ @Test
+ public void test() throws Exception {
+ TestCluster testCluster = new TestCluster();
+
+ try {
+ List<PregelixJob> jobs = new ArrayList<PregelixJob>();
+ PregelixJob job1 = new PregelixJob(PageRankVertex.class.getName());
+ job1.setVertexClass(PageRankVertex.class);
+ job1.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job1.setVertexOutputFormatClass(SimplePageRankVertexOutputFormat.class);
+ job1.setMessageCombinerClass(PageRankVertex.SimpleSumCombiner.class);
+ job1.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ FileInputFormat.setInputPaths(job1, INPUTPATH);
+ job1.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job1.setCheckpointHook(ConservativeCheckpointHook.class);
+
+ PregelixJob job2 = new PregelixJob(PageRankVertex.class.getName());
+ job2.setVertexClass(PageRankVertex.class);
+ job2.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job2.setVertexOutputFormatClass(SimplePageRankVertexOutputFormat.class);
+ job2.setMessageCombinerClass(PageRankVertex.SimpleSumCombiner.class);
+ job2.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ FileOutputFormat.setOutputPath(job2, new Path(OUTPUTPAH));
+ job2.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job2.setCheckpointHook(ConservativeCheckpointHook.class);
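+
+ // Note: job1 sets no output path and job2 sets no input path; when run
+ // through Driver.runJobs the intermediate vertex state is (presumably)
+ // handed from one concatenated job to the next inside the cluster rather
+ // than through HDFS files.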
+
+ jobs.add(job1);
+ jobs.add(job2);
+
+ testCluster.setUp();
+ Driver driver = new Driver(PageRankVertex.class);
+ driver.runJobs(jobs, "127.0.0.1", PregelixHyracksIntegrationUtil.TEST_HYRACKS_CC_CLIENT_PORT);
+
+ TestUtils.compareWithResultDir(new File(EXPECTEDPATH), new File(OUTPUTPAH));
+ } finally {
+ testCluster.tearDown();
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/OverflowAggregatorTest.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/OverflowAggregatorTest.java
new file mode 100644
index 0000000..474d0a6
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/OverflowAggregatorTest.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example;
+
+import java.io.File;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+import org.junit.Test;
+
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.BspUtils;
+import edu.uci.ics.pregelix.core.driver.Driver;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+import edu.uci.ics.pregelix.dataflow.util.IterationUtils;
+import edu.uci.ics.pregelix.example.PageRankVertex.SimplePageRankVertexOutputFormat;
+import edu.uci.ics.pregelix.example.aggregator.OverflowAggregator;
+import edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer;
+import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.util.TestCluster;
+import edu.uci.ics.pregelix.example.util.TestUtils;
+
+/**
+ * @author yingyib
+ */
+public class OverflowAggregatorTest {
+
+ private static String INPUTPATH = "data/webmap";
+ private static String OUTPUTPAH = "actual/result";
+ private static String EXPECTEDPATH = "src/test/resources/expected/PageRankReal";
+
+ @Test
+ public void test() throws Exception {
+ TestCluster testCluster = new TestCluster();
+
+ try {
+ PregelixJob job = new PregelixJob(PageRankVertex.class.getName());
+ job.setVertexClass(PageRankVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimplePageRankVertexOutputFormat.class);
+ job.setMessageCombinerClass(PageRankVertex.SimpleSumCombiner.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ FileInputFormat.setInputPaths(job, INPUTPATH);
+ FileOutputFormat.setOutputPath(job, new Path(OUTPUTPAH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job.setGlobalAggregatorClass(OverflowAggregator.class);
+
+ testCluster.setUp();
+ Driver driver = new Driver(PageRankVertex.class);
+ driver.runJob(job, "127.0.0.1", PregelixHyracksIntegrationUtil.TEST_HYRACKS_CC_CLIENT_PORT);
+
+ TestUtils.compareWithResultDir(new File(EXPECTEDPATH), new File(OUTPUTPAH));
+ Text text = (Text) IterationUtils.readGlobalAggregateValue(job.getConfiguration(),
+ BspUtils.getJobId(job.getConfiguration()));
+ Assert.assertEquals(20 * 32767, text.getLength());
+ } finally {
+ testCluster.tearDown();
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/aggregator/OverflowAggregator.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/aggregator/OverflowAggregator.java
new file mode 100644
index 0000000..34b8b51
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/aggregator/OverflowAggregator.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example.aggregator;
+
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.pregelix.api.graph.GlobalAggregator;
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.example.io.DoubleWritable;
+import edu.uci.ics.pregelix.example.io.VLongWritable;
+
+/**
+ * Test the case where the global aggregate's state is bloated
+ *
+ * @author yingyib
+ */
+public class OverflowAggregator extends
+ GlobalAggregator<VLongWritable, DoubleWritable, FloatWritable, DoubleWritable, Text, Text> {
+
+ private int textLength = 0;
+ private int inc = 32767;
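+
+ // Each vertex contributes 32767 bytes to the aggregate length, so the
+ // final aggregate for the 20-vertex test input is 20 * 32767 bytes,
+ // which is what OverflowAggregatorTest asserts.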
+
+ @Override
+ public void init() {
+ textLength = 0;
+ }
+
+ @Override
+ public void step(Vertex<VLongWritable, DoubleWritable, FloatWritable, DoubleWritable> v)
+ throws HyracksDataException {
+ textLength += inc;
+ }
+
+ @Override
+ public void step(Text partialResult) {
+ textLength += partialResult.getLength();
+ }
+
+ @Override
+ public Text finishPartial() {
+ byte[] partialResult = new byte[textLength];
+ for (int i = 0; i < partialResult.length; i++) {
+ partialResult[i] = 'a';
+ }
+ Text text = new Text();
+ text.set(partialResult);
+ return text;
+ }
+
+ @Override
+ public Text finishFinal() {
+ byte[] result = new byte[textLength];
+ for (int i = 0; i < result.length; i++) {
+ result[i] = 'a';
+ }
+ Text text = new Text();
+ text.set(result);
+ return text;
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
index 15117a1..f6857fe 100644
--- a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
@@ -23,11 +23,17 @@
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.api.util.ConservativeCheckpointHook;
import edu.uci.ics.pregelix.api.util.DefaultVertexPartitioner;
import edu.uci.ics.pregelix.example.ConnectedComponentsVertex;
import edu.uci.ics.pregelix.example.ConnectedComponentsVertex.SimpleConnectedComponentsVertexOutputFormat;
+import edu.uci.ics.pregelix.example.EarlyTerminationVertex;
+import edu.uci.ics.pregelix.example.EarlyTerminationVertex.SimpleEarlyTerminattionVertexOutputFormat;
import edu.uci.ics.pregelix.example.GraphMutationVertex;
import edu.uci.ics.pregelix.example.GraphMutationVertex.SimpleGraphMutationVertexOutputFormat;
+import edu.uci.ics.pregelix.example.MessageOverflowFixedsizeVertex;
+import edu.uci.ics.pregelix.example.MessageOverflowVertex;
+import edu.uci.ics.pregelix.example.MessageOverflowVertex.SimpleMessageOverflowVertexOutputFormat;
import edu.uci.ics.pregelix.example.PageRankVertex;
import edu.uci.ics.pregelix.example.PageRankVertex.SimplePageRankVertexOutputFormat;
import edu.uci.ics.pregelix.example.PageRankVertex.SimulatedPageRankVertexInputFormat;
@@ -71,6 +77,7 @@
FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job.setCheckpointHook(ConservativeCheckpointHook.class);
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
}
@@ -85,6 +92,7 @@
FileInputFormat.setInputPaths(job, HDFS_INPUTPATH2);
FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH2));
job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 23);
+ job.setCheckpointHook(ConservativeCheckpointHook.class);
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
}
@@ -111,6 +119,7 @@
FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job.setCheckpointHook(ConservativeCheckpointHook.class);
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
}
@@ -121,6 +130,7 @@
job.setVertexOutputFormatClass(SimpleConnectedComponentsVertexOutputFormat.class);
job.setMessageCombinerClass(ConnectedComponentsVertex.SimpleMinCombiner.class);
job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ job.setDynamicVertexValueSize(true);
FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
@@ -135,6 +145,7 @@
job.setMessageCombinerClass(ConnectedComponentsVertex.SimpleMinCombiner.class);
job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
job.setVertexPartitionerClass(DefaultVertexPartitioner.class);
+ job.setDynamicVertexValueSize(true);
FileInputFormat.setInputPaths(job, HDFS_INPUTPATH2);
FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH2));
job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 23);
@@ -233,7 +244,7 @@
job.setVertexInputFormatClass(TextMaximalCliqueInputFormat.class);
job.setVertexOutputFormatClass(MaximalCliqueVertexOutputFormat.class);
job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
- job.setMutationOrVariableSizedUpdateHeavy(true);
+ job.setLSMStorage(true);
FileInputFormat.setInputPaths(job, HDFS_INPUTPATH3);
FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH3));
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
@@ -247,7 +258,7 @@
job.setVertexInputFormatClass(TextMaximalCliqueInputFormat.class);
job.setVertexOutputFormatClass(MaximalCliqueVertexOutputFormat.class);
job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
- job.setMutationOrVariableSizedUpdateHeavy(true);
+ job.setLSMStorage(true);
FileInputFormat.setInputPaths(job, HDFS_INPUTPATH4);
FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH3));
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
@@ -262,7 +273,7 @@
job.setVertexOutputFormatClass(MaximalCliqueVertexOutputFormat.class);
job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
job.setVertexPartitionerClass(DefaultVertexPartitioner.class);
- job.setMutationOrVariableSizedUpdateHeavy(true);
+ job.setLSMStorage(true);
FileInputFormat.setInputPaths(job, HDFS_INPUTPATH5);
FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH3));
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
@@ -280,6 +291,59 @@
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
}
+ private static void generateMessageOverflowFixedsizeJob(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName);
+ job.setVertexClass(MessageOverflowFixedsizeVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(MessageOverflowFixedsizeVertex.SimpleMessageOverflowVertexOutputFormat.class);
+ job.setFrameSize(2048);
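+ // An intentionally tiny 2 KB frame forces message lists to spill past a
+ // single frame, which is the overflow code path these jobs exercise.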
+ FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
+ FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
+ private static void generateMessageOverflowJob(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName);
+ job.setVertexClass(MessageOverflowVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleMessageOverflowVertexOutputFormat.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ job.setDynamicVertexValueSize(true);
+ job.setFrameSize(2048);
+ FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
+ FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
+ private static void generateMessageOverflowJobLSM(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName);
+ job.setVertexClass(MessageOverflowVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleMessageOverflowVertexOutputFormat.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ job.setDynamicVertexValueSize(true);
+ job.setFrameSize(2048);
+ job.setLSMStorage(true);
+ FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
+ FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
+ private static void generateEarlyTerminationJob(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName);
+ job.setVertexClass(EarlyTerminationVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleEarlyTerminattionVertexOutputFormat.class);
+ job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
+ FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
+ FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
private static void genPageRank() throws IOException {
generatePageRankJob("PageRank", outputBase + "PageRank.xml");
generatePageRankJobReal("PageRank", outputBase + "PageRankReal.xml");
@@ -319,6 +383,16 @@
generateGraphMutationJob("Graph Mutation", outputBase + "GraphMutation.xml");
}
+ private static void genMessageOverflow() throws IOException {
+ generateMessageOverflowJob("Message Overflow", outputBase + "MessageOverflow.xml");
+ generateMessageOverflowJobLSM("Message Overflow LSM", outputBase + "MessageOverflowLSM.xml");
+ generateMessageOverflowFixedsizeJob("Message Overflow Fixedsize", outputBase + "MessageOverflowFixedsize.xml");
+ }
+
+ private static void genEarlyTermination() throws IOException {
+ generateEarlyTerminationJob("Early Termination", outputBase + "EarlyTermination.xml");
+ }
+
public static void main(String[] args) throws IOException {
genPageRank();
genShortestPath();
@@ -327,5 +401,7 @@
genTriangleCounting();
genMaximalClique();
genGraphMutation();
+ genMessageOverflow();
+ genEarlyTermination();
}
}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/lib/io/SizeEstimationTest.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/lib/io/SizeEstimationTest.java
new file mode 100644
index 0000000..638011b
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/lib/io/SizeEstimationTest.java
@@ -0,0 +1,176 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example.lib.io;
+
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.util.Random;
+
+import org.junit.Test;
+
+import edu.uci.ics.pregelix.api.graph.MsgList;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
+import edu.uci.ics.pregelix.example.io.BooleanWritable;
+import edu.uci.ics.pregelix.example.io.ByteWritable;
+import edu.uci.ics.pregelix.example.io.DoubleWritable;
+import edu.uci.ics.pregelix.example.io.IntWritable;
+import edu.uci.ics.pregelix.example.io.LongWritable;
+import edu.uci.ics.pregelix.example.io.NullWritable;
+import edu.uci.ics.pregelix.example.io.VIntWritable;
+import edu.uci.ics.pregelix.example.io.VLongWritable;
+
+/**
+ * @author yingyib
+ */
+public class SizeEstimationTest {
+
+ @Test
+ public void testVLong() throws Exception {
+ Random rand = new Random(System.currentTimeMillis());
+ MsgList<WritableSizable> msgList = new MsgList<WritableSizable>();
+ msgList.add(new VLongWritable(Long.MAX_VALUE));
+ msgList.add(new VLongWritable(Long.MIN_VALUE));
+ msgList.add(new VLongWritable(-1));
+ for (int i = 0; i < 1000000; i++) {
+ msgList.add(new VLongWritable(Math.abs(rand.nextLong())));
+ }
+ verifyExactSizeEstimation(msgList);
+ }
+
+ @Test
+ public void testLong() throws Exception {
+ Random rand = new Random(System.currentTimeMillis());
+ MsgList<WritableSizable> msgList = new MsgList<WritableSizable>();
+ for (int i = 0; i < 1000000; i++) {
+ msgList.add(new LongWritable(rand.nextLong()));
+ }
+ verifySizeEstimation(msgList);
+ }
+
+ @Test
+ public void testBoolean() throws Exception {
+ Random rand = new Random(System.currentTimeMillis());
+ MsgList<WritableSizable> msgList = new MsgList<WritableSizable>();
+ for (int i = 0; i < 1000000; i++) {
+ msgList.add(new BooleanWritable(rand.nextBoolean()));
+ }
+ verifySizeEstimation(msgList);
+ }
+
+ @Test
+ public void testByte() throws Exception {
+ Random rand = new Random(System.currentTimeMillis());
+ MsgList<WritableSizable> msgList = new MsgList<WritableSizable>();
+ for (int i = 0; i < 1000000; i++) {
+ msgList.add(new ByteWritable((byte) rand.nextInt()));
+ }
+ verifySizeEstimation(msgList);
+ }
+
+ @Test
+ public void testDouble() throws Exception {
+ Random rand = new Random(System.currentTimeMillis());
+ MsgList<WritableSizable> msgList = new MsgList<WritableSizable>();
+ for (int i = 0; i < 1000000; i++) {
+ msgList.add(new DoubleWritable(rand.nextDouble()));
+ }
+ verifySizeEstimation(msgList);
+ }
+
+ @Test
+ public void testFloat() throws Exception {
+ Random rand = new Random(System.currentTimeMillis());
+ MsgList<WritableSizable> msgList = new MsgList<WritableSizable>();
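+ // Note: no FloatWritable is imported in this io package, so the float
+ // samples are carried in a DoubleWritable; the size bound still holds.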
+ for (int i = 0; i < 1000000; i++) {
+ msgList.add(new DoubleWritable(rand.nextFloat()));
+ }
+ verifySizeEstimation(msgList);
+ }
+
+ @Test
+ public void testNull() throws Exception {
+ MsgList<WritableSizable> msgList = new MsgList<WritableSizable>();
+ for (int i = 0; i < 1000000; i++) {
+ msgList.add(NullWritable.get());
+ }
+ verifySizeEstimation(msgList);
+ }
+
+ @Test
+ public void testVInt() throws Exception {
+ Random rand = new Random(System.currentTimeMillis());
+ MsgList<WritableSizable> msgList = new MsgList<WritableSizable>();
+ for (int i = 0; i < 1000000; i++) {
+ msgList.add(new VIntWritable(rand.nextInt()));
+ }
+ verifySizeEstimation(msgList);
+ }
+
+ @Test
+ public void testInt() throws Exception {
+ Random rand = new Random(System.currentTimeMillis());
+ MsgList<WritableSizable> msgList = new MsgList<WritableSizable>();
+ for (int i = 0; i < 1000000; i++) {
+ msgList.add(new IntWritable(rand.nextInt()));
+ }
+ verifySizeEstimation(msgList);
+ }
+
+ private void verifySizeEstimation(MsgList<WritableSizable> msgList) throws Exception {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ DataOutput dos = new DataOutputStream(bos);
+ int accumulatedSize = 5;
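+ // The 5 extra bytes are assumed to cover MsgList's own serialized
+ // header (e.g. a 4-byte count plus a 1-byte flag).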
+ for (int i = 0; i < msgList.size(); i++) {
+ bos.reset();
+ WritableSizable value = msgList.get(i);
+ value.write(dos);
+ if (value.sizeInBytes() < bos.size()) {
+ throw new Exception(value + " estimated size (" + value.sizeInBytes()
+ + ") is smaller than the actual size (" + bos.size() + ")");
+ }
+ accumulatedSize += value.sizeInBytes();
+ }
+ bos.reset();
+ msgList.write(dos);
+ if (accumulatedSize < bos.size()) {
+ throw new Exception("Estimated list size (" + accumulatedSize + ") is smaller than the actual size"
+ + bos.size());
+ }
+ }
+
+ private void verifyExactSizeEstimation(MsgList<WritableSizable> msgList) throws Exception {
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ DataOutput dos = new DataOutputStream(bos);
+ int accumulatedSize = 5;
+ for (int i = 0; i < msgList.size(); i++) {
+ bos.reset();
+ WritableSizable value = msgList.get(i);
+ value.write(dos);
+ if (value.sizeInBytes() != bos.size()) {
+ throw new Exception(value + " estimated size (" + value.sizeInBytes()
+ + ") does not match the actual size (" + bos.size() + ")");
+ }
+ accumulatedSize += value.sizeInBytes();
+ }
+ bos.reset();
+ msgList.write(dos);
+ if (accumulatedSize < bos.size()) {
+ throw new Exception("Estimated list size (" + accumulatedSize + ") is smaller than the actual size"
+ + bos.size());
+ }
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/util/TestCluster.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/util/TestCluster.java
new file mode 100644
index 0000000..660d9eb
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/util/TestCluster.java
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.example.util;
+
+import java.io.BufferedReader;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Logger;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
+import edu.uci.ics.pregelix.core.util.PregelixHyracksIntegrationUtil;
+
+@SuppressWarnings("deprecation")
+public class TestCluster {
+ private static final Logger LOGGER = Logger.getLogger(TestCluster.class.getName());
+
+ private static final String ACTUAL_RESULT_DIR = "actual";
+ private static final String PATH_TO_HADOOP_CONF = "src/test/resources/hadoop/conf";
+ private static final String PATH_TO_CLUSTER_STORE = "src/test/resources/cluster/stores.properties";
+ private static final String PATH_TO_CLUSTER_PROPERTIES = "src/test/resources/cluster/cluster.properties";
+
+ private static final String DATA_PATH = "data/webmap/webmap_link.txt";
+ private static final String HDFS_PATH = "/webmap/";
+
+ private static final String DATA_PATH2 = "data/webmapcomplex/webmap_link.txt";
+ private static final String HDFS_PATH2 = "/webmapcomplex/";
+
+ private static final String DATA_PATH3 = "data/clique/clique.txt";
+ private static final String HDFS_PATH3 = "/clique/";
+
+ private static final String DATA_PATH4 = "data/clique2/clique.txt";
+ private static final String HDFS_PATH4 = "/clique2/";
+
+ private static final String DATA_PATH5 = "data/clique3/clique.txt";
+ private static final String HDFS_PATH5 = "/clique3/";
+
+ private static final String HADOOP_CONF_PATH = ACTUAL_RESULT_DIR + File.separator + "conf.xml";
+ private MiniDFSCluster dfsCluster;
+
+ private JobConf conf = new JobConf();
+ private int numberOfNC = 2;
+
+ public void setUp() throws Exception {
+ ClusterConfig.setStorePath(PATH_TO_CLUSTER_STORE);
+ ClusterConfig.setClusterPropertiesPath(PATH_TO_CLUSTER_PROPERTIES);
+ cleanupStores();
+ PregelixHyracksIntegrationUtil.init();
+ LOGGER.info("Hyracks mini-cluster started");
+ FileUtils.forceMkdir(new File(ACTUAL_RESULT_DIR));
+ FileUtils.cleanDirectory(new File(ACTUAL_RESULT_DIR));
+ startHDFS();
+ }
+
+ private void cleanupStores() throws IOException {
+ FileUtils.forceMkdir(new File("teststore"));
+ FileUtils.forceMkdir(new File("build"));
+ FileUtils.cleanDirectory(new File("teststore"));
+ FileUtils.cleanDirectory(new File("build"));
+ }
+
+ private void startHDFS() throws IOException {
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(conf, numberOfNC, true, null);
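+ // Bring up a mini-DFS with numberOfNC (2) data nodes, then stage each
+ // local dataset under its corresponding HDFS path below.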
+ FileSystem dfs = FileSystem.get(conf);
+ Path src = new Path(DATA_PATH);
+ Path dest = new Path(HDFS_PATH);
+ dfs.mkdirs(dest);
+ dfs.copyFromLocalFile(src, dest);
+
+ src = new Path(DATA_PATH2);
+ dest = new Path(HDFS_PATH2);
+ dfs.mkdirs(dest);
+ dfs.copyFromLocalFile(src, dest);
+
+ src = new Path(DATA_PATH3);
+ dest = new Path(HDFS_PATH3);
+ dfs.mkdirs(dest);
+ dfs.copyFromLocalFile(src, dest);
+
+ src = new Path(DATA_PATH4);
+ dest = new Path(HDFS_PATH4);
+ dfs.mkdirs(dest);
+ dfs.copyFromLocalFile(src, dest);
+
+ src = new Path(DATA_PATH5);
+ dest = new Path(HDFS_PATH5);
+ dfs.mkdirs(dest);
+ dfs.copyFromLocalFile(src, dest);
+
+ DataOutputStream confOutput = new DataOutputStream(new FileOutputStream(new File(HADOOP_CONF_PATH)));
+ conf.writeXml(confOutput);
+ confOutput.flush();
+ confOutput.close();
+ }
+
+ /**
+ * Shut down the HDFS mini-cluster.
+ */
+ public void cleanupHDFS() throws Exception {
+ dfsCluster.shutdown();
+ }
+
+ public void tearDown() throws Exception {
+ PregelixHyracksIntegrationUtil.deinit();
+ LOGGER.info("Hyracks mini-cluster shut down");
+ cleanupHDFS();
+ }
+
+ protected static List<String> getFileList(String ignorePath) throws FileNotFoundException, IOException {
+ BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+ String s = null;
+ List<String> ignores = new ArrayList<String>();
+ while ((s = reader.readLine()) != null) {
+ ignores.add(s);
+ }
+ reader.close();
+ return ignores;
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/resources/expected/ConnectedComponentsRealComplex2/part-0 b/pregelix/pregelix-example/src/test/resources/expected/ConnectedComponentsRealComplex2/part-0
new file mode 100755
index 0000000..2c975de
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/ConnectedComponentsRealComplex2/part-0
@@ -0,0 +1,10 @@
+0 0
+2 0
+4 0
+6 0
+8 0
+10 0
+12 0
+14 0
+16 0
+18 0
diff --git a/pregelix/pregelix-example/src/test/resources/expected/ConnectedComponentsRealComplex2/part-1 b/pregelix/pregelix-example/src/test/resources/expected/ConnectedComponentsRealComplex2/part-1
new file mode 100755
index 0000000..6976bc1
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/ConnectedComponentsRealComplex2/part-1
@@ -0,0 +1,13 @@
+1 0
+3 0
+5 0
+7 0
+9 0
+11 0
+13 0
+15 0
+17 0
+19 0
+21 21
+25 25
+27 27
diff --git a/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-0 b/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-0
new file mode 100644
index 0000000..60a55af
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-0
@@ -0,0 +1,5 @@
+0 2
+4 2
+8 2
+12 2
+16 2
diff --git a/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-1 b/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-1
new file mode 100644
index 0000000..32ee93f
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-1
@@ -0,0 +1,5 @@
+1 2
+5 2
+9 2
+13 2
+17 2
diff --git a/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-2 b/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-2
new file mode 100644
index 0000000..542ccae
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-2
@@ -0,0 +1,5 @@
+2 0
+6 0
+10 0
+14 0
+18 0
diff --git a/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-3 b/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-3
new file mode 100644
index 0000000..ff0e5b8
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/EarlyTermination/part-3
@@ -0,0 +1,5 @@
+3 1
+7 1
+11 1
+15 1
+19 1
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-0 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-0
new file mode 100644
index 0000000..db5f679
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-0
@@ -0,0 +1,5 @@
+0 10000
+4 70000
+8 30000
+12 90000
+16 50000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-1 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-1
new file mode 100644
index 0000000..3dc4629
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-1
@@ -0,0 +1,5 @@
+1 100000
+5 60000
+9 20000
+13 80000
+17 40000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-2 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-2
new file mode 100644
index 0000000..bc95831
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-2
@@ -0,0 +1,5 @@
+2 90000
+6 50000
+10 10000
+14 70000
+18 30000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-3 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-3
new file mode 100644
index 0000000..b619cd7
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflow/part-3
@@ -0,0 +1,5 @@
+3 80000
+7 40000
+11 100000
+15 60000
+19 20000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-0 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-0
new file mode 100644
index 0000000..db5f679
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-0
@@ -0,0 +1,5 @@
+0 10000
+4 70000
+8 30000
+12 90000
+16 50000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-1 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-1
new file mode 100644
index 0000000..3dc4629
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-1
@@ -0,0 +1,5 @@
+1 100000
+5 60000
+9 20000
+13 80000
+17 40000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-2 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-2
new file mode 100644
index 0000000..bc95831
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-2
@@ -0,0 +1,5 @@
+2 90000
+6 50000
+10 10000
+14 70000
+18 30000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-3 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-3
new file mode 100644
index 0000000..b619cd7
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowFixedsize/part-3
@@ -0,0 +1,5 @@
+3 80000
+7 40000
+11 100000
+15 60000
+19 20000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-0 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-0
new file mode 100644
index 0000000..db5f679
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-0
@@ -0,0 +1,5 @@
+0 10000
+4 70000
+8 30000
+12 90000
+16 50000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-1 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-1
new file mode 100644
index 0000000..3dc4629
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-1
@@ -0,0 +1,5 @@
+1 100000
+5 60000
+9 20000
+13 80000
+17 40000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-2 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-2
new file mode 100644
index 0000000..bc95831
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-2
@@ -0,0 +1,5 @@
+2 90000
+6 50000
+10 10000
+14 70000
+18 30000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-3 b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-3
new file mode 100644
index 0000000..b619cd7
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/MessageOverflowLSM/part-3
@@ -0,0 +1,5 @@
+3 80000
+7 40000
+11 100000
+15 60000
+19 20000
diff --git a/pregelix/pregelix-example/src/test/resources/expected/PageRankReal2/part-0 b/pregelix/pregelix-example/src/test/resources/expected/PageRankReal2/part-0
new file mode 100755
index 0000000..d135b86
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/PageRankReal2/part-0
@@ -0,0 +1,10 @@
+0 0.008290140026154316
+2 0.14646839195826472
+4 0.03976979906329426
+6 0.015736276824953852
+8 0.010628239626209894
+10 0.008290140026154316
+12 0.14646839195826472
+14 0.03976979906329426
+16 0.015736276824953852
+18 0.010628239626209894
diff --git a/pregelix/pregelix-example/src/test/resources/expected/PageRankReal2/part-1 b/pregelix/pregelix-example/src/test/resources/expected/PageRankReal2/part-1
new file mode 100755
index 0000000..d3badee
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/PageRankReal2/part-1
@@ -0,0 +1,10 @@
+1 0.15351528192471647
+3 0.08125113985998214
+5 0.0225041581462058
+7 0.012542224114863661
+9 0.009294348455354817
+11 0.15351528192471647
+13 0.08125113985998214
+15 0.0225041581462058
+17 0.012542224114863661
+19 0.009294348455354817
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/ConnectedComponentsReal.xml b/pregelix/pregelix-example/src/test/resources/jobs/ConnectedComponentsReal.xml
index decbde8..df72d9b 100644
--- a/pregelix/pregelix-example/src/test/resources/jobs/ConnectedComponentsReal.xml
+++ b/pregelix/pregelix-example/src/test/resources/jobs/ConnectedComponentsReal.xml
@@ -127,6 +127,7 @@
<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.inputformat.TextConnectedComponentsInputFormat</value></property>
<property><name>mapred.job.queue.name</name><value>default</value></property>
<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>pregelix.incStateLength</name><value>true</value></property>
<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
<property><name>topology.script.number.args</name><value>100</value></property>
<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/ConnectedComponentsRealComplex.xml b/pregelix/pregelix-example/src/test/resources/jobs/ConnectedComponentsRealComplex.xml
index cca66bb..b0bf024 100644
--- a/pregelix/pregelix-example/src/test/resources/jobs/ConnectedComponentsRealComplex.xml
+++ b/pregelix/pregelix-example/src/test/resources/jobs/ConnectedComponentsRealComplex.xml
@@ -128,6 +128,7 @@
<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.inputformat.TextConnectedComponentsInputFormat</value></property>
<property><name>mapred.job.queue.name</name><value>default</value></property>
<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>pregelix.incStateLength</name><value>true</value></property>
<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
<property><name>topology.script.number.args</name><value>100</value></property>
<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
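
Note: both connected-components job configurations gain pregelix.incStateLength=true, signalling that vertex state may grow between supersteps as component labels propagate. A minimal sketch of setting the same flag in code, using only the stock Hadoop Configuration API and the raw key from the XML (whether pregelix-api also offers a typed setter is not shown in this patch):

    import org.apache.hadoop.conf.Configuration;

    public class IncStateLengthExample {
        // Enable variable-length vertex state under the same key the job XMLs use.
        static Configuration withGrowableState() {
            Configuration conf = new Configuration();
            conf.setBoolean("pregelix.incStateLength", true);
            return conf;
        }
    }
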
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/EarlyTermination.xml b/pregelix/pregelix-example/src/test/resources/jobs/EarlyTermination.xml
new file mode 100644
index 0000000..d908da8
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/jobs/EarlyTermination.xml
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.input.dir</name><value>file:/webmap</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>fs.checkpoint.dir</name><value>${hadoop.tmp.dir}/dfs/namesecondary</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.output.dir</name><value>/result</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.job.name</name><value>Early Termination</value></property>
+<property><name>pregelix.nmkComputerClass</name><value>edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>pregelix.numVertices</name><value>20</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>fs.default.name</name><value>file:///</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.ScriptBasedMapping</value></property>
+<property><name>pregelix.vertexClass</name><value>edu.uci.ics.pregelix.example.EarlyTerminationVertex</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>pregelix.vertexOutputFormatClass</name><value>edu.uci.ics.pregelix.example.EarlyTerminationVertex$SimpleEarlyTerminattionVertexOutputFormat</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+</configuration>
\ No newline at end of file
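
Note: EarlyTermination.xml is a serialized Hadoop job configuration: pregelix.vertexClass selects EarlyTerminationVertex, pregelix.numVertices fixes the test input at 20 vertices, and the remaining entries are stock Hadoop defaults. Such a file can be rehydrated with the plain Hadoop API; a minimal sketch, with an illustrative local path:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;

    public class LoadJobConf {
        public static void main(String[] args) {
            // Load only the XML's values, skipping the built-in Hadoop defaults.
            Configuration conf = new Configuration(false);
            conf.addResource(new Path("src/test/resources/jobs/EarlyTermination.xml"));
            System.out.println(conf.get("pregelix.vertexClass"));
            System.out.println(conf.getInt("pregelix.numVertices", -1));
        }
    }
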
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/MessageOverflow.xml b/pregelix/pregelix-example/src/test/resources/jobs/MessageOverflow.xml
new file mode 100644
index 0000000..8316c64
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/jobs/MessageOverflow.xml
@@ -0,0 +1,144 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.input.dir</name><value>file:/webmap</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>fs.checkpoint.dir</name><value>${hadoop.tmp.dir}/dfs/namesecondary</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.output.dir</name><value>/result</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.job.name</name><value>Message Overflow</value></property>
+<property><name>pregelix.nmkComputerClass</name><value>edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>pregelix.numVertices</name><value>20</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>fs.default.name</name><value>file:///</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.ScriptBasedMapping</value></property>
+<property><name>pregelix.vertexClass</name><value>edu.uci.ics.pregelix.example.MessageOverflowVertex</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>pregelix.framesize</name><value>2048</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>pregelix.vertexOutputFormatClass</name><value>edu.uci.ics.pregelix.example.MessageOverflowVertex$SimpleMessageOverflowVertexOutputFormat</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>pregelix.incStateLength</name><value>true</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+</configuration>
\ No newline at end of file
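
Note: relative to EarlyTermination.xml, the MessageOverflow job adds two tuning knobs: pregelix.framesize is pinned at 2048 so that accumulated message lists overflow a single frame, and pregelix.incStateLength is enabled because vertex state grows with the incoming messages. The same pair set programmatically, again using only the raw keys visible in the XML:

    import org.apache.hadoop.conf.Configuration;

    public class MessageOverflowSettings {
        // Mirror the two overflow-related knobs from MessageOverflow.xml:
        // a deliberately tiny frame plus growable vertex state.
        static void apply(Configuration conf) {
            conf.setInt("pregelix.framesize", 2048);
            conf.setBoolean("pregelix.incStateLength", true);
        }
    }
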
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/MessageOverflowFixedsize.xml b/pregelix/pregelix-example/src/test/resources/jobs/MessageOverflowFixedsize.xml
new file mode 100644
index 0000000..a894ccd
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/jobs/MessageOverflowFixedsize.xml
@@ -0,0 +1,142 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.input.dir</name><value>file:/webmap</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>fs.checkpoint.dir</name><value>${hadoop.tmp.dir}/dfs/namesecondary</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.output.dir</name><value>/result</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.job.name</name><value>Message Overflow Fixedsize</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>pregelix.numVertices</name><value>20</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>fs.default.name</name><value>file:///</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.ScriptBasedMapping</value></property>
+<property><name>pregelix.vertexClass</name><value>edu.uci.ics.pregelix.example.MessageOverflowFixedsizeVertex</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>pregelix.framesize</name><value>2048</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>pregelix.vertexOutputFormatClass</name><value>edu.uci.ics.pregelix.example.MessageOverflowFixedsizeVertex$SimpleMessageOverflowVertexOutputFormat</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+</configuration>
\ No newline at end of file
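
Note: MessageOverflowFixedsize.xml runs the same scenario against a fixed-size vertex implementation, so the growable-state machinery is absent: there is no pregelix.incStateLength entry and no pregelix.nmkComputerClass entry, while pregelix.framesize stays at 2048. A quick way to see exactly which keys differ between two of these serialized configurations is to compare their key/value sets; a sketch using stock Hadoop classes (file paths are program arguments):

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;

    public class ConfDiff {
        public static void main(String[] args) {
            Map<String, String> left = toMap(load(args[0]));
            for (Map.Entry<String, String> e : toMap(load(args[1])).entrySet()) {
                String old = left.remove(e.getKey());
                if (!e.getValue().equals(old)) {
                    System.out.println("+ " + e.getKey() + " = " + e.getValue());
                }
            }
            for (String missing : left.keySet()) {
                System.out.println("- " + missing);
            }
        }

        static Configuration load(String path) {
            Configuration c = new Configuration(false);
            c.addResource(new Path(path));
            return c;
        }

        static Map<String, String> toMap(Configuration c) {
            Map<String, String> m = new HashMap<String, String>();
            for (Map.Entry<String, String> e : c) {
                m.put(e.getKey(), e.getValue());
            }
            return m;
        }
    }
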
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/MessageOverflowLSM.xml b/pregelix/pregelix-example/src/test/resources/jobs/MessageOverflowLSM.xml
new file mode 100644
index 0000000..a9f8925
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/jobs/MessageOverflowLSM.xml
@@ -0,0 +1,145 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.input.dir</name><value>file:/webmap</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>fs.checkpoint.dir</name><value>${hadoop.tmp.dir}/dfs/namesecondary</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.output.dir</name><value>/result</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.job.name</name><value>Message Overflow LSM</value></property>
+<property><name>pregelix.nmkComputerClass</name><value>edu.uci.ics.pregelix.example.data.VLongNormalizedKeyComputer</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>pregelix.numVertices</name><value>20</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>fs.default.name</name><value>file:///</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.ScriptBasedMapping</value></property>
+<property><name>pregelix.vertexClass</name><value>edu.uci.ics.pregelix.example.MessageOverflowVertex</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>pregelix.updateIntensive</name><value>true</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>pregelix.framesize</name><value>2048</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>pregelix.vertexOutputFormatClass</name><value>edu.uci.ics.pregelix.example.MessageOverflowVertex$SimpleMessageOverflowVertexOutputFormat</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>pregelix.incStateLength</name><value>true</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+</configuration>
\ No newline at end of file
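
Note: the LSM variant differs from MessageOverflow.xml by a single extra property, pregelix.updateIntensive=true, which matches the job's name and routes vertex storage to the LSM-based path (its expected outputs are the MessageOverflowLSM part files earlier in this patch). Toggling it follows the same pattern as the other flags; the key is taken verbatim from the XML, and which component it ultimately selects is inferred from the naming, not spelled out here:

    import org.apache.hadoop.conf.Configuration;

    public class LsmToggle {
        // true selects the update-intensive (LSM) storage path for vertices;
        // false leaves the default storage in place.
        static void setUpdateIntensive(Configuration conf, boolean updateIntensive) {
            conf.setBoolean("pregelix.updateIntensive", updateIntensive);
        }
    }
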
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/PageRankReal.xml b/pregelix/pregelix-example/src/test/resources/jobs/PageRankReal.xml
index 9e1e0b0..b50b02a 100644
--- a/pregelix/pregelix-example/src/test/resources/jobs/PageRankReal.xml
+++ b/pregelix/pregelix-example/src/test/resources/jobs/PageRankReal.xml
@@ -84,6 +84,7 @@
<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
<property><name>mapred.queue.names</name><value>default</value></property>
<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>pregelix.checkpointHook</name><value>edu.uci.ics.pregelix.api.util.ConservativeCheckpointHook</value></property>
<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
<property><name>mapred.job.tracker</name><value>local</value></property>
<property><name>io.skip.checksum.errors</name><value>false</value></property>
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/PageRankRealComplex.xml b/pregelix/pregelix-example/src/test/resources/jobs/PageRankRealComplex.xml
index c4366d7..217fbba 100644
--- a/pregelix/pregelix-example/src/test/resources/jobs/PageRankRealComplex.xml
+++ b/pregelix/pregelix-example/src/test/resources/jobs/PageRankRealComplex.xml
@@ -84,6 +84,7 @@
<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
<property><name>mapred.queue.names</name><value>default</value></property>
<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>pregelix.checkpointHook</name><value>edu.uci.ics.pregelix.api.util.ConservativeCheckpointHook</value></property>
<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
<property><name>mapred.job.tracker</name><value>local</value></property>
<property><name>io.skip.checksum.errors</name><value>false</value></property>
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/PageRankRealNoCombiner.xml b/pregelix/pregelix-example/src/test/resources/jobs/PageRankRealNoCombiner.xml
index ac0d508..636b055 100644
--- a/pregelix/pregelix-example/src/test/resources/jobs/PageRankRealNoCombiner.xml
+++ b/pregelix/pregelix-example/src/test/resources/jobs/PageRankRealNoCombiner.xml
@@ -84,6 +84,7 @@
<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
<property><name>mapred.queue.names</name><value>default</value></property>
<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>pregelix.checkpointHook</name><value>edu.uci.ics.pregelix.api.util.ConservativeCheckpointHook</value></property>
<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
<property><name>mapred.job.tracker</name><value>local</value></property>
<property><name>io.skip.checksum.errors</name><value>false</value></property>
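
Note: all three PageRank job configurations gain a pregelix.checkpointHook entry naming edu.uci.ics.pregelix.api.util.ConservativeCheckpointHook, wiring a checkpointing policy into these long-running test jobs. A minimal sketch of registering the hook; the string form of the class name is used so the snippet compiles without pregelix-api on the classpath:

    import org.apache.hadoop.conf.Configuration;

    public class CheckpointHookExample {
        // Register the checkpoint hook under the same key the PageRank job XMLs use.
        static void enableCheckpointing(Configuration conf) {
            conf.set("pregelix.checkpointHook",
                    "edu.uci.ics.pregelix.api.util.ConservativeCheckpointHook");
        }
    }
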
diff --git a/pregelix/pregelix-runtime/pom.xml b/pregelix/pregelix-runtime/pom.xml
index 54e2256..6564eb0 100644
--- a/pregelix/pregelix-runtime/pom.xml
+++ b/pregelix/pregelix-runtime/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
@@ -88,89 +88,89 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>pregelix-dataflow</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-dataflow-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-data-std</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-btree</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-storage-am-lsm-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-ipc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/bootstrap/VirtualBufferCacheProvider.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/bootstrap/VirtualBufferCacheProvider.java
index ec51047..f15b1c2 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/bootstrap/VirtualBufferCacheProvider.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/bootstrap/VirtualBufferCacheProvider.java
@@ -14,6 +14,8 @@
*/
package edu.uci.ics.pregelix.runtime.bootstrap;
+import java.util.List;
+
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCacheProvider;
@@ -33,7 +35,7 @@
}
@Override
- public synchronized IVirtualBufferCache getVirtualBufferCache(IHyracksTaskContext ctx) {
- return RuntimeContext.get(ctx).getVirtualBufferCache();
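+ // now returns every virtual buffer cache registered for this task's runtime context (the LSM provider API takes a list)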
+ public synchronized List<IVirtualBufferCache> getVirtualBufferCaches(IHyracksTaskContext ctx) {
+ return RuntimeContext.get(ctx).getVirtualBufferCaches();
}
}
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
index 16ecf6c..f3a0bb4 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
@@ -32,8 +32,8 @@
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.pregelix.api.graph.GlobalAggregator;
+import edu.uci.ics.pregelix.api.graph.MsgList;
import edu.uci.ics.pregelix.api.graph.Vertex;
-import edu.uci.ics.pregelix.api.util.ArrayListWritable;
import edu.uci.ics.pregelix.api.util.ArrayListWritable.ArrayIterator;
import edu.uci.ics.pregelix.api.util.BspUtils;
import edu.uci.ics.pregelix.api.util.FrameTupleUtils;
@@ -168,11 +168,16 @@
tbAlive.reset();
vertex = (Vertex) tuple[3];
+
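+ // a vertex in an already-terminated partition does no further work: vote to halt and skip compute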
+ if (vertex.isPartitionTerminated()) {
+ vertex.voteToHalt();
+ return;
+ }
vertex.setOutputWriters(writers);
vertex.setOutputAppenders(appenders);
vertex.setOutputTupleBuilders(tbs);
- ArrayListWritable msgContentList = (ArrayListWritable) tuple[1];
+ MsgList msgContentList = (MsgList) tuple[1];
msgContentList.reset(msgIterator);
if (!msgIterator.hasNext() && vertex.isHalted()) {
@@ -183,9 +188,15 @@
}
try {
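+ // a vertex's message list may span several segments: open() on the first segment, close() on the last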
+ if (msgContentList.segmentStart()) {
+ vertex.open();
+ }
vertex.compute(msgIterator);
+ if (msgContentList.segmentEnd()) {
+ vertex.close();
+ }
vertex.finishCompute();
- } catch (IOException e) {
+ } catch (Exception e) {
throw new HyracksDataException(e);
}
@@ -195,7 +206,10 @@
if (terminate && (!vertex.isHalted() || vertex.hasMessage() || vertex.createdNewLiveVertex()))
terminate = false;
- aggregator.step(vertex);
+ if (msgContentList.segmentEnd()) {
+ /** the if condition makes sure the aggregator is called only once per vertex */
+ aggregator.step(vertex);
+ }
}
@Override
@@ -223,8 +237,12 @@
Writable agg = aggregator.finishPartial();
agg.write(tbGlobalAggregate.getDataOutput());
tbGlobalAggregate.addFieldEndOffset();
- appenderGlobalAggregate.append(tbGlobalAggregate.getFieldEndOffsets(),
- tbGlobalAggregate.getByteArray(), 0, tbGlobalAggregate.getSize());
+ if (!appenderGlobalAggregate.append(tbGlobalAggregate.getFieldEndOffsets(),
+ tbGlobalAggregate.getByteArray(), 0, tbGlobalAggregate.getSize())) {
+ // the aggregate state exceeds the page size; write it to HDFS
+ FrameTupleUtils.flushTupleToHDFS(tbGlobalAggregate, conf, Vertex.getSuperstep());
+ appenderGlobalAggregate.reset(bufferGlobalAggregate, true);
+ }
FrameTupleUtils.flushTuplesFinal(appenderGlobalAggregate, writerGlobalAggregate);
} catch (IOException e) {
throw new HyracksDataException(e);
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java
index fa7e0a1..ca8ec01 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java
@@ -172,24 +172,26 @@
tbAlive.reset();
vertex = (Vertex) tuple[1];
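+ // a vertex in an already-terminated partition does no further work: vote to halt and skip compute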
+ if (vertex.isPartitionTerminated()) {
+ vertex.voteToHalt();
+ return;
+ }
vertex.setOutputWriters(writers);
vertex.setOutputAppenders(appenders);
vertex.setOutputTupleBuilders(tbs);
- if (!msgIterator.hasNext() && vertex.isHalted()) {
- return;
- }
if (vertex.isHalted()) {
vertex.activate();
}
try {
+ vertex.open();
vertex.compute(msgIterator);
+ vertex.close();
vertex.finishCompute();
- } catch (IOException e) {
+ } catch (Exception e) {
throw new HyracksDataException(e);
}
-
/**
* this partition should not terminate
*/
@@ -200,6 +202,7 @@
* call the global aggregator
*/
aggregator.step(vertex);
+
}
@Override
@@ -227,8 +230,12 @@
Writable agg = aggregator.finishPartial();
agg.write(tbGlobalAggregate.getDataOutput());
tbGlobalAggregate.addFieldEndOffset();
- appenderGlobalAggregate.append(tbGlobalAggregate.getFieldEndOffsets(),
- tbGlobalAggregate.getByteArray(), 0, tbGlobalAggregate.getSize());
+ if (!appenderGlobalAggregate.append(tbGlobalAggregate.getFieldEndOffsets(),
+ tbGlobalAggregate.getByteArray(), 0, tbGlobalAggregate.getSize())) {
+ // the aggregate state exceeds the page size; write it to HDFS
+ FrameTupleUtils.flushTupleToHDFS(tbGlobalAggregate, conf, Vertex.getSuperstep());
+ appenderGlobalAggregate.reset(bufferGlobalAggregate, true);
+ }
FrameTupleUtils.flushTuplesFinal(appenderGlobalAggregate, writerGlobalAggregate);
} catch (IOException e) {
throw new HyracksDataException(e);
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AccumulatingAggregatorFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AccumulatingAggregatorFactory.java
index 77f28e4..acd766e 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AccumulatingAggregatorFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AccumulatingAggregatorFactory.java
@@ -14,22 +14,27 @@
*/
package edu.uci.ics.pregelix.runtime.simpleagg;
+import java.nio.ByteBuffer;
+
import org.apache.commons.lang3.tuple.Pair;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.data.std.util.ArrayBackedValueStorage;
import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
import edu.uci.ics.hyracks.dataflow.std.group.AggregateState;
import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
+import edu.uci.ics.pregelix.dataflow.group.IClusteredAggregatorDescriptorFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IAggregateFunction;
import edu.uci.ics.pregelix.dataflow.std.base.IAggregateFunctionFactory;
-public class AccumulatingAggregatorFactory implements IAggregatorDescriptorFactory {
+public class AccumulatingAggregatorFactory implements IClusteredAggregatorDescriptorFactory {
private static final long serialVersionUID = 1L;
private IAggregateFunctionFactory[] aggFactories;
@@ -41,52 +46,56 @@
@SuppressWarnings("unchecked")
@Override
public IAggregatorDescriptor createAggregator(final IHyracksTaskContext ctx, RecordDescriptor inRecordDesc,
- RecordDescriptor outRecordDescriptor, int[] aggKeys, int[] partialKeys) throws HyracksDataException {
+ RecordDescriptor outRecordDescriptor, final int[] groupFields, int[] partialgroupFields,
+ final IFrameWriter writer, final ByteBuffer outputFrame, final FrameTupleAppender appender)
+ throws HyracksDataException {
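+ // the writer, output frame, and appender let oversized group state be flushed early (see emitResultTuple)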
+ final int frameSize = ctx.getFrameSize();
+ final ArrayTupleBuilder internalTupleBuilder = new ArrayTupleBuilder(outRecordDescriptor.getFields().length);
return new IAggregatorDescriptor() {
-
private FrameTupleReference ftr = new FrameTupleReference();
+ private int groupKeySize = 0;
+ private int metaSlotSize = 4;
+
+ @Override
+ public AggregateState createAggregateStates() {
+ IAggregateFunction[] agg = new IAggregateFunction[aggFactories.length];
+ ArrayBackedValueStorage[] aggOutput = new ArrayBackedValueStorage[aggFactories.length];
+ for (int i = 0; i < agg.length; i++) {
+ aggOutput[i] = new ArrayBackedValueStorage();
+ try {
+ agg[i] = aggFactories[i].createAggregateFunction(ctx, aggOutput[i], writer);
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+ return new AggregateState(Pair.of(aggOutput, agg));
+ }
@Override
public void init(ArrayTupleBuilder tupleBuilder, IFrameTupleAccessor accessor, int tIndex,
AggregateState state) throws HyracksDataException {
- Pair<ArrayBackedValueStorage[], IAggregateFunction[]> aggState = (Pair<ArrayBackedValueStorage[], IAggregateFunction[]>) state.state;
- ArrayBackedValueStorage[] aggOutput = aggState.getLeft();
- IAggregateFunction[] agg = aggState.getRight();
-
- // initialize aggregate functions
- for (int i = 0; i < agg.length; i++) {
- aggOutput[i].reset();
- try {
- agg[i].init();
- } catch (Exception e) {
- throw new HyracksDataException(e);
- }
+ setGroupKeySize(accessor, tIndex);
+ initAggregateFunctions(state, true);
+ int stateSize = estimateStep(accessor, tIndex, state);
+ if (stateSize > frameSize) {
+ throw new HyracksDataException(
+ "Message combiner intermediate data size "
+ + stateSize
+ + " is larger than the frame size! Check the size estimation implementation in the message combiner.");
}
-
- ftr.reset(accessor, tIndex);
- for (int i = 0; i < agg.length; i++) {
- try {
- agg[i].step(ftr);
- } catch (Exception e) {
- throw new HyracksDataException(e);
- }
- }
+ singleStep(accessor, tIndex, state);
}
@Override
public void aggregate(IFrameTupleAccessor accessor, int tIndex, IFrameTupleAccessor stateAccessor,
int stateTupleIndex, AggregateState state) throws HyracksDataException {
- Pair<ArrayBackedValueStorage[], IAggregateFunction[]> aggState = (Pair<ArrayBackedValueStorage[], IAggregateFunction[]>) state.state;
- IAggregateFunction[] agg = aggState.getRight();
- ftr.reset(accessor, tIndex);
- for (int i = 0; i < agg.length; i++) {
- try {
- agg[i].step(ftr);
- } catch (Exception e) {
- throw new HyracksDataException(e);
- }
+ int stateSize = estimateStep(accessor, tIndex, state);
+ if (stateSize > frameSize) {
+ emitResultTuple(accessor, tIndex, state);
+ initAggregateFunctions(state, false);
}
+ singleStep(accessor, tIndex, state);
}
@Override
@@ -97,7 +106,7 @@
IAggregateFunction[] agg = aggState.getRight();
for (int i = 0; i < agg.length; i++) {
try {
- agg[i].finish();
+ agg[i].finishAll();
tupleBuilder.addField(aggOutput[i].getByteArray(), aggOutput[i].getStartOffset(),
aggOutput[i].getLength());
} catch (Exception e) {
@@ -107,21 +116,6 @@
}
@Override
- public AggregateState createAggregateStates() {
- IAggregateFunction[] agg = new IAggregateFunction[aggFactories.length];
- ArrayBackedValueStorage[] aggOutput = new ArrayBackedValueStorage[aggFactories.length];
- for (int i = 0; i < agg.length; i++) {
- aggOutput[i] = new ArrayBackedValueStorage();
- try {
- agg[i] = aggFactories[i].createAggregateFunction(ctx, aggOutput[i]);
- } catch (Exception e) {
- throw new IllegalStateException(e);
- }
- }
- return new AggregateState(Pair.of(aggOutput, agg));
- }
-
- @Override
public void reset() {
}
@@ -137,6 +131,97 @@
}
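+ // (re)initialize the aggregate functions: 'all' is true at the start of a new group, false for a mid-group reset after an early emit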
+ private void initAggregateFunctions(AggregateState state, boolean all) throws HyracksDataException {
+ Pair<ArrayBackedValueStorage[], IAggregateFunction[]> aggState = (Pair<ArrayBackedValueStorage[], IAggregateFunction[]>) state.state;
+ ArrayBackedValueStorage[] aggOutput = aggState.getLeft();
+ IAggregateFunction[] agg = aggState.getRight();
+
+ /**
+ * initialize aggregate functions
+ */
+ for (int i = 0; i < agg.length; i++) {
+ aggOutput[i].reset();
+ try {
+ if (all) {
+ agg[i].initAll();
+ } else {
+ agg[i].init();
+ }
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+ }
+
+ private void singleStep(IFrameTupleAccessor accessor, int tIndex, AggregateState state)
+ throws HyracksDataException {
+ Pair<ArrayBackedValueStorage[], IAggregateFunction[]> aggState = (Pair<ArrayBackedValueStorage[], IAggregateFunction[]>) state.state;
+ IAggregateFunction[] agg = aggState.getRight();
+ ftr.reset(accessor, tIndex);
+ for (int i = 0; i < agg.length; i++) {
+ try {
+ agg[i].step(ftr);
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+ }
+
+ private int estimateStep(IFrameTupleAccessor accessor, int tIndex, AggregateState state)
+ throws HyracksDataException {
+ int size = metaSlotSize + groupKeySize;
+ Pair<ArrayBackedValueStorage[], IAggregateFunction[]> aggState = (Pair<ArrayBackedValueStorage[], IAggregateFunction[]>) state.state;
+ IAggregateFunction[] agg = aggState.getRight();
+ ftr.reset(accessor, tIndex);
+ for (int i = 0; i < agg.length; i++) {
+ try {
+ size += agg[i].estimateStep(ftr) + metaSlotSize;
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
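+ // double the estimate as a conservative safety margin for metadata and growth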
+ return size * 2;
+ }
+
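+ // flush the current group's partial aggregate to the output when the accumulated state would overflow a frame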
+ private void emitResultTuple(IFrameTupleAccessor accessor, int tIndex, AggregateState state)
+ throws HyracksDataException {
+ internalTupleBuilder.reset();
+ for (int j = 0; j < groupFields.length; j++) {
+ internalTupleBuilder.addField(accessor, tIndex, groupFields[j]);
+ }
+ Pair<ArrayBackedValueStorage[], IAggregateFunction[]> aggState = (Pair<ArrayBackedValueStorage[], IAggregateFunction[]>) state.state;
+ ArrayBackedValueStorage[] aggOutput = aggState.getLeft();
+ IAggregateFunction[] agg = aggState.getRight();
+ for (int i = 0; i < agg.length; i++) {
+ try {
+ agg[i].finish();
+ internalTupleBuilder.addField(aggOutput[i].getByteArray(), aggOutput[i].getStartOffset(),
+ aggOutput[i].getLength());
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+ if (!appender.appendSkipEmptyField(internalTupleBuilder.getFieldEndOffsets(),
+ internalTupleBuilder.getByteArray(), 0, internalTupleBuilder.getSize())) {
+ FrameUtils.flushFrame(outputFrame, writer);
+ appender.reset(outputFrame, true);
+ if (!appender.appendSkipEmptyField(internalTupleBuilder.getFieldEndOffsets(),
+ internalTupleBuilder.getByteArray(), 0, internalTupleBuilder.getSize())) {
+ throw new HyracksDataException("The output cannot fit into a frame.");
+ }
+ }
+ }
+
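+ // record the byte size of the current group key: field lengths plus a per-field metadata slot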
+ public void setGroupKeySize(IFrameTupleAccessor accessor, int tIndex) {
+ groupKeySize = 0;
+ for (int i = 0; i < groupFields.length; i++) {
+ int fIndex = groupFields[i];
+ int fStartOffset = accessor.getFieldStartOffset(tIndex, fIndex);
+ int fLen = accessor.getFieldEndOffset(tIndex, fIndex) - fStartOffset;
+ groupKeySize += fLen + metaSlotSize;
+ }
+ }
+
};
}
-}
\ No newline at end of file
+}
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AggregationFunction.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AggregationFunction.java
index 8090dff..5bc30a2 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AggregationFunction.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AggregationFunction.java
@@ -26,6 +26,7 @@
import org.apache.hadoop.io.WritableComparable;
import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.comm.util.ByteBufferInputStream;
@@ -33,6 +34,7 @@
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
import edu.uci.ics.pregelix.api.graph.MessageCombiner;
import edu.uci.ics.pregelix.api.graph.MsgList;
+import edu.uci.ics.pregelix.api.io.WritableSizable;
import edu.uci.ics.pregelix.api.util.BspUtils;
import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
import edu.uci.ics.pregelix.dataflow.std.base.IAggregateFunction;
@@ -54,10 +56,11 @@
private MsgList msgList = new MsgList();
private boolean keyRead = false;
- public AggregationFunction(IHyracksTaskContext ctx, IConfigurationFactory confFactory, DataOutput output,
- boolean isFinalStage, boolean partialAggAsInput) throws HyracksDataException {
+ public AggregationFunction(IHyracksTaskContext ctx, IConfigurationFactory confFactory, DataOutput tmpOutput,
+ IFrameWriter groupByOutputWriter, boolean isFinalStage, boolean partialAggAsInput)
+ throws HyracksDataException {
this.conf = confFactory.createConfiguration(ctx);
- this.output = output;
+ this.output = tmpOutput;
this.isFinalStage = isFinalStage;
this.partialAggAsInput = partialAggAsInput;
msgList.setConf(this.conf);
@@ -68,6 +71,12 @@
}
@Override
+ public void initAll() throws HyracksDataException {
+ keyRead = false;
+ combiner.initAll(msgList);
+ }
+
+ @Override
public void init() throws HyracksDataException {
keyRead = false;
combiner.init(msgList);
@@ -75,6 +84,43 @@
@Override
public void step(IFrameTupleReference tuple) throws HyracksDataException {
+ if (!partialAggAsInput) {
+ combiner.stepPartial(key, (WritableSizable) value);
+ } else {
+ combiner.stepFinal(key, value);
+ }
+ }
+
+ @Override
+ public void finish() throws HyracksDataException {
+ try {
+ if (!isFinalStage) {
+ combinedResult = combiner.finishPartial();
+ } else {
+ combinedResult = combiner.finishFinal();
+ }
+ combinedResult.write(output);
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
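+ // like finish(), but the final stage calls finishFinalAll() to emit the complete, cross-segment result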
+ @Override
+ public void finishAll() throws HyracksDataException {
+ try {
+ if (!isFinalStage) {
+ combinedResult = combiner.finishPartial();
+ } else {
+ combinedResult = combiner.finishFinalAll();
+ }
+ combinedResult.write(output);
+ } catch (IOException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
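+ // deserialize the tuple and ask the combiner to estimate the accumulated state size after adding it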
+ @Override
+ public int estimateStep(IFrameTupleReference tuple) throws HyracksDataException {
FrameTupleReference ftr = (FrameTupleReference) tuple;
IFrameTupleAccessor fta = ftr.getFrameTupleAccessor();
ByteBuffer buffer = fta.getBuffer();
@@ -94,28 +140,13 @@
}
value.readFields(valueInput);
if (!partialAggAsInput) {
- combiner.stepPartial(key, value);
+ return combiner.estimateAccumulatedStateByteSizePartial(key, (WritableSizable) value);
} else {
- combiner.stepFinal(key, value);
+ return combiner.estimateAccumulatedStateByteSizeFinal(key, value);
}
} catch (IOException e) {
throw new HyracksDataException(e);
}
-
- }
-
- @Override
- public void finish() throws HyracksDataException {
- try {
- if (!isFinalStage) {
- combinedResult = combiner.finishPartial();
- } else {
- combinedResult = combiner.finishFinal();
- }
- combinedResult.write(output);
- } catch (IOException e) {
- throw new HyracksDataException(e);
- }
}
}
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AggregationFunctionFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AggregationFunctionFactory.java
index 33dfa5d..54eccf5 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AggregationFunctionFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/simpleagg/AggregationFunctionFactory.java
@@ -17,6 +17,7 @@
import java.io.DataOutput;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
@@ -37,9 +38,9 @@
}
@Override
- public IAggregateFunction createAggregateFunction(IHyracksTaskContext ctx, IDataOutputProvider provider)
- throws HyracksException {
+ public IAggregateFunction createAggregateFunction(IHyracksTaskContext ctx, IDataOutputProvider provider,
+ IFrameWriter writer) throws HyracksException {
DataOutput output = provider.getDataOutput();
- return new AggregationFunction(ctx, confFactory, output, isFinalStage, partialAggAsInput);
+ return new AggregationFunction(ctx, confFactory, output, writer, isFinalStage, partialAggAsInput);
}
}
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/MsgListNullWriterFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/MsgListNullWriterFactory.java
index 4eaa21c..b7689de 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/MsgListNullWriterFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/MsgListNullWriterFactory.java
@@ -32,6 +32,7 @@
@Override
public void writeNull(DataOutput out) throws HyracksDataException {
try {
+ out.writeByte(3); // 3 = segment start and end flags both set: an empty message list forms a complete segment
out.writeInt(0);
} catch (IOException e) {
throw new HyracksDataException(e);
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/PreSuperStepRuntimeHookFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/PreSuperStepRuntimeHookFactory.java
index 99bcac5..3dcdad2 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/PreSuperStepRuntimeHookFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/PreSuperStepRuntimeHookFactory.java
@@ -26,11 +26,11 @@
public class PreSuperStepRuntimeHookFactory implements IRuntimeHookFactory {
private static final long serialVersionUID = 1L;
private final IConfigurationFactory confFactory;
- private final String giraphJobId;
+ private final String jobId;
- public PreSuperStepRuntimeHookFactory(String giraphJobId, IConfigurationFactory confFactory) {
+ public PreSuperStepRuntimeHookFactory(String jobId, IConfigurationFactory confFactory) {
this.confFactory = confFactory;
- this.giraphJobId = giraphJobId;
+ this.jobId = jobId;
}
@Override
@@ -40,7 +40,7 @@
@Override
public void configure(IHyracksTaskContext ctx) throws HyracksDataException {
Configuration conf = confFactory.createConfiguration(ctx);
- IterationUtils.setProperties(giraphJobId, ctx, conf);
+ IterationUtils.setProperties(jobId, ctx, conf, -1);
}
};
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/RecoveryRuntimeHookFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/RecoveryRuntimeHookFactory.java
new file mode 100644
index 0000000..4720272
--- /dev/null
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/RecoveryRuntimeHookFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.runtime.touchpoint;
+
+import org.apache.hadoop.conf.Configuration;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
+import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHook;
+import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory;
+import edu.uci.ics.pregelix.dataflow.util.IterationUtils;
+
+/**
+ * Recover the Pregelix job state in an NC
+ *
+ * @author yingyib
+ */
+public class RecoveryRuntimeHookFactory implements IRuntimeHookFactory {
+ private static final long serialVersionUID = 1L;
+ private final long currentSuperStep;
+ private String jobId;
+ private IConfigurationFactory confFactory;
+
+ public RecoveryRuntimeHookFactory(String jobId, long currentSuperStep, IConfigurationFactory confFactory) {
+ this.currentSuperStep = currentSuperStep;
+ this.jobId = jobId;
+ this.confFactory = confFactory;
+ }
+
+ @Override
+ public IRuntimeHook createRuntimeHook() {
+ return new IRuntimeHook() {
+
+ @Override
+ public void configure(IHyracksTaskContext ctx) throws HyracksDataException {
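+ // end any in-flight superstep, then restore the job's properties as of the superstep being recovered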
+ IterationUtils.endSuperStep(jobId, ctx);
+ Configuration conf = confFactory.createConfiguration(ctx);
+ IterationUtils.recoverProperties(jobId, ctx, conf, currentSuperStep);
+ }
+
+ };
+ }
+
+}