merge from zheilbron/hyracks_msr
diff --git a/hivesterix/build.sh b/hivesterix/build.sh
new file mode 100644
index 0000000..8f61559
--- /dev/null
+++ b/hivesterix/build.sh
@@ -0,0 +1,12 @@
+rm -rf dist
+mkdir dist
+
+hadoop_versions=(0.20.2 0.23.1 0.23.6 1.0.4 cdh-4.1 cdh-4.2)
+cd ../
+for v in ${hadoop_versions[@]}
+do
+ #echo mvn clean package -DskipTests=true -Dhadoop=${v}
+ mvn clean package -DskipTests=true -Dhadoop=${v}
+ #echo mv hivesterix/hivesterix-dist/target/hivesterix-dist-*-binary-assembly.zip hivesterix/dist/hivesterix-dist-binary-assembly-hdfs-${v}.zip
+ mv hivesterix/hivesterix-dist/target/hivesterix-dist-*-binary-assembly.zip hivesterix/dist/hivesterix-dist-binary-assembly-hdfs-${v}.zip
+done
diff --git a/hivesterix/hivesterix-common/pom.xml b/hivesterix/hivesterix-common/pom.xml
index 7741193..fe9271b 100644
--- a/hivesterix/hivesterix-common/pom.xml
+++ b/hivesterix/hivesterix-common/pom.xml
@@ -1,18 +1,13 @@
<?xml version="1.0"?>
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>hivesterix-common</artifactId>
@@ -21,7 +16,7 @@
<parent>
<artifactId>hivesterix</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -44,30 +39,54 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-api</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-hdfs-core</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-serde</artifactId>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
</dependencies>
+
+ <repositories>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>cdh-build</id>
+ <url>https://repository.cloudera.com/content/groups/cdh-build</url>
+ </repository>
+ </repositories>
</project>
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
index 783004c..2ec3811 100644
--- a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
@@ -12,212 +12,217 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-
-public class ExpressionTranslator {
-
- public static Object getHiveExpression(ILogicalExpression expr, IVariableTypeEnvironment env) throws Exception {
- if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
- /**
- * function expression
- */
- AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
- IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
- FunctionIdentifier fid = funcInfo.getFunctionIdentifier();
-
- if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
- Object info = ((HiveFunctionInfo) funcInfo).getInfo();
- ExprNodeFieldDesc desc = (ExprNodeFieldDesc) info;
- return new ExprNodeFieldDesc(desc.getTypeInfo(), desc.getDesc(), desc.getFieldName(), desc.getIsList());
- }
-
- if (fid.getName().equals(ExpressionConstant.NULL)) {
- return new ExprNodeNullDesc();
- }
-
- /**
- * argument expressions: translate argument expressions recursively
- * first, this logic is shared in scalar, aggregation and unnesting
- * function
- */
- List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
- List<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
- for (Mutable<ILogicalExpression> argument : arguments) {
- /**
- * parameters could not be aggregate function desc
- */
- ExprNodeDesc parameter = (ExprNodeDesc) getHiveExpression(argument.getValue(), env);
- parameters.add(parameter);
- }
-
- /**
- * get expression
- */
- if (funcExpr instanceof ScalarFunctionCallExpression) {
- String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getHiveFunctionName(fid);
- GenericUDF udf;
- if (udfName != null) {
- /**
- * get corresponding function info for built-in functions
- */
- FunctionInfo fInfo = FunctionRegistry.getFunctionInfo(udfName);
- udf = fInfo.getGenericUDF();
-
- int inputSize = parameters.size();
- List<ExprNodeDesc> currentDescs = new ArrayList<ExprNodeDesc>();
-
- // generate expression tree if necessary
- while (inputSize > 2) {
- int pairs = inputSize / 2;
- for (int i = 0; i < pairs; i++) {
- List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>();
- descs.add(parameters.get(2 * i));
- descs.add(parameters.get(2 * i + 1));
- ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, descs);
- currentDescs.add(desc);
- }
-
- if (inputSize % 2 != 0) {
- // List<ExprNodeDesc> descs = new
- // ArrayList<ExprNodeDesc>();
- // ExprNodeDesc lastExpr =
- // currentDescs.remove(currentDescs.size() - 1);
- // descs.add(lastExpr);
- currentDescs.add(parameters.get(inputSize - 1));
- // ExprNodeDesc desc =
- // ExprNodeGenericFuncDesc.newInstance(udf, descs);
- // currentDescs.add(desc);
- }
- inputSize = currentDescs.size();
- parameters.clear();
- parameters.addAll(currentDescs);
- currentDescs.clear();
- }
-
- } else {
- Object secondInfo = ((HiveFunctionInfo) funcInfo).getInfo();
- if (secondInfo != null) {
-
- /**
- * for GenericUDFBridge: we should not call get type of
- * this hive expression, because parameters may have
- * been changed!
- */
- ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) ((HiveFunctionInfo) funcInfo)
- .getInfo();
- udf = hiveExpr.getGenericUDF();
- } else {
- /**
- * for other generic UDF
- */
- Class<?> udfClass;
- try {
- udfClass = Class.forName(fid.getName());
- udf = (GenericUDF) udfClass.newInstance();
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- }
- }
- /**
- * get hive generic function expression
- */
- ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, parameters);
- return desc;
- } else if (funcExpr instanceof AggregateFunctionCallExpression) {
- /**
- * hive aggregation info
- */
- AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
- .getInfo();
- /**
- * set parameters
- */
- aggregateDesc.setParameters((ArrayList<ExprNodeDesc>) parameters);
-
- List<TypeInfo> originalParameterTypeInfos = new ArrayList<TypeInfo>();
- for (ExprNodeDesc parameter : parameters) {
- if (parameter.getTypeInfo() instanceof StructTypeInfo) {
- originalParameterTypeInfos.add(TypeInfoFactory.doubleTypeInfo);
- } else
- originalParameterTypeInfos.add(parameter.getTypeInfo());
- }
-
- GenericUDAFEvaluator eval = FunctionRegistry.getGenericUDAFEvaluator(
- aggregateDesc.getGenericUDAFName(), originalParameterTypeInfos, aggregateDesc.getDistinct(),
- false);
-
- AggregationDesc newAggregateDesc = new AggregationDesc(aggregateDesc.getGenericUDAFName(), eval,
- aggregateDesc.getParameters(), aggregateDesc.getDistinct(), aggregateDesc.getMode());
- return newAggregateDesc;
- } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
- /**
- * type inference for UDTF function
- */
- UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
- String funcName = hiveDesc.getUDTFName();
- FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
- GenericUDTF udtf = fi.getGenericUDTF();
- UDTFDesc desc = new UDTFDesc(udtf);
- return desc;
- } else {
- throw new IllegalStateException("unrecognized function expression " + expr.getClass().getName());
- }
- } else if ((expr.getExpressionTag() == LogicalExpressionTag.VARIABLE)) {
- /**
- * get type for variable in the environment
- */
- VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
- LogicalVariable var = varExpr.getVariableReference();
- TypeInfo typeInfo = (TypeInfo) env.getVarType(var);
- ExprNodeDesc desc = new ExprNodeColumnDesc(typeInfo, var.toString(), "", false);
- return desc;
- } else if ((expr.getExpressionTag() == LogicalExpressionTag.CONSTANT)) {
- /**
- * get expression for constant in the environment
- */
- ConstantExpression varExpr = (ConstantExpression) expr;
- Object value = ((HivesterixConstantValue) varExpr.getValue()).getObject();
- ExprNodeDesc desc = new ExprNodeConstantDesc(value);
- return desc;
- } else {
- throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
- }
- }
-}
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+
+public class ExpressionTranslator {
+
+ public static Object getHiveExpression(ILogicalExpression expr, IVariableTypeEnvironment env) throws Exception {
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ /**
+ * function expression
+ */
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+ IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
+ FunctionIdentifier fid = funcInfo.getFunctionIdentifier();
+
+ if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
+ Object info = ((HiveFunctionInfo) funcInfo).getInfo();
+ ExprNodeFieldDesc desc = (ExprNodeFieldDesc) info;
+ return new ExprNodeFieldDesc(desc.getTypeInfo(), desc.getDesc(), desc.getFieldName(), desc.getIsList());
+ }
+
+ if (fid.getName().equals(ExpressionConstant.NULL)) {
+ return new ExprNodeNullDesc();
+ }
+
+ /**
+ * argument expressions: translate argument expressions recursively
+ * first, this logic is shared in scalar, aggregation and unnesting
+ * function
+ */
+ List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
+ List<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
+ for (Mutable<ILogicalExpression> argument : arguments) {
+ /**
+ * parameters could not be aggregate function desc
+ */
+ ExprNodeDesc parameter = (ExprNodeDesc) getHiveExpression(argument.getValue(), env);
+ parameters.add(parameter);
+ }
+
+ /**
+ * get expression
+ */
+ if (funcExpr instanceof ScalarFunctionCallExpression) {
+ String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getHiveFunctionName(fid);
+ GenericUDF udf;
+ if (udfName != null) {
+ /**
+ * get corresponding function info for built-in functions
+ */
+ FunctionInfo fInfo = FunctionRegistry.getFunctionInfo(udfName);
+ udf = fInfo.getGenericUDF();
+
+ int inputSize = parameters.size();
+ List<ExprNodeDesc> currentDescs = new ArrayList<ExprNodeDesc>();
+
+ // generate expression tree if necessary
+ while (inputSize > 2) {
+ int pairs = inputSize / 2;
+ for (int i = 0; i < pairs; i++) {
+ List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>();
+ descs.add(parameters.get(2 * i));
+ descs.add(parameters.get(2 * i + 1));
+ ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, descs);
+ currentDescs.add(desc);
+ }
+
+ if (inputSize % 2 != 0) {
+ // List<ExprNodeDesc> descs = new
+ // ArrayList<ExprNodeDesc>();
+ // ExprNodeDesc lastExpr =
+ // currentDescs.remove(currentDescs.size() - 1);
+ // descs.add(lastExpr);
+ currentDescs.add(parameters.get(inputSize - 1));
+ // ExprNodeDesc desc =
+ // ExprNodeGenericFuncDesc.newInstance(udf, descs);
+ // currentDescs.add(desc);
+ }
+ inputSize = currentDescs.size();
+ parameters.clear();
+ parameters.addAll(currentDescs);
+ currentDescs.clear();
+ }
+
+ } else {
+ Object secondInfo = ((HiveFunctionInfo) funcInfo).getInfo();
+ if (secondInfo != null) {
+
+ /**
+ * for GenericUDFBridge: we should not call get type of
+ * this hive expression, because parameters may have
+ * been changed!
+ */
+ ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) ((HiveFunctionInfo) funcInfo)
+ .getInfo();
+ udf = hiveExpr.getGenericUDF();
+ } else {
+ /**
+ * for other generic UDF
+ */
+ Class<?> udfClass;
+ try {
+ udfClass = Class.forName(fid.getName());
+ udf = (GenericUDF) udfClass.newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ }
+ }
+ /**
+ * get hive generic function expression
+ */
+ ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, parameters);
+ return desc;
+ } else if (funcExpr instanceof AggregateFunctionCallExpression) {
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
+ .getInfo();
+ /**
+ * set parameters
+ */
+ aggregateDesc.setParameters((ArrayList<ExprNodeDesc>) parameters);
+
+ List<TypeInfo> originalParameterTypeInfos = new ArrayList<TypeInfo>();
+ for (ExprNodeDesc parameter : parameters) {
+ if (parameter.getTypeInfo() instanceof StructTypeInfo) {
+ originalParameterTypeInfos.add(TypeInfoFactory.doubleTypeInfo);
+ } else
+ originalParameterTypeInfos.add(parameter.getTypeInfo());
+ }
+
+ List<ObjectInspector> originalParameterOIs = new ArrayList<ObjectInspector>();
+ for (TypeInfo type : originalParameterTypeInfos) {
+ originalParameterOIs.add(LazyUtils.getLazyObjectInspectorFromTypeInfo(type, false));
+ }
+ GenericUDAFEvaluator eval = FunctionRegistry.getGenericUDAFEvaluator(
+ aggregateDesc.getGenericUDAFName(), originalParameterOIs, aggregateDesc.getDistinct(), false);
+
+ AggregationDesc newAggregateDesc = new AggregationDesc(aggregateDesc.getGenericUDAFName(), eval,
+ aggregateDesc.getParameters(), aggregateDesc.getDistinct(), aggregateDesc.getMode());
+ return newAggregateDesc;
+ } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
+ /**
+ * type inference for UDTF function
+ */
+ UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
+ String funcName = hiveDesc.getUDTFName();
+ FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
+ GenericUDTF udtf = fi.getGenericUDTF();
+ UDTFDesc desc = new UDTFDesc(udtf);
+ return desc;
+ } else {
+ throw new IllegalStateException("unrecognized function expression " + expr.getClass().getName());
+ }
+ } else if ((expr.getExpressionTag() == LogicalExpressionTag.VARIABLE)) {
+ /**
+ * get type for variable in the environment
+ */
+ VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
+ LogicalVariable var = varExpr.getVariableReference();
+ TypeInfo typeInfo = (TypeInfo) env.getVarType(var);
+ ExprNodeDesc desc = new ExprNodeColumnDesc(typeInfo, var.toString(), "", false);
+ return desc;
+ } else if ((expr.getExpressionTag() == LogicalExpressionTag.CONSTANT)) {
+ /**
+ * get expression for constant in the environment
+ */
+ ConstantExpression varExpr = (ConstantExpression) expr;
+ Object value = ((HivesterixConstantValue) varExpr.getValue()).getObject();
+ ExprNodeDesc desc = new ExprNodeConstantDesc(value);
+ return desc;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-dist/pom.xml b/hivesterix/hivesterix-dist/pom.xml
index 8ecdfe1..917487f 100644
--- a/hivesterix/hivesterix-dist/pom.xml
+++ b/hivesterix/hivesterix-dist/pom.xml
@@ -1,27 +1,22 @@
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>hivesterix-dist</artifactId>
<name>hivesterix-dist</name>
<parent>
- <groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>hivesterix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
- </parent>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix</artifactId>
+ <version>0.2.10-SNAPSHOT</version>
+ </parent>
<dependencies>
<dependency>
@@ -32,298 +27,37 @@
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>4.8.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>args4j</groupId>
- <artifactId>args4j</artifactId>
- <version>2.0.12</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.json</groupId>
- <artifactId>json</artifactId>
- <version>20090211</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-servlet</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>jline</groupId>
- <artifactId>jline</artifactId>
- <version>0.9.94</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-core</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-connectionpool</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-enhancer</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-rdbms</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-dbcp</groupId>
- <artifactId>commons-dbcp</artifactId>
- <version>1.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-pool</groupId>
- <artifactId>commons-pool</artifactId>
- <version>1.5.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-collections</groupId>
- <artifactId>commons-collections</artifactId>
- <version>3.2.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- <version>2.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>javax</groupId>
- <artifactId>jdo2-api</artifactId>
- <version>2.3-ec</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.facebook</groupId>
- <artifactId>libfb303</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>cli</artifactId>
- <version>1.2</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache</groupId>
- <artifactId>log4j</artifactId>
- <version>1.2.15</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>antlr-runtime</artifactId>
- <version>3.0.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-cli</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-common</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-hwi</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-jdbc</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-metastore</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-service</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-shims</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-serde</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-cli</groupId>
- <artifactId>commons-cli</artifactId>
- <version>1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- <version>1.1.1</version>
- <type>jar</type>
- <classifier>api</classifier>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- <version>r06</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>stringtemplate</artifactId>
- <version>3.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.derby</groupId>
- <artifactId>derby</artifactId>
- <version>10.8.1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <version>0.20.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-translator</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-optimizer</artifactId>
- <version>0.2.7-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.90.3</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
@@ -474,6 +208,23 @@
</executions>
</plugin>
<plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>2.2-beta-5</version>
+ <executions>
+ <execution>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>attached</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>2.5</version>
<configuration>
diff --git a/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml b/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
index aeb3fb4..e2da26a 100755
--- a/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
+++ b/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
@@ -1,17 +1,12 @@
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<assembly>
<id>binary-assembly</id>
<formats>
@@ -21,20 +16,30 @@
<includeBaseDirectory>false</includeBaseDirectory>
<fileSets>
<fileSet>
- <directory>target/appassembler/bin</directory>
+ <directory>src/main/resources/conf</directory>
+ <outputDirectory>conf</outputDirectory>
+ <fileMode>0755</fileMode>
+ </fileSet>
+ <fileSet>
+ <directory>src/main/resources/scripts</directory>
<outputDirectory>bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>target/appassembler/lib</directory>
<outputDirectory>lib</outputDirectory>
+ <includes>
+ <include>*.jar</include>
+ </includes>
+ <fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>target</directory>
<outputDirectory>lib</outputDirectory>
<includes>
- <include>*.jar</include>
+ <include>a-hive-patch.jar</include>
</includes>
+ <fileMode>0755</fileMode>
</fileSet>
</fileSets>
-</assembly>
+</assembly>
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
index d3bcaca..7b88de4 100644
--- a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
+++ b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
@@ -36,16 +36,20 @@
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.MoveTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.mapred.Reporter;
import edu.uci.ics.hivesterix.common.config.ConfUtil;
import edu.uci.ics.hivesterix.logical.expression.HiveExpressionTypeComputer;
@@ -214,7 +218,6 @@
// get all leave Ops
getLeaves(rootOps, leaveOps);
-
HiveAlgebricksTranslator translator = new HiveAlgebricksTranslator();
try {
translator.translate(rootOps, null, aliasToPath);
@@ -222,7 +225,7 @@
ILogicalPlan plan = translator.genLogicalPlan();
if (plan.getRoots() != null && plan.getRoots().size() > 0 && plan.getRoots().get(0).getValue() != null) {
- translator.printOperators();
+ //translator.printOperators();
ILogicalPlanAndMetadata planAndMetadata = new HiveLogicalPlanAndMetaData(plan,
translator.getMetadataProvider());
@@ -238,7 +241,7 @@
StringBuilder buffer = new StringBuilder();
PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
String planStr = buffer.toString();
- System.out.println(planStr);
+ LOG.info(planStr);
if (planPrinter != null)
planPrinter.print(planStr);
@@ -377,6 +380,12 @@
// remove map-reduce branches in condition task
ConditionalTask condition = (ConditionalTask) task;
List<Task<? extends Serializable>> branches = condition.getListTasks();
+ for (Task branch : branches) {
+ if (branch instanceof MoveTask) {
+ //return articulateMapReduceOperators(branch, rootOps, aliasToPath, rootTasks);
+ return null;
+ }
+ }
for (int i = branches.size() - 1; i >= 0; i--) {
Task branch = branches.get(i);
if (branch instanceof MapRedTask) {
@@ -396,7 +405,7 @@
MapRedTask mrtask = (MapRedTask) task;
MapredWork work = (MapredWork) mrtask.getWork();
- HashMap<String, Operator<? extends Serializable>> operators = work.getAliasToWork();
+ HashMap<String, Operator<? extends OperatorDesc>> operators = work.getAliasToWork();
Set entries = operators.entrySet();
Iterator<Entry<String, Operator>> iterator = entries.iterator();
@@ -414,7 +423,7 @@
// get map local work
MapredLocalWork localWork = work.getMapLocalWork();
if (localWork != null) {
- HashMap<String, Operator<? extends Serializable>> localOperators = localWork.getAliasToWork();
+ HashMap<String, Operator<? extends OperatorDesc>> localOperators = localWork.getAliasToWork();
Set localEntries = localOperators.entrySet();
Iterator<Entry<String, Operator>> localIterator = localEntries.iterator();
@@ -479,9 +488,9 @@
for (Operator childMap : childMapOps) {
if (childMap instanceof TableScanOperator) {
TableScanDesc topDesc = (TableScanDesc) childMap.getConf();
- if (topDesc == null)
+ if (topDesc == null || topDesc.getAlias() == null) {
mapChildren.add(childMap);
- else {
+ } else {
rootOps.add(childMap);
}
} else {
@@ -501,9 +510,14 @@
}
i = 0;
for (Operator child : mapChildren) {
- if (child.getParentOperators() == null || child.getParentOperators().size() == 0)
+ if (child.getParentOperators() == null || child.getParentOperators().size() == 0) {
child.setParentOperators(new ArrayList<Operator>());
- child.getParentOperators().add(leafs.get(i));
+ }
+ if (i < leafs.size()) {
+                if (child.getParentOperators().size() == 0) {
+ child.getParentOperators().add(leafs.get(i));
+ }
+ }
i++;
}
}
@@ -603,10 +617,10 @@
String specPath = desc.getDirName();
DynamicPartitionCtx dpCtx = desc.getDynPartCtx();
// for 0.7.0
- fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);
+ //fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);
// for 0.8.0
- // Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx,
- // desc);
+ //Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx, desc);
+ Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx, desc, Reporter.NULL);
}
}
}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
index 4ef74e9..64a3f12 100644
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
@@ -1,17 +1,3 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -42,11 +28,13 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
+import java.util.concurrent.ConcurrentLinkedQueue;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
@@ -62,6 +50,7 @@
import org.apache.hadoop.hive.ql.exec.ExecDriver;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.MoveTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.StatsTask;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -86,23 +75,22 @@
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.ErrorMsg;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
+import org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
-import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
@@ -112,6 +100,7 @@
import org.apache.hadoop.hive.ql.plan.ConditionalResolver;
import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.processors.CommandProcessor;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
@@ -127,15 +116,18 @@
import edu.uci.ics.hivesterix.runtime.exec.HyracksExecutionEngine;
import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;
-@SuppressWarnings({ "deprecation", "unused" })
+@SuppressWarnings({ "deprecation", "unchecked", "rawtypes" })
public class Driver implements CommandProcessor {
+ // hivesterix
+ private IExecutionEngine engine;
+ private boolean hivesterix = false;
+ private Set<Task> executedConditionalTsks = new HashSet<Task>();
+
static final private Log LOG = LogFactory.getLog(Driver.class.getName());
static final private LogHelper console = new LogHelper(LOG);
- // hive-sterix
- private IExecutionEngine engine;
- private boolean hivesterix = false;
+ private static final Object compileMonitor = new Object();
private int maxRows = 100;
ByteStream.Output bos = new ByteStream.Output();
@@ -152,23 +144,57 @@
// A limit on the number of threads that can be launched
private int maxthreads;
- private final int sleeptime = 2000;
-
+ private static final int SLEEP_TIME = 2000;
protected int tryCount = Integer.MAX_VALUE;
- private int checkLockManager() {
+ /**
+ * for backwards compatibility with current tests
+ */
+ public Driver(HiveConf conf) {
+ this.conf = conf;
+
+ }
+
+ public Driver() {
+ if (SessionState.get() != null) {
+ conf = SessionState.get().getConf();
+ }
+
+ // hivesterix
+ engine = new HyracksExecutionEngine(conf);
+ }
+
+ // hivesterix: plan printer
+ public Driver(HiveConf conf, PrintWriter planPrinter) {
+ this.conf = conf;
+ engine = new HyracksExecutionEngine(conf, planPrinter);
+ }
+
+ public void clear() {
+ this.hivesterix = false;
+ this.executedConditionalTsks.clear();
+ }
+
+ private boolean checkLockManager() {
boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
- if (supportConcurrency && (hiveLockMgr == null)) {
+ if (!supportConcurrency) {
+ return false;
+ }
+ if ((hiveLockMgr == null)) {
try {
setLockManager();
} catch (SemanticException e) {
errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
SQLState = ErrorMsg.findSQLState(e.getMessage());
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (12);
+ return false;
}
}
- return (0);
+ // the reason that we set the lock manager for the cxt here is because each
+        // query has its own ctx object. The hiveLockMgr is shared across the
+ // same instance of Driver, which can run multiple queries.
+ ctx.setHiveLockMgr(hiveLockMgr);
+ return hiveLockMgr != null;
}
private void setLockManager() throws SemanticException {
@@ -183,6 +209,16 @@
hiveLockMgr = (HiveLockManager) ReflectionUtils.newInstance(conf.getClassByName(lockMgr), conf);
hiveLockMgr.setContext(new HiveLockManagerCtx(conf));
} catch (Exception e) {
+ // set hiveLockMgr to null just in case this invalid manager got set to
+ // next query's ctx.
+ if (hiveLockMgr != null) {
+ try {
+ hiveLockMgr.close();
+ } catch (LockException e1) {
+                    // nothing we can do here
+ }
+ hiveLockMgr = null;
+ }
throw new SemanticException(ErrorMsg.LOCKMGR_NOT_INITIALIZED.getMsg() + e.getMessage());
}
}
@@ -230,10 +266,8 @@
} else if (sem.getFetchTask() != null) {
FetchTask ft = sem.getFetchTask();
TableDesc td = ft.getTblDesc();
- // partitioned tables don't have tableDesc set on the FetchTask.
- // Instead
- // they have a list of PartitionDesc objects, each with a table
- // desc.
+ // partitioned tables don't have tableDesc set on the FetchTask. Instead
+ // they have a list of PartitionDesc objects, each with a table desc.
// Let's
// try to fetch the desc for the first partition and use it's
// deserializer.
@@ -320,59 +354,102 @@
}
/**
- * for backwards compatibility with current tests
- */
- public Driver(HiveConf conf) {
- this.conf = conf;
-
- // hivesterix
- engine = new HyracksExecutionEngine(conf);
- }
-
- public Driver() {
- if (SessionState.get() != null) {
- conf = SessionState.get().getConf();
- }
-
- // hivesterix
- engine = new HyracksExecutionEngine(conf);
- }
-
- // hivesterix: plan printer
- public Driver(HiveConf conf, PrintWriter planPrinter) {
- this.conf = conf;
- engine = new HyracksExecutionEngine(conf, planPrinter);
- }
-
- public void clear() {
- this.hivesterix = false;
- }
-
- /**
- * Compile a new query. Any currently-planned query associated with this
- * Driver is discarded.
+ * Compile a new query. Any currently-planned query associated with this Driver is discarded.
*
* @param command
* The SQL query to compile.
*/
public int compile(String command) {
+ return compile(command, true);
+ }
+
+ /**
+ * Hold state variables specific to each query being executed, that may not
+ * be consistent in the overall SessionState
+ */
+ private static class QueryState {
+ private HiveOperation op;
+ private String cmd;
+ private boolean init = false;
+
+ /**
+ * Initialize the queryState with the query state variables
+ */
+ public void init(HiveOperation op, String cmd) {
+ this.op = op;
+ this.cmd = cmd;
+ this.init = true;
+ }
+
+ public boolean isInitialized() {
+ return this.init;
+ }
+
+ public HiveOperation getOp() {
+ return this.op;
+ }
+
+ public String getCmd() {
+ return this.cmd;
+ }
+ }
+
+ public void saveSession(QueryState qs) {
+ SessionState oldss = SessionState.get();
+ if (oldss != null && oldss.getHiveOperation() != null) {
+ qs.init(oldss.getHiveOperation(), oldss.getCmd());
+ }
+ }
+
+ public void restoreSession(QueryState qs) {
+ SessionState ss = SessionState.get();
+ if (ss != null && qs != null && qs.isInitialized()) {
+ ss.setCmd(qs.getCmd());
+ ss.setCommandType(qs.getOp());
+ }
+ }
+
+ /**
+ * Compile a new query, but potentially reset taskID counter. Not resetting task counter
+ * is useful for generating re-entrant QL queries.
+ *
+ * @param command
+ * The HiveQL query to compile
+ * @param resetTaskIds
+ * Resets taskID counter if true.
+ * @return 0 for ok
+ */
+ public int compile(String command, boolean resetTaskIds) {
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.COMPILE);
+
+ //holder for parent command type/string when executing reentrant queries
+ QueryState queryState = new QueryState();
+
if (plan != null) {
close();
plan = null;
}
- TaskFactory.resetId();
+ if (resetTaskIds) {
+ TaskFactory.resetId();
+ }
+ saveSession(queryState);
try {
command = new VariableSubstitution().substitute(conf, command);
ctx = new Context(conf);
+ ctx.setTryCount(getTryCount());
+ ctx.setCmd(command);
+ ctx.setHDFSCleanup(true);
ParseDriver pd = new ParseDriver();
ASTNode tree = pd.parse(command, ctx);
tree = ParseUtils.findRootNonNullToken(tree);
BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
- List<AbstractSemanticAnalyzerHook> saHooks = getSemanticAnalyzerHooks();
+ List<AbstractSemanticAnalyzerHook> saHooks = getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK,
+ AbstractSemanticAnalyzerHook.class);
// Do semantic analysis and plan generation
if (saHooks != null) {
@@ -382,6 +459,7 @@
tree = hook.preAnalyze(hookCtx, tree);
}
sem.analyze(tree, ctx);
+ hookCtx.update(sem);
for (AbstractSemanticAnalyzerHook hook : saHooks) {
hook.postAnalyze(hookCtx, sem.getRootTasks());
}
@@ -394,19 +472,10 @@
// validate the plan
sem.validate();
- plan = new QueryPlan(command, sem);
- // initialize FetchTask right here
- if (plan.getFetchTask() != null) {
- plan.getFetchTask().initialize(conf, plan, null);
- }
-
- // get the output schema
- schema = getSchema(sem, conf);
+ plan = new QueryPlan(command, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN));
// test Only - serialize the query plan and deserialize it
- if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
-
- Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+ if ("true".equalsIgnoreCase(System.getProperty("test.serialize.qplan"))) {
String queryPlanFileName = ctx.getLocalScratchDir(true) + Path.SEPARATOR_CHAR + "queryplan.xml";
LOG.info("query plan = " + queryPlanFileName);
@@ -431,17 +500,24 @@
plan.getFetchTask().initialize(conf, plan, null);
}
- // do the authorization check
+ // get the output schema
+ schema = getSchema(sem, conf);
+
+ //do the authorization check
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
try {
- // doAuthorization(sem);
+ perfLogger.PerfLogBegin(LOG, PerfLogger.DO_AUTHORIZATION);
+ doAuthorization(sem);
} catch (AuthorizationException authExp) {
console.printError("Authorization failed:" + authExp.getMessage()
+ ". Use show grant to get more details.");
return 403;
+ } finally {
+ perfLogger.PerfLogEnd(LOG, PerfLogger.DO_AUTHORIZATION);
}
}
+ //restore state after we're done executing a specific query
// hyracks run
if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
int engineRet = engine.compileJob(sem.getRootTasks());
@@ -450,21 +526,19 @@
}
}
return 0;
- } catch (SemanticException e) {
- errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (10);
- } catch (ParseException e) {
- errorMessage = "FAILED: Parse Error: " + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (11);
} catch (Exception e) {
- errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (12);
+ ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
+ errorMessage = "FAILED: " + e.getClass().getSimpleName();
+ if (error != ErrorMsg.GENERIC_ERROR) {
+ errorMessage += " [Error " + error.getErrorCode() + "]:";
+ }
+ errorMessage += " " + e.getMessage();
+ SQLState = error.getSQLState();
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return error.getErrorCode();
+ } finally {
+ perfLogger.PerfLogEnd(LOG, PerfLogger.COMPILE);
+ restoreSession(queryState);
}
}
@@ -479,13 +553,13 @@
ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
} else {
- // if (op.equals(HiveOperation.IMPORT)) {
- // ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
- // if (!isa.existsTable()) {
- ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
- HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
- // }
- // }
+ if (op.equals(HiveOperation.IMPORT)) {
+ ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
+ if (!isa.existsTable()) {
+ ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
+ HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
+ }
+ }
}
if (outputs != null && outputs.size() > 0) {
for (WriteEntity write : outputs) {
@@ -513,8 +587,8 @@
Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
for (ReadEntity read : inputs) {
- if (read.getPartition() != null) {
- Table tbl = read.getTable();
+ Table tbl = read.getTable();
+ if ((read.getPartition() != null) || (tbl.isPartitioned())) {
String tblName = tbl.getTableName();
if (tableUsePartLevelAuth.get(tblName) == null) {
boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE"
@@ -533,9 +607,9 @@
ParseContext parseCtx = querySem.getParseContext();
Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();
- for (Map.Entry<String, Operator<? extends Serializable>> topOpMap : querySem.getParseContext()
+ for (Map.Entry<String, Operator<? extends OperatorDesc>> topOpMap : querySem.getParseContext()
.getTopOps().entrySet()) {
- Operator<? extends Serializable> topOp = topOpMap.getValue();
+ Operator<? extends OperatorDesc> topOp = topOpMap.getValue();
if (topOp instanceof TableScanOperator && tsoTopMap.containsKey(topOp)) {
TableScanOperator tableScanOp = (TableScanOperator) topOp;
Table tbl = tsoTopMap.get(tableScanOp);
@@ -551,7 +625,10 @@
cols.add(columns.get(i).getName());
}
}
- if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName())) {
+ //map may not contain all sources, since input list may have been optimized out
+                    //or be non-existent, though such sources may still be referenced by the TableScanOperator
+ //if it's null then the partition probably doesn't exist so let's use table permission
+ if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
String alias_id = topOpMap.getKey();
PrunedPartitionList partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
parseCtx.getOpToPartPruner().get(topOp), parseCtx.getConf(), alias_id,
@@ -582,30 +659,28 @@
// cache the results for table authorization
Set<String> tableAuthChecked = new HashSet<String>();
for (ReadEntity read : inputs) {
- Table tbl = null;
+ Table tbl = read.getTable();
if (read.getPartition() != null) {
- tbl = read.getPartition().getTable();
+ Partition partition = read.getPartition();
+ tbl = partition.getTable();
// use partition level authorization
- if (tableUsePartLevelAuth.get(tbl.getTableName())) {
- List<String> cols = part2Cols.get(read.getPartition());
+ if (tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
+ List<String> cols = part2Cols.get(partition);
if (cols != null && cols.size() > 0) {
- ss.getAuthorizer().authorize(read.getPartition().getTable(), read.getPartition(), cols,
+ ss.getAuthorizer().authorize(partition.getTable(), partition, cols,
op.getInputRequiredPrivileges(), null);
} else {
- ss.getAuthorizer().authorize(read.getPartition(), op.getInputRequiredPrivileges(), null);
+ ss.getAuthorizer().authorize(partition, op.getInputRequiredPrivileges(), null);
}
continue;
}
- } else if (read.getTable() != null) {
- tbl = read.getTable();
}
- // if we reach here, it means it needs to do a table
- // authorization
- // check, and the table authorization may already happened
- // because of other
+ // if we reach here, it means a table authorization check is needed, and the table
+ // authorization may have already happened because of other
// partitions
- if (tbl != null && !tableAuthChecked.contains(tbl.getTableName())) {
+ if (tbl != null && !tableAuthChecked.contains(tbl.getTableName())
+ && !(tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE)) {
List<String> cols = tab2Cols.get(tbl);
if (cols != null && cols.size() > 0) {
ss.getAuthorizer().authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null);
@@ -632,16 +707,15 @@
* @param p
* The partition to be locked
* @param mode
- * The mode of the lock (SHARED/EXCLUSIVE) Get the list of
- * objects to be locked. If a partition needs to be locked (in
- * any mode), all its parents should also be locked in SHARED
- * mode.
+ * The mode of the lock (SHARED/EXCLUSIVE). Get the list of objects to be locked. If a
+ * partition needs to be locked (in any mode), all its parents should also be locked in
+ * SHARED mode.
**/
private List<HiveLockObj> getLockObjects(Table t, Partition p, HiveLockMode mode) throws SemanticException {
List<HiveLockObj> locks = new LinkedList<HiveLockObj>();
HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
- .currentTimeMillis()), "IMPLICIT");
+ .currentTimeMillis()), "IMPLICIT", plan.getQueryStr());
if (t != null) {
locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
@@ -665,16 +739,20 @@
name = p.getName().split("@")[2];
}
- String partName = name;
String partialName = "";
String[] partns = name.split("/");
int len = p instanceof DummyPartition ? partns.length : partns.length - 1;
+ Map<String, String> partialSpec = new LinkedHashMap<String, String>();
for (int idx = 0; idx < len; idx++) {
String partn = partns[idx];
partialName += partn;
+ String[] nameValue = partn.split("=");
+ assert (nameValue.length == 2);
+ partialSpec.put(nameValue[0], nameValue[1]);
try {
locks.add(new HiveLockObj(new HiveLockObject(new DummyPartition(p.getTable(), p.getTable()
- .getDbName() + "/" + p.getTable().getTableName() + "/" + partialName), lockData), mode));
+ .getDbName() + "/" + p.getTable().getTableName() + "/" + partialName, partialSpec),
+ lockData), mode));
partialName += "/";
} catch (HiveException e) {
throw new SemanticException(e.getMessage());
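As a minimal, self-contained sketch of the partial-name loop above (plain Java, not the Hive lock API; the partition name and class name here are hypothetical), each '/'-separated prefix of a partition name maps to one parent object that would receive a SHARED lock:

import java.util.LinkedHashMap;
import java.util.Map;

public class PartitionPrefixSketch {
    public static void main(String[] args) {
        // hypothetical partition name in the same key=value/key=value form
        String name = "ds=2008-04-08/hr=11";
        String[] partns = name.split("/");
        String partialName = "";
        Map<String, String> partialSpec = new LinkedHashMap<String, String>();
        for (String partn : partns) {
            partialName += partn;
            String[] nameValue = partn.split("=");
            partialSpec.put(nameValue[0], nameValue[1]);
            // in the driver, each prefix becomes one DummyPartition lock object
            System.out.println(partialName + " -> " + partialSpec);
            partialName += "/";
        }
    }
}

For a real (non-dummy) partition the driver stops one level short (partns.length - 1), so only the parents of the partition are locked this way.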
@@ -688,17 +766,16 @@
}
/**
- * Acquire read and write locks needed by the statement. The list of objects
- * to be locked are obtained from he inputs and outputs populated by the
- * compiler. The lock acuisition scheme is pretty simple. If all the locks
- * cannot be obtained, error out. Deadlock is avoided by making sure that
- * the locks are lexicographically sorted.
+ * Acquire read and write locks needed by the statement. The list of objects to be locked is
+ * obtained from the inputs and outputs populated by the compiler. The lock acquisition scheme is
+ * pretty simple: if all the locks cannot be obtained, error out. Deadlock is avoided by making
+ * sure that the locks are lexicographically sorted.
**/
public int acquireReadWriteLocks() {
- try {
- int sleepTime = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES) * 1000;
- int numRetries = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES);
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
+ try {
boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
if (!supportConcurrency) {
return 0;
@@ -707,8 +784,7 @@
List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>();
// Sort all the inputs, outputs.
- // If a lock needs to be acquired on any partition, a read lock
- // needs to be acquired on all
+ // If a lock needs to be acquired on any partition, a read lock needs to be acquired on all
// its parents also
for (ReadEntity input : plan.getInputs()) {
if (input.getType() == ReadEntity.Type.TABLE) {
@@ -719,16 +795,21 @@
}
for (WriteEntity output : plan.getOutputs()) {
+ List<HiveLockObj> lockObj = null;
if (output.getTyp() == WriteEntity.Type.TABLE) {
- lockObjects.addAll(getLockObjects(output.getTable(), null,
- output.isComplete() ? HiveLockMode.EXCLUSIVE : HiveLockMode.SHARED));
+ lockObj = getLockObjects(output.getTable(), null, output.isComplete() ? HiveLockMode.EXCLUSIVE
+ : HiveLockMode.SHARED);
} else if (output.getTyp() == WriteEntity.Type.PARTITION) {
- lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.EXCLUSIVE));
+ lockObj = getLockObjects(null, output.getPartition(), HiveLockMode.EXCLUSIVE);
}
- // In case of dynamic queries, it is possible to have incomplete
- // dummy partitions
+ // In case of dynamic queries, it is possible to have incomplete dummy partitions
else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) {
- lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.SHARED));
+ lockObj = getLockObjects(null, output.getPartition(), HiveLockMode.SHARED);
+ }
+
+ if (lockObj != null) {
+ lockObjects.addAll(lockObj);
+ ctx.getOutputLockObjects().put(output, lockObj);
}
}
@@ -736,13 +817,8 @@
return 0;
}
- int ret = checkLockManager();
- if (ret != 0) {
- return ret;
- }
-
HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
- .currentTimeMillis()), "IMPLICIT");
+ .currentTimeMillis()), "IMPLICIT", plan.getQueryStr());
// Lock the database also
try {
@@ -753,25 +829,7 @@
throw new SemanticException(e.getMessage());
}
- ctx.setHiveLockMgr(hiveLockMgr);
- List<HiveLock> hiveLocks = null;
-
- int tryNum = 1;
- do {
-
- // ctx.getHiveLockMgr();
- // hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false);
-
- if (hiveLocks != null) {
- break;
- }
-
- tryNum++;
- try {
- Thread.sleep(sleepTime);
- } catch (InterruptedException e) {
- }
- } while (tryNum < numRetries);
+ List<HiveLock> hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false);
if (hiveLocks == null) {
throw new SemanticException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
@@ -785,138 +843,207 @@
SQLState = ErrorMsg.findSQLState(e.getMessage());
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
return (10);
- } catch (Exception e) {
+ } catch (LockException e) {
errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
SQLState = ErrorMsg.findSQLState(e.getMessage());
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
return (10);
- }
- }
-
- /**
- * Release all the locks acquired implicitly by the statement. Note that the
- * locks acquired with 'keepAlive' set to True are not released.
- **/
- private void releaseLocks() {
- if (ctx != null && ctx.getHiveLockMgr() != null) {
- try {
- ctx.getHiveLockMgr().close();
- ctx.setHiveLocks(null);
- } catch (LockException e) {
- }
+ } finally {
+ perfLogger.PerfLogEnd(LOG, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
}
}
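The deadlock-avoidance claim in the comment above rests on a total ordering of lock names; a small standalone sketch (plain Java, hypothetical lock names) shows why two statements that both sort before acquiring can never end up waiting on each other in a cycle:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class LockOrderSketch {
    public static void main(String[] args) {
        // two concurrent statements that want the same pair of locks, in opposite orders
        List<String> stmtA = new ArrayList<String>(Arrays.asList("default/t2", "default/t1"));
        List<String> stmtB = new ArrayList<String>(Arrays.asList("default/t1", "default/t2"));
        Collections.sort(stmtA);
        Collections.sort(stmtB);
        // both now request default/t1 before default/t2, so the circular wait
        // (A holds t1 wanting t2 while B holds t2 wanting t1) cannot arise
        System.out.println(stmtA);
        System.out.println(stmtB);
    }
}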
/**
* @param hiveLocks
- * list of hive locks to be released Release all the locks
- * specified. If some of the locks have already been released,
- * ignore them
+ * list of hive locks to be released. Release all the locks specified. If some of the
+ * locks have already been released, ignore them.
**/
private void releaseLocks(List<HiveLock> hiveLocks) {
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.RELEASE_LOCKS);
+
if (hiveLocks != null) {
ctx.getHiveLockMgr().releaseLocks(hiveLocks);
}
ctx.setHiveLocks(null);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.RELEASE_LOCKS);
}
- public CommandProcessorResponse run(String command) {
+ public CommandProcessorResponse run(String command) throws CommandNeedRetryException {
errorMessage = null;
SQLState = null;
- int ret = compile(command);
+ if (!validateConfVariables()) {
+ return new CommandProcessorResponse(12, errorMessage, SQLState);
+ }
+
+ HiveDriverRunHookContext hookContext = new HiveDriverRunHookContextImpl(conf, command);
+ // Get all the driver run hooks and pre-execute them.
+ List<HiveDriverRunHook> driverRunHooks;
+ try {
+ driverRunHooks = getHooks(HiveConf.ConfVars.HIVE_DRIVER_RUN_HOOKS, HiveDriverRunHook.class);
+ for (HiveDriverRunHook driverRunHook : driverRunHooks) {
+ driverRunHook.preDriverRun(hookContext);
+ }
+ } catch (Exception e) {
+ errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return new CommandProcessorResponse(12, errorMessage, SQLState);
+ }
+
+ // Reset the perf logger
+ PerfLogger perfLogger = PerfLogger.getPerfLogger(true);
+ perfLogger.PerfLogBegin(LOG, PerfLogger.DRIVER_RUN);
+ perfLogger.PerfLogBegin(LOG, PerfLogger.TIME_TO_SUBMIT);
+
+ int ret;
+ synchronized (compileMonitor) {
+ ret = compile(command);
+ }
if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
+ releaseLocks(ctx.getHiveLocks());
return new CommandProcessorResponse(ret, errorMessage, SQLState);
}
- // ret = acquireReadWriteLocks();
- if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
- return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ boolean requireLock = false;
+ boolean ckLock = checkLockManager();
+
+ if (ckLock) {
+ boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY);
+ if (lockOnlyMapred) {
+ Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>();
+ taskQueue.addAll(plan.getRootTasks());
+ while (taskQueue.peek() != null) {
+ Task<? extends Serializable> tsk = taskQueue.remove();
+ requireLock = requireLock || tsk.requireLock();
+ if (requireLock) {
+ break;
+ }
+ if (tsk instanceof ConditionalTask) {
+ taskQueue.addAll(((ConditionalTask) tsk).getListTasks());
+ }
+ if (tsk.getChildTasks() != null) {
+ taskQueue.addAll(tsk.getChildTasks());
+ }
+ // do not add the backup task here, because the backup task should be the same
+ // type as the original task.
+ }
+ } else {
+ requireLock = true;
+ }
+ }
+
+ if (requireLock) {
+ ret = acquireReadWriteLocks();
+ if (ret != 0) {
+ releaseLocks(ctx.getHiveLocks());
+ return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ }
}
ret = execute();
if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
+ // if requireLock is false, the release here will do nothing because there is no lock
+ releaseLocks(ctx.getHiveLocks());
return new CommandProcessorResponse(ret, errorMessage, SQLState);
}
- // releaseLocks(ctx.getHiveLocks());
+ // if requireLock is false, the release here will do nothing because there is no lock
+ releaseLocks(ctx.getHiveLocks());
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.DRIVER_RUN);
+ perfLogger.close(LOG, plan);
+
+ // Take all the driver run hooks and post-execute them.
+ try {
+ for (HiveDriverRunHook driverRunHook : driverRunHooks) {
+ driverRunHook.postDriverRun(hookContext);
+ }
+ } catch (Exception e) {
+ errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return new CommandProcessorResponse(12, errorMessage, SQLState);
+ }
+
return new CommandProcessorResponse(ret);
}
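The lock-only-for-mapred path above walks the task graph breadth-first and stops at the first task that needs a lock. A standalone sketch of that traversal follows; Node and needsLock are hypothetical stand-ins for Hive's Task and requireLock():

import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;

public class LockScanSketch {
    static class Node {
        final boolean needsLock;
        final List<Node> children;
        Node(boolean needsLock, Node... children) {
            this.needsLock = needsLock;
            this.children = Arrays.asList(children);
        }
    }

    static boolean anyTaskNeedsLock(List<Node> roots) {
        Queue<Node> taskQueue = new LinkedList<Node>(roots);
        while (taskQueue.peek() != null) {
            Node tsk = taskQueue.remove();
            if (tsk.needsLock) {
                return true;               // same early exit as in run()
            }
            // the driver additionally expands ConditionalTask branches here
            taskQueue.addAll(tsk.children);
        }
        return false;
    }

    public static void main(String[] args) {
        Node root = new Node(false, new Node(false), new Node(true));
        System.out.println(anyTaskNeedsLock(Arrays.asList(root)));  // true
    }
}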
- private List<AbstractSemanticAnalyzerHook> getSemanticAnalyzerHooks() throws Exception {
- ArrayList<AbstractSemanticAnalyzerHook> saHooks = new ArrayList<AbstractSemanticAnalyzerHook>();
- String pestr = conf.getVar(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK);
- if (pestr == null) {
- return saHooks;
+ /**
+ * Validate configuration variables.
+ *
+ * @return true if the configuration variables are valid, false otherwise
+ */
+ private boolean validateConfVariables() {
+ boolean valid = true;
+ if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES))
+ && ((conf.getBoolVar(HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE))
+ || (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) || ((conf
+ .getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE))))) {
+ errorMessage = "FAILED: Hive Internal Error: " + ErrorMsg.SUPPORT_DIR_MUST_TRUE_FOR_LIST_BUCKETING.getMsg();
+ SQLState = ErrorMsg.findSQLState(errorMessage);
+ console.printError(errorMessage + "\n");
+ valid = false;
}
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return saHooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- AbstractSemanticAnalyzerHook hook = HiveUtils.getSemanticAnalyzerHook(conf, peClass);
- saHooks.add(hook);
- } catch (HiveException e) {
- console.printError("Pre Exec Hook Class not found:" + e.getMessage());
- throw e;
- }
- }
-
- return saHooks;
+ return valid;
}
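Stated as a boolean, the guard in validateConfVariables() above says the settings are valid unless subdirectory support is off while any of the three directory-producing features is on. A tiny standalone sketch (hypothetical method and flag names) of that rule:

public class ConfGuardSketch {
    static boolean isValid(boolean supportsSubdirs, boolean recursiveInput,
                           boolean listBucketing, boolean unionRemove) {
        // invalid exactly when support is off and any dependent feature is on
        return supportsSubdirs || !(recursiveInput || listBucketing || unionRemove);
    }

    public static void main(String[] args) {
        System.out.println(isValid(false, true, false, false));  // false -> driver would error out
        System.out.println(isValid(true, true, true, true));     // true
        System.out.println(isValid(false, false, false, false)); // true
    }
}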
- private List<Hook> getPreExecHooks() throws Exception {
- ArrayList<Hook> pehooks = new ArrayList<Hook>();
- String pestr = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS);
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return pehooks;
+ /**
+ * Returns the hooks specified in a configuration variable, in the order they were listed.
+ * See getHooks(HiveConf.ConfVars hookConfVar, Class<T> clazz).
+ *
+ * @param hookConfVar the configuration variable naming the hook classes
+ * @return the list of hooks
+ * @throws Exception
+ */
+ private List<Hook> getHooks(HiveConf.ConfVars hookConfVar) throws Exception {
+ return getHooks(hookConfVar, Hook.class);
+ }
+
+ /**
+ * Returns the hooks specified in a configuration variable. The hooks are returned in a list in
+ * the order they were specified in the configuration variable.
+ *
+ * @param hookConfVar
+ * The configuration variable specifying a comma separated list of the hook
+ * class names.
+ * @param clazz
+ * The super type of the hooks.
+ * @return A list of the hooks cast as the type specified in clazz, in the order
+ * they are listed in the value of hookConfVar
+ * @throws Exception
+ */
+ private <T extends Hook> List<T> getHooks(HiveConf.ConfVars hookConfVar, Class<T> clazz) throws Exception {
+
+ List<T> hooks = new ArrayList<T>();
+ String csHooks = conf.getVar(hookConfVar);
+ if (csHooks == null) {
+ return hooks;
}
- String[] peClasses = pestr.split(",");
+ csHooks = csHooks.trim();
+ if (csHooks.equals("")) {
+ return hooks;
+ }
- for (String peClass : peClasses) {
+ String[] hookClasses = csHooks.split(",");
+
+ for (String hookClass : hookClasses) {
try {
- pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
+ T hook = (T) Class.forName(hookClass.trim(), true, JavaUtils.getClassLoader()).newInstance();
+ hooks.add(hook);
} catch (ClassNotFoundException e) {
- console.printError("Pre Exec Hook Class not found:" + e.getMessage());
+ console.printError(hookConfVar.varname + " Class not found:" + e.getMessage());
throw e;
}
}
- return pehooks;
+ return hooks;
}
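A standalone sketch of the comma-separated, order-preserving class loading that getHooks performs, using plain reflection; the loadHooks helper and the classes passed to it are illustrative only (Object stands in for Hive's Hook interface):

import java.util.ArrayList;
import java.util.List;

public class HookLoadSketch {
    static <T> List<T> loadHooks(String csv, Class<T> clazz) throws Exception {
        List<T> hooks = new ArrayList<T>();
        if (csv == null || csv.trim().equals("")) {
            return hooks;
        }
        for (String name : csv.trim().split(",")) {
            // instantiate each class reflectively, preserving the listed order
            hooks.add(clazz.cast(Class.forName(name.trim()).newInstance()));
        }
        return hooks;
    }

    public static void main(String[] args) throws Exception {
        // ArrayList and HashMap are used only as trivially constructible classes
        List<Object> hooks = loadHooks("java.util.ArrayList, java.util.HashMap", Object.class);
        System.out.println(hooks.size());  // 2
    }
}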
- private List<Hook> getPostExecHooks() throws Exception {
- ArrayList<Hook> pehooks = new ArrayList<Hook>();
- String pestr = conf.getVar(HiveConf.ConfVars.POSTEXECHOOKS);
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return pehooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
- } catch (ClassNotFoundException e) {
- console.printError("Post Exec Hook Class not found:" + e.getMessage());
- throw e;
- }
- }
-
- return pehooks;
- }
-
- public int execute() {
+ public int execute() throws CommandNeedRetryException {
// execute hivesterix plan
if (hivesterix) {
hivesterix = false;
@@ -925,6 +1052,9 @@
return ret;
}
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.DRIVER_EXECUTE);
+
boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME));
int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
@@ -933,6 +1063,10 @@
conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId);
conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr);
+
+ conf.set("mapreduce.workflow.id", "hive_" + queryId);
+ conf.set("mapreduce.workflow.name", queryStr);
+
maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
try {
@@ -946,14 +1080,23 @@
}
resStream = null;
- HookContext hookContext = new HookContext(plan, conf);
+ HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS());
+ hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
- for (Hook peh : getPreExecHooks()) {
+ for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) {
if (peh instanceof ExecuteWithHookContext) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
+
((ExecuteWithHookContext) peh).run(hookContext);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
} else if (peh instanceof PreExecute) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
+
((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), ShimLoader
.getHadoopShims().getUGIForConf(conf));
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
}
}
@@ -968,32 +1111,36 @@
}
String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
- // A runtime that launches runnable tasks as separate Threads
- // through
+ // A runtime that launches runnable tasks as separate Threads through
// TaskRunners
// As soon as a task isRunnable, it is put in a queue
// At any time, at most maxthreads tasks can be running
- // The main thread polls the TaskRunners to check if they have
- // finished.
+ // The main thread polls the TaskRunners to check if they have finished.
- Queue<Task<? extends Serializable>> runnable = new LinkedList<Task<? extends Serializable>>();
+ Queue<Task<? extends Serializable>> runnable = new ConcurrentLinkedQueue<Task<? extends Serializable>>();
Map<TaskResult, TaskRunner> running = new HashMap<TaskResult, TaskRunner>();
DriverContext driverCxt = new DriverContext(runnable, ctx);
+ ctx.setHDFSCleanup(true);
+
+ SessionState.get().setLastMapRedStatsList(new ArrayList<MapRedStats>());
+ SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>());
+ SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>());
// Add root Tasks to runnable
-
for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
+ // This should never happen; if it does, it's a bug with the potential to produce
+ // incorrect results.
+ assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
driverCxt.addToRunnable(tsk);
}
+ perfLogger.PerfLogEnd(LOG, PerfLogger.TIME_TO_SUBMIT);
// Loop while you either have tasks running, or tasks queued up
-
while (running.size() != 0 || runnable.peek() != null) {
// Launch upto maxthreads tasks
while (runnable.peek() != null && running.size() < maxthreads) {
Task<? extends Serializable> tsk = runnable.remove();
- console.printInfo("executing task " + tsk.getName());
launchTask(tsk, queryId, noName, running, jobname, jobs, driverCxt);
}
@@ -1005,12 +1152,24 @@
int exitVal = tskRes.getExitVal();
if (exitVal != 0) {
+ if (tsk.ifRetryCmdWhenFail()) {
+ if (!running.isEmpty()) {
+ taskCleanup(running);
+ }
+ // in case we decided to run everything in local mode, restore the
+ // jobtracker setting to its initial value
+ ctx.restoreOriginalTracker();
+ throw new CommandNeedRetryException();
+ }
Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
if (backupTask != null) {
errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+ tsk.getClass().getName();
+ ErrorMsg em = ErrorMsg.getErrorMsg(exitVal);
+ if (em != null) {
+ errorMessage += ". " + em.getMsg();
+ }
console.printError(errorMessage);
-
errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
console.printError(errorMessage);
@@ -1021,20 +1180,31 @@
continue;
} else {
- // TODO: This error messaging is not very informative.
- // Fix that.
+ hookContext.setHookType(HookContext.HookType.ON_FAILURE_HOOK);
+ // Get all the failure execution hooks and execute them.
+ for (Hook ofh : getHooks(HiveConf.ConfVars.ONFAILUREHOOKS)) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());
+
+ ((ExecuteWithHookContext) ofh).run(hookContext);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());
+ }
+
errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+ tsk.getClass().getName();
+ ErrorMsg em = ErrorMsg.getErrorMsg(exitVal);
+ if (em != null) {
+ errorMessage += ". " + em.getMsg();
+ }
SQLState = "08S01";
console.printError(errorMessage);
- if (running.size() != 0) {
- taskCleanup();
+ if (!running.isEmpty()) {
+ taskCleanup(running);
}
- // in case we decided to run everything in local mode,
- // restore the
+ // in case we decided to run everything in local mode, restore
// the jobtracker setting to its initial value
ctx.restoreOriginalTracker();
- return 9;
+ return exitVal;
}
}
@@ -1047,9 +1217,9 @@
if (tsk.getChildTasks() != null) {
for (Task<? extends Serializable> child : tsk.getChildTasks()) {
// hivesterix: don't check launchable condition
- // if (DriverContext.isLaunchable(child)) {
+ //if(DriverContext.isLaunchable(tsk)){
driverCxt.addToRunnable(child);
- // }
+ //}
}
}
}
@@ -1059,8 +1229,7 @@
ctx.restoreOriginalTracker();
// remove incomplete outputs.
- // Some incomplete outputs may be added at the beginning, for eg:
- // for dynamic partitions.
+ // Some incomplete outputs may be added at the beginning, e.g. for dynamic partitions.
// remove them
HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
for (WriteEntity output : plan.getOutputs()) {
@@ -1073,15 +1242,24 @@
plan.getOutputs().remove(output);
}
+ hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
// Get all the post execution hooks and execute them.
- for (Hook peh : getPostExecHooks()) {
+ for (Hook peh : getHooks(HiveConf.ConfVars.POSTEXECHOOKS)) {
if (peh instanceof ExecuteWithHookContext) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
+
((ExecuteWithHookContext) peh).run(hookContext);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
} else if (peh instanceof PostExecute) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
+
((PostExecute) peh)
.run(SessionState.get(), plan.getInputs(), plan.getOutputs(),
(SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo()
: null), ShimLoader.getHadoopShims().getUGIForConf(conf));
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
}
}
@@ -1089,7 +1267,10 @@
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0));
SessionState.get().getHiveHistory().printRowCount(queryId);
}
+ } catch (CommandNeedRetryException e) {
+ throw e;
} catch (Exception e) {
+ ctx.restoreOriginalTracker();
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12));
}
@@ -1105,6 +1286,18 @@
if (noName) {
conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
}
+ perfLogger.PerfLogEnd(LOG, PerfLogger.DRIVER_EXECUTE);
+
+ if (SessionState.get().getLastMapRedStatsList() != null
+ && SessionState.get().getLastMapRedStatsList().size() > 0) {
+ long totalCpu = 0;
+ console.printInfo("MapReduce Jobs Launched: ");
+ for (int i = 0; i < SessionState.get().getLastMapRedStatsList().size(); i++) {
+ console.printInfo("Job " + i + ": " + SessionState.get().getLastMapRedStatsList().get(i));
+ totalCpu += SessionState.get().getLastMapRedStatsList().get(i).getCpuMSec();
+ }
+ console.printInfo("Total MapReduce CPU Time Spent: " + Utilities.formatMsecToStr(totalCpu));
+ }
}
plan.setDone();
@@ -1134,14 +1327,12 @@
* name of the task, if it is a map-reduce job
* @param jobs
* number of map-reduce jobs
- * @param curJobNo
- * the sequential number of the next map-reduce job
- * @return the updated number of last the map-reduce job launched
+ * @param cxt
+ * the driver context
*/
public void launchTask(Task<? extends Serializable> tsk, String queryId, boolean noName,
Map<TaskResult, TaskRunner> running, String jobname, int jobs, DriverContext cxt) {
-
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().startTask(queryId, tsk, tsk.getClass().getName());
}
@@ -1149,6 +1340,8 @@
if (noName) {
conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "(" + tsk.getId() + ")");
}
+ conf.set("mapreduce.workflow.node.name", tsk.getId());
+ Utilities.setWorkflowAdjacencies(conf, plan);
cxt.incCurJobNo(1);
console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of " + jobs);
}
@@ -1156,7 +1349,13 @@
TaskResult tskRes = new TaskResult();
TaskRunner tskRun = new TaskRunner(tsk, tskRes);
- // HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) &&
+ // Launch Task
+ //if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) && tsk.isMapRedTask()) {
+ // Launch it in the parallel mode, as a separate thread only for MR tasks
+ // tskRun.start();
+ //} else {
+ // tskRun.runSequential();
+ //}
// Launch Task: hivesterix tweak
if (tsk instanceof MapRedTask || tsk instanceof StatsTask) {
// Launch it in the parallel mode, as a separate thread only for MR
@@ -1169,11 +1368,27 @@
if (crs instanceof ConditionalResolverMergeFiles) {
tskRes.setRunning(false);
tskRes.setExitVal(0);
-
- List<Task<? extends Serializable>> children = condTask.getListTasks();
- for (Task<? extends Serializable> child : children)
- if (child instanceof MapRedTask)
- cxt.addToRunnable(child);
+ if (!executedConditionalTsks.contains(tsk)) {
+ List<Task<? extends Serializable>> children = condTask.getListTasks();
+ Task<? extends Serializable> selectedBranch = null;
+ for (Task<? extends Serializable> branch : children) {
+ if (branch instanceof MoveTask) {
+ selectedBranch = branch;
+ break;
+ }
+ }
+ if (selectedBranch == null) {
+ for (int i = children.size() - 1; i >= 0; i--) {
+ Task<? extends Serializable> child = children.get(i);
+ if (child instanceof MapRedTask) {
+ selectedBranch = child;
+ break;
+ }
+ }
+ }
+ executedConditionalTsks.add(tsk);
+ cxt.addToRunnable(selectedBranch);
+ }
}
} else {
tskRun.runSequential();
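The hivesterix tweak above picks exactly one branch of a merge-files conditional: the first MoveTask branch if there is one, otherwise the last MapRedTask branch. A standalone sketch of that selection rule (Kind is a hypothetical stand-in for the Task subclasses):

import java.util.Arrays;
import java.util.List;

public class BranchPickSketch {
    enum Kind { MOVE, MAPRED, OTHER }

    static Kind pick(List<Kind> children) {
        for (Kind k : children) {
            if (k == Kind.MOVE) {
                return k;                      // first move branch wins
            }
        }
        for (int i = children.size() - 1; i >= 0; i--) {
            if (children.get(i) == Kind.MAPRED) {
                return children.get(i);        // otherwise the last map-reduce branch
            }
        }
        return null;                           // no runnable branch found
    }

    public static void main(String[] args) {
        System.out.println(pick(Arrays.asList(Kind.MAPRED, Kind.OTHER, Kind.MAPRED)));  // MAPRED
        System.out.println(pick(Arrays.asList(Kind.MAPRED, Kind.MOVE)));                // MOVE
    }
}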
@@ -1185,12 +1400,18 @@
/**
* Cleans up remaining tasks in case of failure
*/
-
- public void taskCleanup() {
- // The currently existing Shutdown hooks will be automatically called,
- // killing the map-reduce processes.
- // The non MR processes will be killed as well.
- System.exit(9);
+ public void taskCleanup(Map<TaskResult, TaskRunner> running) {
+ for (Map.Entry<TaskResult, TaskRunner> entry : running.entrySet()) {
+ if (entry.getKey().isRunning()) {
+ Task<?> task = entry.getValue().getTask();
+ try {
+ task.shutdown();
+ } catch (Exception e) {
+ console.printError("Exception on shutting down task " + task.getId() + ": " + e);
+ }
+ }
+ }
+ running.clear();
}
/**
@@ -1214,7 +1435,7 @@
// In this loop, nothing was found
// Sleep 10 seconds and restart
try {
- Thread.sleep(sleeptime);
+ Thread.sleep(SLEEP_TIME);
} catch (InterruptedException ie) {
// Do Nothing
;
@@ -1223,7 +1444,7 @@
}
}
- public boolean getResults(ArrayList<String> res) throws IOException {
+ public boolean getResults(ArrayList<String> res) throws IOException, CommandNeedRetryException {
if (plan != null && plan.getFetchTask() != null) {
FetchTask ft = plan.getFetchTask();
ft.setMaxRows(maxRows);
@@ -1276,6 +1497,14 @@
return true;
}
+ public int getTryCount() {
+ return tryCount;
+ }
+
+ public void setTryCount(int tryCount) {
+ this.tryCount = tryCount;
+ }
+
public int close() {
try {
if (plan != null) {
@@ -1308,18 +1537,21 @@
}
public void destroy() {
- releaseLocks();
+ if (ctx != null) {
+ releaseLocks(ctx.getHiveLocks());
+ }
+
+ if (hiveLockMgr != null) {
+ try {
+ hiveLockMgr.close();
+ } catch (LockException e) {
+ LOG.warn("Exception in closing hive lock manager. "
+ + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ }
+ }
}
public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException {
return plan.getQueryPlan();
}
-
- public int getTryCount() {
- return tryCount;
- }
-
- public void setTryCount(int tryCount) {
- this.tryCount = tryCount;
- }
}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
index 2d5191d..1b96259 100644
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
@@ -68,6 +68,7 @@
@Override
public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo) throws SemanticException {
+ @SuppressWarnings("deprecation")
TypeInfo[] parameters = paramInfo.getParameters();
if (parameters.length == 0) {
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
index 0fea4b9..e26f477 100644
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
@@ -36,7 +36,7 @@
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
-import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
/**
@@ -65,15 +65,18 @@
return result;
}
- public static final TypeInfo voidTypeInfo = getPrimitiveTypeInfo(Constants.VOID_TYPE_NAME);
- public static final TypeInfo booleanTypeInfo = getPrimitiveTypeInfo(Constants.BOOLEAN_TYPE_NAME);
- public static final TypeInfo intTypeInfo = getPrimitiveTypeInfo(Constants.INT_TYPE_NAME);
- public static final TypeInfo longTypeInfo = getPrimitiveTypeInfo(Constants.BIGINT_TYPE_NAME);
- public static final TypeInfo stringTypeInfo = getPrimitiveTypeInfo(Constants.STRING_TYPE_NAME);
- public static final TypeInfo floatTypeInfo = getPrimitiveTypeInfo(Constants.FLOAT_TYPE_NAME);
- public static final TypeInfo doubleTypeInfo = getPrimitiveTypeInfo(Constants.DOUBLE_TYPE_NAME);
- public static final TypeInfo byteTypeInfo = getPrimitiveTypeInfo(Constants.TINYINT_TYPE_NAME);
- public static final TypeInfo shortTypeInfo = getPrimitiveTypeInfo(Constants.SMALLINT_TYPE_NAME);
+ public static final TypeInfo voidTypeInfo = getPrimitiveTypeInfo(serdeConstants.VOID_TYPE_NAME);
+ public static final TypeInfo booleanTypeInfo = getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME);
+ public static final TypeInfo intTypeInfo = getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME);
+ public static final TypeInfo longTypeInfo = getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME);
+ public static final TypeInfo stringTypeInfo = getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
+ public static final TypeInfo floatTypeInfo = getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME);
+ public static final TypeInfo doubleTypeInfo = getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME);
+ public static final TypeInfo byteTypeInfo = getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME);
+ public static final TypeInfo shortTypeInfo = getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME);
+ public static final TypeInfo timestampTypeInfo = getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME);
+ public static final TypeInfo binaryTypeInfo = getPrimitiveTypeInfo(serdeConstants.BINARY_TYPE_NAME);
+ public static final TypeInfo decimalTypeInfo = getPrimitiveTypeInfo(serdeConstants.DECIMAL_TYPE_NAME);
public static final TypeInfo unknownTypeInfo = getPrimitiveTypeInfo("unknown");
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
deleted file mode 100644
index 23a842a..0000000
--- a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
+++ /dev/null
@@ -1,773 +0,0 @@
-<?xml version="1.0"?>
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<configuration>
-
- <!-- Hive Configuration can either be stored in this file or in the hadoop
- configuration files -->
- <!-- that are implied by Hadoop setup variables. -->
- <!-- Aside from Hadoop setup variables - this file is provided as a convenience
- so that Hive -->
- <!-- users do not have to edit hadoop configuration files (that may be managed
- as a centralized -->
- <!-- resource). -->
-
- <!-- Hive Execution Parameters -->
- <property>
- <name>mapred.reduce.tasks</name>
- <value>-1</value>
- <description>The default number of reduce tasks per job. Typically set
- to a prime close to the number of available hosts. Ignored when
- mapred.job.tracker is "local". Hadoop set this to 1 by default,
- whereas hive uses -1 as its default value.
- By setting this property to -1, Hive will automatically figure out what
- should be the number of reducers.
- </description>
- </property>
-
- <property>
- <name>hive.hyracks.connectorpolicy</name>
- <value>PIPELINING</value>
- </property>
-
- <property>
- <name>hive.hyracks.parrallelism</name>
- <value>4</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external</name>
- <value>true</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external.memory</name>
- <value>33554432</value>
- </property>
-
- <property>
- <name>hive.algebricks.sort.memory</name>
- <value>33554432</value>
- </property>
-
- <property>
- <name>hive.exec.reducers.bytes.per.reducer</name>
- <value>1000000000</value>
- <description>size per reducer.The default is 1G, i.e if the input size
- is 10G, it will use 10 reducers.</description>
- </property>
-
- <property>
- <name>hive.exec.reducers.max</name>
- <value>999</value>
- <description>max number of reducers will be used. If the one
- specified in the configuration parameter mapred.reduce.tasks is
- negative, hive will use this one as the max number of reducers when
- automatically determine number of reducers.</description>
- </property>
-
- <property>
- <name>hive.exec.scratchdir</name>
- <value>/hive-${user.name}</value>
- <description>Scratch space for Hive jobs</description>
- </property>
-
- <property>
- <name>hive.test.mode</name>
- <value>false</value>
- <description>whether hive is running in test mode. If yes, it turns on
- sampling and prefixes the output tablename</description>
- </property>
-
- <property>
- <name>hive.test.mode.prefix</name>
- <value>test_</value>
- <description>if hive is running in test mode, prefixes the output
- table by this string</description>
- </property>
-
- <!-- If the input table is not bucketed, the denominator of the tablesample
- is determinied by the parameter below -->
- <!-- For example, the following query: -->
- <!-- INSERT OVERWRITE TABLE dest -->
- <!-- SELECT col1 from src -->
- <!-- would be converted to -->
- <!-- INSERT OVERWRITE TABLE test_dest -->
- <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
- <property>
- <name>hive.test.mode.samplefreq</name>
- <value>32</value>
- <description>if hive is running in test mode and table is not
- bucketed, sampling frequency</description>
- </property>
-
- <property>
- <name>hive.test.mode.nosamplelist</name>
- <value></value>
- <description>if hive is running in test mode, dont sample the above
- comma seperated list of tables</description>
- </property>
-
- <property>
- <name>hive.metastore.local</name>
- <value>true</value>
- <description>controls whether to connect to remove metastore server or
- open a new metastore server in Hive Client JVM</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionURL</name>
- <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
- <description>JDBC connect string for a JDBC metastore</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionDriverName</name>
- <value>org.apache.derby.jdbc.EmbeddedDriver</value>
- <description>Driver class name for a JDBC metastore</description>
- </property>
-
- <property>
- <name>javax.jdo.PersistenceManagerFactoryClass</name>
- <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
- <description>class implementing the jdo persistence</description>
- </property>
-
- <property>
- <name>datanucleus.connectionPoolingType</name>
- <value>DBCP</value>
- <description>Uses a DBCP connection pool for JDBC metastore
- </description>
- </property>
-
- <property>
- <name>javax.jdo.option.DetachAllOnCommit</name>
- <value>true</value>
- <description>detaches all objects from session so that they can be
- used after transaction is committed</description>
- </property>
-
- <property>
- <name>javax.jdo.option.NonTransactionalRead</name>
- <value>true</value>
- <description>reads outside of transactions</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionUserName</name>
- <value>APP</value>
- <description>username to use against metastore database</description>
- </property>
-
- <property>
- <name>javax.jdo.option.ConnectionPassword</name>
- <value>mine</value>
- <description>password to use against metastore database</description>
- </property>
-
- <property>
- <name>datanucleus.validateTables</name>
- <value>false</value>
- <description>validates existing schema against code. turn this on if
- you want to verify existing schema </description>
- </property>
-
- <property>
- <name>datanucleus.validateColumns</name>
- <value>false</value>
- <description>validates existing schema against code. turn this on if
- you want to verify existing schema </description>
- </property>
-
- <property>
- <name>datanucleus.validateConstraints</name>
- <value>false</value>
- <description>validates existing schema against code. turn this on if
- you want to verify existing schema </description>
- </property>
-
- <property>
- <name>datanucleus.storeManagerType</name>
- <value>rdbms</value>
- <description>metadata store type</description>
- </property>
-
- <property>
- <name>datanucleus.autoCreateSchema</name>
- <value>true</value>
- <description>creates necessary schema on a startup if one doesn't
- exist. set this to false, after creating it once</description>
- </property>
-
- <property>
- <name>datanucleus.autoStartMechanismMode</name>
- <value>checked</value>
- <description>throw exception if metadata tables are incorrect
- </description>
- </property>
-
- <property>
- <name>datanucleus.transactionIsolation</name>
- <value>read-committed</value>
- <description>Default transaction isolation level for identity
- generation. </description>
- </property>
-
- <property>
- <name>datanucleus.cache.level2</name>
- <value>false</value>
- <description>Use a level 2 cache. Turn this off if metadata is changed
- independently of hive metastore server</description>
- </property>
-
- <property>
- <name>datanucleus.cache.level2.type</name>
- <value>SOFT</value>
- <description>SOFT=soft reference based cache, WEAK=weak reference
- based cache.</description>
- </property>
-
- <property>
- <name>datanucleus.identifierFactory</name>
- <value>datanucleus</value>
- <description>Name of the identifier factory to use when generating
- table/column names etc. 'datanucleus' is used for backward
- compatibility</description>
- </property>
-
- <property>
- <name>hive.metastore.warehouse.dir</name>
- <value>/user/hivesterix</value>
- <description>location of default database for the warehouse
- </description>
- </property>
-
- <property>
- <name>hive.metastore.connect.retries</name>
- <value>5</value>
- <description>Number of retries while opening a connection to metastore
- </description>
- </property>
-
- <property>
- <name>hive.metastore.rawstore.impl</name>
- <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
- <description>Name of the class that implements
- org.apache.hadoop.hive.metastore.rawstore interface. This class is
- used to store and retrieval of raw metadata objects such as table,
- database</description>
- </property>
-
- <property>
- <name>hive.default.fileformat</name>
- <value>TextFile</value>
- <description>Default file format for CREATE TABLE statement. Options
- are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
- ... STORED AS <TEXTFILE|SEQUENCEFILE> to override</description>
- </property>
-
- <property>
- <name>hive.fileformat.check</name>
- <value>true</value>
- <description>Whether to check file format or not when loading data
- files</description>
- </property>
-
- <property>
- <name>hive.map.aggr</name>
- <value>true</value>
- <description>Whether to use map-side aggregation in Hive Group By
- queries</description>
- </property>
-
- <property>
- <name>hive.groupby.skewindata</name>
- <value>false</value>
- <description>Whether there is skew in data to optimize group by
- queries</description>
- </property>
-
- <property>
- <name>hive.groupby.mapaggr.checkinterval</name>
- <value>100000</value>
- <description>Number of rows after which size of the grouping
- keys/aggregation classes is performed</description>
- </property>
-
- <property>
- <name>hive.mapred.local.mem</name>
- <value>0</value>
- <description>For local mode, memory of the mappers/reducers
- </description>
- </property>
-
- <property>
- <name>hive.map.aggr.hash.percentmemory</name>
- <value>0.5</value>
- <description>Portion of total memory to be used by map-side grup
- aggregation hash table</description>
- </property>
-
- <property>
- <name>hive.map.aggr.hash.min.reduction</name>
- <value>0.5</value>
- <description>Hash aggregation will be turned off if the ratio between
- hash
- table size and input rows is bigger than this number. Set to 1 to make
- sure
- hash aggregation is never turned off.</description>
- </property>
-
- <property>
- <name>hive.optimize.cp</name>
- <value>true</value>
- <description>Whether to enable column pruner</description>
- </property>
-
- <property>
- <name>hive.optimize.ppd</name>
- <value>true</value>
- <description>Whether to enable predicate pushdown</description>
- </property>
-
- <property>
- <name>hive.optimize.pruner</name>
- <value>true</value>
- <description>Whether to enable the new partition pruner which depends
- on predicate pushdown. If this is disabled,
- the old partition pruner which is based on AST will be enabled.
- </description>
- </property>
-
- <property>
- <name>hive.optimize.groupby</name>
- <value>true</value>
- <description>Whether to enable the bucketed group by from bucketed
- partitions/tables.</description>
- </property>
-
- <property>
- <name>hive.join.emit.interval</name>
- <value>1000</value>
- <description>How many rows in the right-most join operand Hive should
- buffer before emitting the join result. </description>
- </property>
-
- <property>
- <name>hive.join.cache.size</name>
- <value>25000</value>
- <description>How many rows in the joining tables (except the streaming
- table) should be cached in memory. </description>
- </property>
-
- <property>
- <name>hive.mapjoin.bucket.cache.size</name>
- <value>100</value>
- <description>How many values in each keys in the map-joined table
- should be cached in memory. </description>
- </property>
-
- <property>
- <name>hive.mapjoin.maxsize</name>
- <value>100000</value>
- <description>Maximum # of rows of the small table that can be handled
- by map-side join. If the size is reached and hive.task.progress is
- set, a fatal error counter is set and the job will be killed.
- </description>
- </property>
-
- <property>
- <name>hive.mapjoin.cache.numrows</name>
- <value>25000</value>
- <description>How many rows should be cached by jdbm for map join.
- </description>
- </property>
-
- <property>
- <name>hive.optimize.skewjoin</name>
- <value>false</value>
- <description>Whether to enable skew join optimization. </description>
- </property>
-
- <property>
- <name>hive.skewjoin.key</name>
- <value>100000</value>
- <description>Determine if we get a skew key in join. If we see more
- than the specified number of rows with the same key in join operator,
- we think the key as a skew join key. </description>
- </property>
-
- <property>
- <name>hive.skewjoin.mapjoin.map.tasks</name>
- <value>10000</value>
- <description> Determine the number of map task used in the follow up
- map join job
- for a skew join. It should be used together with
- hive.skewjoin.mapjoin.min.split
- to perform a fine grained control.</description>
- </property>
-
- <property>
- <name>hive.skewjoin.mapjoin.min.split</name>
- <value>33554432</value>
- <description> Determine the number of map task at most used in the
- follow up map join job
- for a skew join by specifying the minimum split size. It should be used
- together with
- hive.skewjoin.mapjoin.map.tasks to perform a fine grained control.</description>
- </property>
-
- <property>
- <name>hive.mapred.mode</name>
- <value>nonstrict</value>
- <description>The mode in which the hive operations are being
- performed. In strict mode, some risky queries are not allowed to run
- </description>
- </property>
-
- <property>
- <name>hive.exec.script.maxerrsize</name>
- <value>100000</value>
- <description>Maximum number of bytes a script is allowed to emit to
- standard error (per map-reduce task). This prevents runaway scripts
- from filling logs partitions to capacity </description>
- </property>
-
- <property>
- <name>hive.exec.script.allow.partial.consumption</name>
- <value>false</value>
- <description> When enabled, this option allows a user script to exit
- successfully without consuming all the data from the standard input.
- </description>
- </property>
-
- <property>
- <name>hive.script.operator.id.env.var</name>
- <value>HIVE_SCRIPT_OPERATOR_ID</value>
- <description> Name of the environment variable that holds the unique
- script operator ID in the user's transform function (the custom
- mapper/reducer that the user has specified in the query)
- </description>
- </property>
-
- <property>
- <name>hive.exec.compress.output</name>
- <value>false</value>
- <description> This controls whether the final outputs of a query (to a
- local/hdfs file or a hive table) is compressed. The compression codec
- and other options are determined from hadoop config variables
- mapred.output.compress* </description>
- </property>
-
- <property>
- <name>hive.exec.compress.intermediate</name>
- <value>false</value>
- <description> This controls whether intermediate files produced by
- hive between multiple map-reduce jobs are compressed. The compression
- codec and other options are determined from hadoop config variables
- mapred.output.compress* </description>
- </property>
-
- <property>
- <name>hive.exec.parallel</name>
- <value>false</value>
- <description>Whether to execute jobs in parallel</description>
- </property>
-
- <property>
- <name>hive.exec.parallel.thread.number</name>
- <value>8</value>
- <description>How many jobs at most can be executed in parallel
- </description>
- </property>
-
- <property>
- <name>hive.hwi.war.file</name>
- <value>lib\hive-hwi-0.7.0.war</value>
- <description>This sets the path to the HWI war file, relative to
- ${HIVE_HOME}. </description>
- </property>
-
- <property>
- <name>hive.hwi.listen.host</name>
- <value>0.0.0.0</value>
- <description>This is the host address the Hive Web Interface will
- listen on</description>
- </property>
-
- <property>
- <name>hive.hwi.listen.port</name>
- <value>9999</value>
- <description>This is the port the Hive Web Interface will listen on
- </description>
- </property>
-
- <property>
- <name>hive.exec.pre.hooks</name>
- <value></value>
- <description>Pre Execute Hook for Tests</description>
- </property>
-
- <property>
- <name>hive.merge.mapfiles</name>
- <value>true</value>
- <description>Merge small files at the end of a map-only job
- </description>
- </property>
-
- <property>
- <name>hive.merge.mapredfiles</name>
- <value>false</value>
- <description>Merge small files at the end of a map-reduce job
- </description>
- </property>
-
- <property>
- <name>hive.heartbeat.interval</name>
- <value>1000</value>
- <description>Send a heartbeat after this interval - used by mapjoin
- and filter operators</description>
- </property>
-
- <property>
- <name>hive.merge.size.per.task</name>
- <value>256000000</value>
- <description>Size of merged files at the end of the job</description>
- </property>
-
- <property>
- <name>hive.merge.size.smallfiles.avgsize</name>
- <value>16000000</value>
- <description>When the average output file size of a job is less than
- this number, Hive will start an additional map-reduce job to merge
- the output files into bigger files. This is only done for map-only
- jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
- hive.merge.mapredfiles is true.</description>
- </property>
-
- <property>
- <name>hive.script.auto.progress</name>
- <value>false</value>
- <description>Whether Hive Tranform/Map/Reduce Clause should
- automatically send progress information to TaskTracker to avoid the
- task getting killed because of inactivity. Hive sends progress
- information when the script is outputting to stderr. This option
- removes the need of periodically producing stderr messages, but users
- should be cautious because this may prevent infinite loops in the
- scripts to be killed by TaskTracker. </description>
- </property>
-
- <property>
- <name>hive.script.serde</name>
- <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
- <description>The default serde for trasmitting input data to and
- reading output data from the user scripts. </description>
- </property>
-
- <property>
- <name>hive.script.recordreader</name>
- <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
- <description>The default record reader for reading data from the user
- scripts. </description>
- </property>
-
- <property>
- <name>hive.script.recordwriter</name>
- <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
- <description>The default record writer for writing data to the user
- scripts. </description>
- </property>
-
- <property>
- <name>hive.input.format</name>
- <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
- <description>The default input format, if it is not specified, the
- system assigns it. It is set to HiveInputFormat for hadoop versions
- 17, 18 and 19, whereas it is set to CombinedHiveInputFormat for
- hadoop 20. The user can always overwrite it - if there is a bug in
- CombinedHiveInputFormat, it can always be manually set to
- HiveInputFormat. </description>
- </property>
-
- <property>
- <name>hive.udtf.auto.progress</name>
- <value>false</value>
- <description>Whether Hive should automatically send progress
- information to TaskTracker when using UDTF's to prevent the task
- getting killed because of inactivity. Users should be cautious
- because this may prevent TaskTracker from killing tasks with infinte
- loops. </description>
- </property>
-
- <property>
- <name>hive.mapred.reduce.tasks.speculative.execution</name>
- <value>true</value>
- <description>Whether speculative execution for reducers should be
- turned on. </description>
- </property>
-
- <property>
- <name>hive.exec.counters.pull.interval</name>
- <value>1000</value>
- <description>The interval with which to poll the JobTracker for the
- counters the running job. The smaller it is the more load there will
- be on the jobtracker, the higher it is the less granular the caught
- will be.</description>
- </property>
-
- <property>
- <name>hive.enforce.bucketing</name>
- <value>false</value>
- <description>Whether bucketing is enforced. If true, while inserting
- into the table, bucketing is enforced. </description>
- </property>
-
- <property>
- <name>hive.enforce.sorting</name>
- <value>false</value>
- <description>Whether sorting is enforced. If true, while inserting
- into the table, sorting is enforced. </description>
- </property>
-
- <property>
- <name>hive.metastore.ds.connection.url.hook</name>
- <value></value>
- <description>Name of the hook to use for retriving the JDO connection
- URL. If empty, the value in javax.jdo.option.ConnectionURL is used
- </description>
- </property>
-
- <property>
- <name>hive.metastore.ds.retry.attempts</name>
- <value>1</value>
- <description>The number of times to retry a metastore call if there
- were a connection error</description>
- </property>
-
- <property>
- <name>hive.metastore.ds.retry.interval</name>
- <value>1000</value>
- <description>The number of miliseconds between metastore retry
- attempts</description>
- </property>
-
- <property>
- <name>hive.metastore.server.min.threads</name>
- <value>200</value>
- <description>Minimum number of worker threads in the Thrift server's
- pool.</description>
- </property>
-
- <property>
- <name>hive.metastore.server.max.threads</name>
- <value>100000</value>
- <description>Maximum number of worker threads in the Thrift server's
- pool.</description>
- </property>
-
- <property>
- <name>hive.metastore.server.tcp.keepalive</name>
- <value>true</value>
- <description>Whether to enable TCP keepalive for the metastore server.
- Keepalive will prevent accumulation of half-open connections.
- </description>
- </property>
-
- <property>
- <name>hive.optimize.reducededuplication</name>
- <value>true</value>
- <description>Remove extra map-reduce jobs if the data is already
- clustered by the same key which needs to be used again. This should
- always be set to true. Since it is a new feature, it has been made
- configurable.</description>
- </property>
-
- <property>
- <name>hive.exec.dynamic.partition</name>
- <value>false</value>
- <description>Whether or not to allow dynamic partitions in DML/DDL.
- </description>
- </property>
-
- <property>
- <name>hive.exec.dynamic.partition.mode</name>
- <value>strict</value>
- <description>In strict mode, the user must specify at least one static
- partition in case the user accidentally overwrites all partitions.
- </description>
- </property>
-
- <property>
- <name>hive.exec.max.dynamic.partitions</name>
- <value>1000</value>
- <description>Maximum number of dynamic partitions allowed to be
- created in total.</description>
- </property>
-
- <property>
- <name>hive.exec.max.dynamic.partitions.pernode</name>
- <value>100</value>
- <description>Maximum number of dynamic partitions allowed to be
- created in each mapper/reducer node.</description>
- </property>
-
- <property>
- <name>hive.default.partition.name</name>
- <value>__HIVE_DEFAULT_PARTITION__</value>
- <description>The default partition name in case the dynamic partition
- column value is null/empty string or anyother values that cannot be
- escaped. This value must not contain any special character used in
- HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the
- dynamic partition value should not contain this value to avoid
- confusions.</description>
- </property>
-
- <property>
- <name>fs.har.impl</name>
- <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
- <description>The implementation for accessing Hadoop Archives. Note
- that this won't be applicable to Hadoop vers less than 0.20
- </description>
- </property>
-
- <property>
- <name>hive.archive.enabled</name>
- <value>false</value>
- <description>Whether archiving operations are permitted</description>
- </property>
-
- <property>
- <name>hive.archive.har.parentdir.settable</name>
- <value>false</value>
- <description>In new Hadoop versions, the parent directory must be set
- while
- creating a HAR. Because this functionality is hard to detect with just
- version
- numbers, this conf var needs to be set manually.</description>
- </property>
-
- <!-- HBase Storage Handler Parameters -->
-
- <property>
- <name>hive.hbase.wal.enabled</name>
- <value>true</value>
- <description>Whether writes to HBase should be forced to the
- write-ahead log. Disabling this improves HBase write performance at
- the risk of lost writes in case of a crash.</description>
- </property>
-
-</configuration>
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
index eab38a6..6f195f5 100644
--- a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
@@ -31,7 +31,7 @@
# FATAL, ERROR, WARN, INFO, DEBUG
#
#------------------------------------------------------------------------------
-log4j.rootCategory=INFO, S
+log4j.rootCategory=FATAL, S
log4j.logger.com.dappit.Dapper.parser=ERROR
log4j.logger.org.w3c.tidy=FATAL
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-site.xml b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-site.xml
new file mode 100644
index 0000000..ccfcd74
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-site.xml
@@ -0,0 +1,5189 @@
+<?xml version="1.0"?>
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+ <!-- Hivesterix Execution Parameters -->
+ <property>
+ <name>hive.hyracks.connectorpolicy</name>
+ <value>PIPELINING</value>
+ </property>
+
+ <property>
+ <name>hive.hyracks.parrallelism</name>
+ <value>4</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external.memory</name>
+ <value>33554432</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.sort.memory</name>
+ <value>33554432</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.framesize</name>
+ <value>32768</value>
+ </property>
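+
+ <!-- Illustrative arithmetic (a sketch, assuming the Algebricks operators
+ budget their memory as memory divided by framesize frames): the values
+ above give each external group-by and each sort operator
+ 33554432 / 32768 = 1024 frames of 32 KB (32 MB in total) before it has
+ to spill to disk. -->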
+
+ <!-- Hive Execution Parameters -->
+ <property>
+ <name>mapred.reduce.tasks</name>
+ <value>-1</value>
+ <description>The default number of reduce tasks per job. Typically
+ set
+ to a prime close to the number of available hosts. Ignored when
+ mapred.job.tracker is "local". Hadoop set this to 1 by default,
+ whereas hive uses -1 as its default value.
+ By setting this property
+ to -1, Hive will automatically figure out
+ what should be the number
+ of reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.bytes.per.reducer</name>
+ <value>1000000000</value>
+		<description>Size per reducer. The default is 1G, i.e. if the input
+		size is 10G, it will use 10 reducers.
+		</description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.max</name>
+ <value>999</value>
+		<description>Max number of reducers that will be used. If the one
+		specified in the configuration parameter mapred.reduce.tasks is
+		negative, hive will use this one as the max number of reducers when
+		automatically determining the number of reducers.
+ </description>
+ </property>
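+
+ <!-- Worked example (illustrative): with mapred.reduce.tasks=-1 Hive
+ derives the reducer count from the input size. For a 25 GB input and
+ hive.exec.reducers.bytes.per.reducer=1000000000 (1 GB), Hive plans
+ ceil(25000000000 / 1000000000) = 25 reducers, and the result is always
+ capped by hive.exec.reducers.max (999 here). -->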
+
+ <property>
+ <name>hive.cli.print.header</name>
+ <value>false</value>
+ <description>Whether to print the names of the columns in query
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.print.current.db</name>
+ <value>false</value>
+ <description>Whether to include the current database in the hive
+ prompt.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.prompt</name>
+ <value>hive</value>
+		<description>Command line prompt configuration value. Other hiveconf
+		variables can be used in
+ this configuration value. Variable substitution will
+ only be invoked at
+ the hive
+ cli startup.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.pretty.output.num.cols</name>
+ <value>-1</value>
+ <description>The number of columns to use when formatting output
+ generated
+ by the DESCRIBE PRETTY table_name command. If the value of
+ this
+ property
+ is -1, then hive will use the auto-detected terminal
+ width.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.scratchdir</name>
+ <value>/tmp/hive-${user.name}</value>
+ <description>Scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.exec.local.scratchdir</name>
+ <value>/tmp/${user.name}</value>
+ <description>Local scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.test.mode</name>
+ <value>false</value>
+ <description>whether hive is running in test mode. If yes, it turns
+ on sampling and prefixes the output tablename
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.prefix</name>
+ <value>test_</value>
+ <description>if hive is running in test mode, prefixes the output
+ table by this string
+ </description>
+ </property>
+
+ <!-- If the input table is not bucketed, the denominator of the tablesample
+ is determinied by the parameter below -->
+ <!-- For example, the following query: -->
+ <!-- INSERT OVERWRITE TABLE dest -->
+ <!-- SELECT col1 from src -->
+ <!-- would be converted to -->
+ <!-- INSERT OVERWRITE TABLE test_dest -->
+ <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
+ <property>
+ <name>hive.test.mode.samplefreq</name>
+ <value>32</value>
+ <description>if hive is running in test mode and table is not
+ bucketed, sampling frequency
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.nosamplelist</name>
+ <value></value>
+		<description>if hive is running in test mode, don't sample the above
+		comma-separated list of tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.uris</name>
+ <value></value>
+ <description>Thrift uri for the remote metastore. Used by metastore
+ client to connect to remote metastore.
+ </description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionURL</name>
+ <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
+ <description>JDBC connect string for a JDBC metastore</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionDriverName</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>Driver class name for a JDBC metastore</description>
+ </property>
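+
+ <!-- Illustrative alternative (host and database names are placeholders,
+ not shipped defaults): the embedded Derby store above serves only one
+ client at a time. A commonly used alternative is a MySQL-backed
+ metastore, e.g.
+ javax.jdo.option.ConnectionURL =
+ jdbc:mysql://metastore-host:3306/metastore?createDatabaseIfNotExist=true
+ javax.jdo.option.ConnectionDriverName = com.mysql.jdbc.Driver
+ together with matching ConnectionUserName/ConnectionPassword values
+ in the properties further down. -->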
+
+ <property>
+ <name>javax.jdo.PersistenceManagerFactoryClass</name>
+ <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
+ <description>class implementing the jdo persistence</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.DetachAllOnCommit</name>
+ <value>true</value>
+ <description>detaches all objects from session so that they can be
+ used after transaction is committed
+ </description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.NonTransactionalRead</name>
+ <value>true</value>
+ <description>reads outside of transactions</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionUserName</name>
+ <value>APP</value>
+ <description>username to use against metastore database</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionPassword</name>
+ <value>mine</value>
+ <description>password to use against metastore database</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.Multithreaded</name>
+ <value>true</value>
+ <description>Set this to true if multiple threads access metastore
+ through JDO concurrently.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.connectionPoolingType</name>
+ <value>DBCP</value>
+ <description>Uses a DBCP connection pool for JDBC metastore
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateTables</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateColumns</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateConstraints</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.storeManagerType</name>
+ <value>rdbms</value>
+ <description>metadata store type</description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoCreateSchema</name>
+ <value>true</value>
+		<description>creates necessary schema on a startup if one doesn't
+		exist. Set this to false after creating it once
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoStartMechanismMode</name>
+ <value>checked</value>
+ <description>throw exception if metadata tables are incorrect
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.transactionIsolation</name>
+ <value>read-committed</value>
+ <description>Default transaction isolation level for identity
+ generation.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2</name>
+ <value>false</value>
+ <description>Use a level 2 cache. Turn this off if metadata is
+ changed independently of hive metastore server
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2.type</name>
+ <value>SOFT</value>
+ <description>SOFT=soft reference based cache, WEAK=weak reference
+ based cache.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.identifierFactory</name>
+ <value>datanucleus</value>
+ <description>Name of the identifier factory to use when generating
+ table/column names etc. 'datanucleus' is used for backward
+ compatibility
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.plugin.pluginRegistryBundleCheck</name>
+ <value>LOG</value>
+ <description>Defines what happens when plugin bundles are found and
+ are duplicated [EXCEPTION|LOG|NONE]
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.warehouse.dir</name>
+ <value>/user/hive/warehouse</value>
+ <description>location of default database for the warehouse
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.execute.setugi</name>
+ <value>false</value>
+ <description>In unsecure mode, setting this property to true will
+ cause the metastore to execute DFS operations using the client's
+ reported user and group permissions. Note that this property must be
+		set on both the client and server sides. Further note that it is best
+		effort. If the client sets it to true and the server sets it to false,
+		the client setting will be ignored.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.listeners</name>
+ <value></value>
+		<description>list of comma-separated listeners for metastore events.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.inherit.table.properties</name>
+ <value></value>
+		<description>list of comma-separated keys occurring in table
+ properties which will get inherited to newly created partitions. *
+ implies all the keys will get inherited.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.export.location</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, it is the location to which the metadata will be exported.
+ The default is an empty string, which results in the metadata being
+ exported to the current user's home directory on HDFS.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.move.exported.metadata.to.trash</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, this setting determines if the metadata that is exported
+ will subsequently be moved to the user's trash directory alongside
+ the dropped table data. This ensures that the metadata will be
+ cleaned up along with the dropped table data.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.name.whitelist.pattern</name>
+ <value></value>
+ <description>Partition names will be checked against this regex
+ pattern and rejected if not matched.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.end.function.listeners</name>
+ <value></value>
+ <description>list of comma separated listeners for the end of
+ metastore functions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.expiry.duration</name>
+ <value>0</value>
+ <description>Duration after which events expire from events table (in
+ seconds)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.clean.freq</name>
+ <value>0</value>
+		<description>Frequency at which the timer task runs to purge expired
+		events in the metastore (in seconds).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.connect.retries</name>
+ <value>5</value>
+ <description>Number of retries while opening a connection to
+ metastore
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.failure.retries</name>
+ <value>3</value>
+ <description>Number of retries upon failure of Thrift metastore calls
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.connect.retry.delay</name>
+ <value>1</value>
+ <description>Number of seconds for the client to wait between
+ consecutive connection attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.socket.timeout</name>
+ <value>20</value>
+ <description>MetaStore Client socket timeout in seconds</description>
+ </property>
+
+ <property>
+ <name>hive.metastore.rawstore.impl</name>
+ <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+ <description>Name of the class that implements
+ org.apache.hadoop.hive.metastore.rawstore interface. This class is
+		used for the storage and retrieval of raw metadata objects such as
+		table and database
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.max</name>
+ <value>300</value>
+		<description>Maximum number of objects (tables/partitions) that can be
+ retrieved from metastore in one batch. The higher the number, the
+ less the number of round trips is needed to the Hive metastore
+ server, but it may also cause higher memory requirement at the
+ client side.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.table.partition.max</name>
+ <value>1000</value>
+ <description>Maximum number of table partitions that metastore
+ internally retrieves in one batch.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.default.fileformat</name>
+ <value>TextFile</value>
+ <description>Default file format for CREATE TABLE statement. Options
+ are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
+		... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override
+ </description>
+ </property>
+
+ <property>
+ <name>hive.fileformat.check</name>
+ <value>true</value>
+ <description>Whether to check file format or not when loading data
+ files
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr</name>
+ <value>true</value>
+ <description>Whether to use map-side aggregation in Hive Group By
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.skewindata</name>
+ <value>false</value>
+ <description>Whether there is skew in data to optimize group by
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.multigroupby.common.distincts</name>
+ <value>true</value>
+ <description>Whether to optimize a multi-groupby query with the same
+ distinct.
+ Consider a query like:
+
+ from src
+ insert overwrite table dest1
+ select col1, count(distinct colx) group by
+ col1
+ insert overwrite table
+ dest2 select col2, count(distinct colx) group by
+ col2;
+
+ With this
+ parameter set to true, first we spray by the distinct value
+ (colx),
+ and then
+		perform the 2 group bys. This makes sense if map-side
+ aggregation is
+ turned off. However,
+		with map-side aggregation, it
+ might be useful in some cases to treat
+ the 2 inserts independently,
+ thereby performing the query above in 2MR jobs instead of 3 (due to
+ spraying by distinct key first).
+ If this parameter is turned off, we
+		don't consider the fact that the
+ distinct key is the same across
+ different MR jobs.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.mapaggr.checkinterval</name>
+ <value>100000</value>
+		<description>Number of rows after which the size check of the grouping
+		keys/aggregation classes is performed
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.local.mem</name>
+ <value>0</value>
+ <description>For local mode, memory of the mappers/reducers
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name>
+ <value>0.3</value>
+		<description>Portion of total memory to be used by map-side group
+ aggregation hash table, when this group by is followed by map join
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.force.flush.memory.threshold</name>
+ <value>0.9</value>
+		<description>The max memory to be used by map-side group aggregation
+ hash table, if the memory usage is higher than this number, force to
+ flush data
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.percentmemory</name>
+ <value>0.5</value>
+		<description>Portion of total memory to be used by map-side group
+ aggregation hash table
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.min.reduction</name>
+ <value>0.5</value>
+ <description>Hash aggregation will be turned off if the ratio between
+ hash
+ table size and input rows is bigger than this number. Set to 1
+ to make
+ sure
+ hash aggregation is never turned off.
+ </description>
+ </property>
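+
+ <!-- Worked example (illustrative): with the defaults above, after a
+ mapper has seen 100000 rows (hive.groupby.mapaggr.checkinterval) the
+ ratio of hash-table entries to input rows is checked; if, say, the
+ hash table holds 60000 entries, the ratio 0.6 exceeds 0.5 and
+ map-side hash aggregation is turned off for that mapper. -->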
+
+ <property>
+ <name>hive.optimize.cp</name>
+ <value>true</value>
+ <description>Whether to enable column pruner</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter</name>
+ <value>false</value>
+ <description>Whether to enable automatic use of indexes</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.groupby</name>
+ <value>false</value>
+ <description>Whether to enable optimization of group-by queries using
+ Aggregate indexes.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd</name>
+ <value>true</value>
+ <description>Whether to enable predicate pushdown</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd.storage</name>
+ <value>true</value>
+ <description>Whether to push predicates down into storage handlers.
+ Ignored when hive.optimize.ppd is false.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ppd.recognizetransivity</name>
+ <value>true</value>
+ <description>Whether to transitively replicate predicate filters over
+ equijoin conditions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.groupby</name>
+ <value>true</value>
+ <description>Whether to enable the bucketed group by from bucketed
+ partitions/tables.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin.compiletime</name>
+ <value>false</value>
+ <description>Whether to create a separate plan for skewed keys for
+ the tables in the join.
+ This is based on the skewed keys stored in
+ the metadata. At compile
+ time, the plan is broken
+ into different
+ joins: one for the skewed keys, and the other for the
+ remaining keys.
+ And then,
+ a union is performed for the 2 joins generated above. So
+ unless the
+ same skewed key is present
+ in both the joined tables, the
+ join for the skewed key will be
+ performed as a map-side join.
+
+ The main
+		difference between this parameter and hive.optimize.skewjoin
+ is that
+ this parameter
+ uses the skew information stored in the metastore to
+ optimize the plan
+ at compile time itself.
+ If there is no skew
+ information in the metadata, this parameter will
+		not have any effect.
+ Both hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin
+ should be set to true.
+ Ideally, hive.optimize.skewjoin should be
+ renamed as
+ hive.optimize.skewjoin.runtime, but not doing
+ so for
+ backward compatibility.
+
+ If the skew information is correctly stored
+ in the metadata,
+ hive.optimize.skewjoin.compiletime
+ would change the
+ query plan to take care of it, and
+ hive.optimize.skewjoin will be a
+ no-op.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.union.remove</name>
+ <value>false</value>
+ <description>
+ Whether to remove the union and push the operators
+ between union and the
+ filesink above
+ union. This avoids an extra scan
+ of the output by union. This is
+ independently useful for union
+ queries, and specially useful when
+ hive.optimize.skewjoin.compiletime is set
+ to true, since an
+ extra
+ union is inserted.
+
+ The merge is triggered if either of
+ hive.merge.mapfiles or
+ hive.merge.mapredfiles is set to true.
+ If the
+ user has set hive.merge.mapfiles to true and
+ hive.merge.mapredfiles
+		to false, the idea was that the
+		number of reducers is few, so the number
+		of files is anyway small.
+ However, with this optimization,
+ we are
+ increasing the number of files possibly by a big margin. So, we
+		merge aggressively.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.supports.subdirectories</name>
+ <value>false</value>
+ <description>Whether the version of hadoop which is running supports
+ sub-directories for tables/partitions.
+ Many hive optimizations can be
+ applied if the hadoop version supports
+ sub-directories for
+ tables/partitions. It was added by MAPREDUCE-1501
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multigroupby.singlemr</name>
+ <value>true</value>
+ <description>Whether to optimize multi group by query to generate
+ single M/R
+ job plan. If the multi group by query has common group by
+ keys, it will
+ be
+ optimized to generate single M/R job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to
+ perform the group by in the mapper
+ by using BucketizedHiveInputFormat. The
+ only downside to this
+ is that
+ it limits the number of mappers to the number of files.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted.testmode</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to
+ perform the group by in the mapper
+ by using BucketizedHiveInputFormat. If
+ the test mode is set, the plan
+ is not converted, but a query property is set to denote the same.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.new.job.grouping.set.cardinality</name>
+ <value>30</value>
+ <description>
+ Whether a new map-reduce job should be launched for
+ grouping
+ sets/rollups/cubes.
+ For a query like: select a, b, c,
+ count(1) from T group by a, b, c with
+ rollup;
+ 4 rows are created per
+ row: (a, b, c), (a, b, null), (a, null, null),
+ (null, null, null).
+ This can lead to explosion across map-reduce boundary if the
+ cardinality
+ of T is very high,
+ and map-side aggregation does not do a
+ very good job.
+
+ This parameter decides if hive should add an
+ additional map-reduce job.
+ If the grouping set
+ cardinality (4 in the
+ example above), is more than this value, a new MR job is
+ added under
+ the
+		assumption that the original group by will reduce the data size.
+ </description>
+ </property>
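+
+ <!-- Worked example (illustrative): GROUP BY a, b, c WITH ROLLUP
+ produces 4 grouping sets and GROUP BY a, b, c WITH CUBE produces
+ 2^3 = 8; both are below the threshold of 30, so neither query gets an
+ additional map-reduce job. -->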
+
+ <property>
+ <name>hive.join.emit.interval</name>
+ <value>1000</value>
+ <description>How many rows in the right-most join operand Hive should
+ buffer before emitting the join result.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.join.cache.size</name>
+ <value>25000</value>
+ <description>How many rows in the joining tables (except the
+ streaming table) should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.bucket.cache.size</name>
+ <value>100</value>
+		<description>How many values for each key in the map-joined table
+ should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.cache.numrows</name>
+ <value>25000</value>
+ <description>How many rows should be cached by jdbm for map join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin</name>
+ <value>false</value>
+ <description>Whether to enable skew join optimization.
+ The algorithm
+ is as follows: At runtime, detect the keys with a large
+ skew. Instead
+ of
+ processing those keys, store them temporarily in a hdfs directory.
+ In a
+ follow-up map-reduce
+ job, process those skewed keys. The same key
+ need not be skewed for all
+ the tables, and so,
+ the follow-up
+ map-reduce job (for the skewed keys) would be much
+ faster, since it
+ would be a
+ map-join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.key</name>
+ <value>100000</value>
+ <description>Determine if we get a skew key in join. If we see more
+ than the specified number of rows with the same key in join
+ operator,
+ we think the key as a skew join key.
+ </description>
+ </property>
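+
+ <!-- Illustrative note: with hive.optimize.skewjoin=true and
+ hive.skewjoin.key=100000, any join key seen in more than 100000 rows
+ at runtime is treated as skewed; its rows are parked in a temporary
+ HDFS directory and joined in the follow-up map-join job described
+ above. -->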
+
+ <property>
+ <name>hive.skewjoin.mapjoin.map.tasks</name>
+ <value>10000</value>
+ <description> Determine the number of map task used in the follow up
+ map join job
+ for a skew join. It should be used together with
+ hive.skewjoin.mapjoin.min.split
+ to perform a fine grained control.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.mapjoin.min.split</name>
+ <value>33554432</value>
+ <description> Determine the number of map task at most used in the
+ follow up map join job
+ for a skew join by specifying the minimum
+ split size. It should be used
+ together with
+ hive.skewjoin.mapjoin.map.tasks to perform a fine grained control.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.mode</name>
+ <value>nonstrict</value>
+ <description>The mode in which the hive operations are being
+ performed.
+ In strict mode, some risky queries are not allowed to run.
+ They
+ include:
+ Cartesian Product.
+ No partition being picked up for a
+ query.
+ Comparing bigints and strings.
+ Comparing bigints and doubles.
+ Orderby without limit.
+ </description>
+ </property>
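+
+ <!-- Illustrative queries (the table name src is a placeholder): under
+ hive.mapred.mode=strict,
+ SELECT * FROM src ORDER BY key;
+ is rejected because it has an ORDER BY without a LIMIT, while
+ SELECT * FROM src ORDER BY key LIMIT 100;
+ is allowed. -->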
+
+ <property>
+ <name>hive.enforce.bucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for bucketed map-side join, and it
+ cannot be performed,
+		should the query fail or not? For example, if the
+ buckets in the tables being
+ joined are
+ not a multiple of each other,
+ bucketed map-side join cannot be
+ performed, and the
+ query will fail if
+ hive.enforce.bucketmapjoin is set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.maxerrsize</name>
+ <value>100000</value>
+ <description>Maximum number of bytes a script is allowed to emit to
+ standard error (per map-reduce task). This prevents runaway scripts
+ from filling logs partitions to capacity
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.allow.partial.consumption</name>
+ <value>false</value>
+ <description> When enabled, this option allows a user script to exit
+ successfully without consuming all the data from the standard input.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.id.env.var</name>
+ <value>HIVE_SCRIPT_OPERATOR_ID</value>
+ <description> Name of the environment variable that holds the unique
+ script operator ID in the user's transform function (the custom
+ mapper/reducer that the user has specified in the query)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.truncate.env</name>
+ <value>false</value>
+ <description>Truncate each environment variable for external script
+ in scripts operator to 20KB (to fit system limits)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.compress.output</name>
+ <value>false</value>
+ <description> This controls whether the final outputs of a query (to
+ a local/hdfs file or a hive table) is compressed. The compression
+ codec and other options are determined from hadoop config variables
+ mapred.output.compress*
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.compress.intermediate</name>
+ <value>false</value>
+ <description> This controls whether intermediate files produced by
+ hive between multiple map-reduce jobs are compressed. The
+ compression codec and other options are determined from hadoop
+ config variables mapred.output.compress*
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel</name>
+ <value>false</value>
+ <description>Whether to execute jobs in parallel</description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel.thread.number</name>
+ <value>8</value>
+ <description>How many jobs at most can be executed in parallel
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.rowoffset</name>
+ <value>false</value>
+ <description>Whether to provide the row offset virtual column
+ </description>
+ </property>
+
+ <property>
+ <name>hive.task.progress</name>
+ <value>false</value>
+ <description>Whether Hive should periodically update task progress
+ counters during execution. Enabling this allows task progress to be
+ monitored more closely in the job tracker, but may impose a
+ performance penalty. This flag is automatically set to true for jobs
+ with hive.exec.dynamic.partition set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.war.file</name>
+ <value>lib/hive-hwi-@VERSION@.war</value>
+ <description>This sets the path to the HWI war file, relative to
+ ${HIVE_HOME}.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.host</name>
+ <value>0.0.0.0</value>
+ <description>This is the host address the Hive Web Interface will
+ listen on
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.port</name>
+ <value>9999</value>
+ <description>This is the port the Hive Web Interface will listen on
+ </description>
+ </property>
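+
+ <!-- Illustrative note: with these defaults the Hive Web Interface,
+ started via "hive --service hwi", should be reachable on port 9999 of
+ every interface (0.0.0.0), typically at http://host:9999/hwi where
+ host is a placeholder. -->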
+
+ <property>
+ <name>hive.exec.pre.hooks</name>
+ <value></value>
+ <description>Comma-separated list of pre-execution hooks to be
+ invoked for each statement. A pre-execution hook is specified as the
+ name of a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.post.hooks</name>
+ <value></value>
+ <description>Comma-separated list of post-execution hooks to be
+ invoked for each statement. A post-execution hook is specified as
+ the name of a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.failure.hooks</name>
+ <value></value>
+ <description>Comma-separated list of on-failure hooks to be invoked
+ for each statement. An on-failure hook is specified as the name of
+ Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.init.hooks</name>
+ <value></value>
+ <description>A comma separated list of hooks to be invoked at the
+		beginning of HMSHandler initialization. An init hook is specified as
+ the name of Java class which extends
+ org.apache.hadoop.hive.metastore.MetaStoreInitListener.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.publishers</name>
+ <value></value>
+ <description>Comma-separated list of statistics publishers to be
+ invoked on counters on each job. A client stats publisher is
+ specified as the name of a Java class which implements the
+ org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.counters</name>
+ <value></value>
+ <description>Subset of counters that should be of interest for
+ hive.client.stats.publishers (when one wants to limit their
+ publishing). Non-display names should be used
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapfiles</name>
+ <value>true</value>
+ <description>Merge small files at the end of a map-only job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapredfiles</name>
+ <value>false</value>
+ <description>Merge small files at the end of a map-reduce job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.heartbeat.interval</name>
+ <value>1000</value>
+ <description>Send a heartbeat after this interval - used by mapjoin
+ and filter operators
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.size.per.task</name>
+ <value>256000000</value>
+ <description>Size of merged files at the end of the job</description>
+ </property>
+
+ <property>
+ <name>hive.merge.smallfiles.avgsize</name>
+ <value>16000000</value>
+ <description>When the average output file size of a job is less than
+ this number, Hive will start an additional map-reduce job to merge
+ the output files into bigger files. This is only done for map-only
+ jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
+ hive.merge.mapredfiles is true.
+ </description>
+ </property>
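+
+ <!-- Worked example (illustrative): if a map-only job writes 200 output
+ files of about 4 MB each and hive.merge.mapfiles=true, the average
+ (4 MB) is below 16000000 bytes, so an extra merge job is launched
+ that coalesces them into files of roughly
+ hive.merge.size.per.task = 256000000 bytes, i.e. 3 to 4 merged files. -->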
+
+ <property>
+ <name>hive.mapjoin.smalltable.filesize</name>
+ <value>25000000</value>
+ <description>The threshold for the input file size of the small
+ tables; if the file size is smaller than this threshold, it will try
+ to convert the common join into map join
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ignore.mapjoin.hint</name>
+ <value>true</value>
+ <description>Ignore the mapjoin hint</description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.localtask.max.memory.usage</name>
+ <value>0.90</value>
+		<description>This number means how much memory the local task can
+		take to hold the key/value pairs in the in-memory hash table; if the
+		local task's memory usage is more than this number, the local task
+		will abort itself. It means the data of the small table is too large
+		to be held in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
+ <value>0.55</value>
+		<description>This number means how much memory the local task can
+		take to hold the key/value pairs in the in-memory hash table when this
+		map join is followed by a group by; if the local task's memory usage
+		is more than this number, the local task will abort itself. It means
+		the data of the small table is too large to be held in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.check.memory.rows</name>
+ <value>100000</value>
+		<description>The number of rows processed after which the memory
+		usage is checked
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join</name>
+ <value>false</value>
+		<description>Whether Hive enables the optimization that converts a
+		common join into a mapjoin based on the input file size
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask</name>
+ <value>true</value>
+		<description>Whether Hive enables the optimization that converts a
+		common join into a mapjoin based on the input file
+		size. If this
+		parameter is on, and the sum of size for n-1 of the
+ tables/partitions for a n-way join is smaller than the
+ specified
+ size, the join is directly converted to a mapjoin (there is no
+ conditional task).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask.size</name>
+ <value>10000000</value>
+ <description>If hive.auto.convert.join.noconditionaltask is off, this
+		parameter does not take effect. However, if it
+ is on, and the sum of
+ size for n-1 of the tables/partitions for a
+ n-way join is smaller
+ than this size, the join is directly
+ converted to a mapjoin(there is
+ no conditional task). The default is 10MB
+ </description>
+ </property>
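+
+ <!-- Worked example (illustrative): in a 3-way join of tables of 1 TB,
+ 6 MB and 3 MB, the sum of the n-1 smaller inputs is about 9000000
+ bytes, below 10000000, so once hive.auto.convert.join is also enabled
+ the join is compiled directly into a map join with no conditional
+ task. -->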
+
+ <property>
+ <name>hive.optimize.mapjoin.mapreduce</name>
+ <value>false</value>
+ <description>If hive.auto.convert.join is off, this parameter does
+ not take
+		effect. If it is on, and if there are map-join jobs followed
+ by a
+ map-reduce
+		job (e.g. a group by), each map-only job is merged
+ with the
+ following
+ map-reduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.auto.progress</name>
+ <value>false</value>
+		<description>Whether Hive Transform/Map/Reduce Clause should
+ automatically send progress information to TaskTracker to avoid the
+ task getting killed because of inactivity. Hive sends progress
+ information when the script is outputting to stderr. This option
+		removes the need to periodically produce stderr messages, but
+		users should be cautious because this may prevent the TaskTracker
+		from killing scripts stuck in infinite loops.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.serde</name>
+ <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
+		<description>The default serde for transmitting input data to and
+ reading output data from the user scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.binary.record.max.length</name>
+ <value>1000</value>
+ <description>Read from a binary stream and treat each
+ hive.binary.record.max.length bytes as a record.
+ The last record
+ before the end of stream can have less than
+ hive.binary.record.max.length bytes
+ </description>
+ </property>
+
+
+ <property>
+ <name>hive.script.recordreader</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
+ <description>The default record reader for reading data from the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.recordwriter</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
+ <description>The default record writer for writing data to the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.input.format</name>
+ <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
+ <description>The default input format. Set this to HiveInputFormat if
+ you encounter problems with CombineHiveInputFormat.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.udtf.auto.progress</name>
+ <value>false</value>
+ <description>Whether Hive should automatically send progress
+ information to TaskTracker when using UDTF's to prevent the task
+ getting killed because of inactivity. Users should be cautious
+		because this may prevent TaskTracker from killing tasks with infinite
+ loops.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.reduce.tasks.speculative.execution</name>
+ <value>true</value>
+ <description>Whether speculative execution for reducers should be
+ turned on.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.counters.pull.interval</name>
+ <value>1000</value>
+		<description>The interval with which to poll the JobTracker for the
+		counters of the running job. The smaller it is, the more load there
+		will be on the jobtracker; the higher it is, the less granular the
+		counters that are caught will be.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.location</name>
+ <value>/tmp/${user.name}</value>
+ <description>
+ Location of Hive run time structured log file
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.enable.plan.progress</name>
+ <value>true</value>
+ <description>
+ Whether to log the plan's progress every time a job's
+ progress is checked.
+ These logs are written to the location specified
+ by
+ hive.querylog.location
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.plan.progress.interval</name>
+ <value>60000</value>
+ <description>
+ The interval to wait between logging the plan's progress
+ in
+ milliseconds.
+ If there is a whole number percentage change in the
+ progress of the
+ mappers or the reducers,
+ the progress is logged
+ regardless of this value.
+ The actual interval will be the ceiling of
+ (this value divided by the
+ value of
+ hive.exec.counters.pull.interval)
+ multiplied by the value of hive.exec.counters.pull.interval
+ I.e. if
+		it does not divide evenly by the value of
+ hive.exec.counters.pull.interval it will be
+ logged less frequently
+ than specified.
+ This only has an effect if
+ hive.querylog.enable.plan.progress is set to
+ true.
+ </description>
+ </property>
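+
+ <!-- Worked example (illustrative): if this value were 2500 ms with
+ hive.exec.counters.pull.interval=1000 ms, the effective interval would
+ be ceiling(2500 / 1000) * 1000 = 3000 ms, i.e. plan progress is logged
+ less often than requested whenever the two values do not divide
+ evenly. -->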
+
+ <property>
+ <name>hive.enforce.bucketing</name>
+ <value>false</value>
+ <description>Whether bucketing is enforced. If true, while inserting
+ into the table, bucketing is enforced.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.sorting</name>
+ <value>false</value>
+ <description>Whether sorting is enforced. If true, while inserting
+ into the table, sorting is enforced.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.bucketingsorting</name>
+ <value>true</value>
+ <description>If hive.enforce.bucketing or hive.enforce.sorting is
+		true, don't create a reducer for enforcing
+ bucketing/sorting for
+ queries of the form:
+ insert overwrite table T2 select * from T1;
+ where T1 and T2 are bucketed/sorted by the same keys into the same
+ number
+ of buckets.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.sortmergebucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for sort-merge bucketed map-side join,
+ and it cannot be performed,
+		should the query fail or not?
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join</name>
+ <value>false</value>
+ <description>Will the join be automatically converted to a sort-merge
+ join, if the joined tables pass
+ the criteria for sort-merge join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join.bigtable.selection.policy
+ </name>
+ <value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ
+ </value>
+ <description>The policy to choose the big table for automatic
+ conversion to sort-merge join.
+ By default, the table with the largest
+ partitions is assigned the big
+ table. All policies are:
+ . based on
+ position of the table - the leftmost table is selected
+ org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.
+ . based on
+ total size (all the partitions selected in the query) of
+ the table
+ org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.
+ . based on average size (all the partitions selected in the query)
+ of the table
+ org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.
+ New policies can be added in future.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.connection.url.hook</name>
+ <value></value>
+		<description>Name of the hook to use for retrieving the JDO connection
+ URL. If empty, the value in javax.jdo.option.ConnectionURL is used
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry a metastore call if there
+		was a connection error
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.interval</name>
+ <value>1000</value>
+		<description>The number of milliseconds between metastore retry
+ attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.min.threads</name>
+ <value>200</value>
+ <description>Minimum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.max.threads</name>
+ <value>100000</value>
+ <description>Maximum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the metastore
+ server. Keepalive will prevent accumulation of half-open
+ connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.sasl.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will be secured
+ with SASL. Clients must authenticate with Kerberos.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.thrift.framed.transport.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will use
+ TFramedTransport. When false (default) a standard TTransport is
+ used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.keytab.file</name>
+ <value></value>
+ <description>The path to the Kerberos Keytab file containing the
+ metastore thrift server's service principal.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.principal</name>
+ <value>hive-metastore/_HOST@EXAMPLE.COM</value>
+ <description>The service principal for the metastore thrift server.
+ The special string _HOST will be replaced automatically with the
+ correct host name.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.class</name>
+ <value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value>
+ <description>The delegation token store implementation. Set to
+ org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced
+ cluster.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.connectString
+ </name>
+ <value>localhost:2181</value>
+ <description>The ZooKeeper token store connect string.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
+ <value>/hive/cluster/delegation</value>
+ <description>The root path for token store data.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.acl</name>
+ <value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa
+ </value>
+ <description>ACL for token store entries. List comma separated all
+ server principals for the cluster.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.cache.pinobjtypes</name>
+ <value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order
+ </value>
+ <description>List of comma separated metastore object types that
+ should be pinned in the cache
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication</name>
+ <value>true</value>
+ <description>Remove extra map-reduce jobs if the data is already
+ clustered by the same key which needs to be used again. This should
+ always be set to true. Since it is a new feature, it has been made
+ configurable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication.min.reducer</name>
+ <value>4</value>
+ <description>Reduce deduplication merges two RSs by moving
+ key/parts/reducer-num of the child RS to parent RS.
+ That means if
+ reducer-num of the child RS is fixed (order by or forced
+ bucketing)
+		and small, it can result in a very slow, single MR job.
+ The optimization will be
+		disabled if the number of reducers is less than the
+ specified value.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition</name>
+ <value>true</value>
+ <description>Whether or not to allow dynamic partitions in DML/DDL.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition.mode</name>
+ <value>strict</value>
+ <description>In strict mode, the user must specify at least one
+ static partition in case the user accidentally overwrites all
+ partitions.
+ </description>
+ </property>
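+
+ <!-- Illustrative DML (table and column names are placeholders): with
+ dynamic partitions enabled,
+ INSERT OVERWRITE TABLE dest PARTITION (country='US', dt)
+ SELECT col1, col2, dt FROM src;
+ satisfies strict mode because 'country' is a static partition, while
+ leaving every partition column dynamic would require
+ hive.exec.dynamic.partition.mode=nonstrict. -->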
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions</name>
+ <value>1000</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in total.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions.pernode</name>
+ <value>100</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in each mapper/reducer node.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.created.files</name>
+ <value>100000</value>
+ <description>Maximum number of HDFS files created by all
+ mappers/reducers in a MapReduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.default.partition.name</name>
+ <value>__HIVE_DEFAULT_PARTITION__</value>
+		<description>The default partition name in case the dynamic partition
+		column value is null/empty string or any other value that cannot be
+		escaped. This value must not contain any special character used in
+		HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that
+		the dynamic partition value should not contain this value to avoid
+		confusion.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.dbclass</name>
+ <value>jdbc:derby</value>
+ <description>The default database that stores temporary hive
+ statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.autogather</name>
+ <value>true</value>
+ <description>A flag to gather statistics automatically during the
+ INSERT OVERWRITE command.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbcdriver</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>The JDBC driver for the database that stores temporary
+ hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.dbconnectionstring</name>
+ <value>jdbc:derby:;databaseName=TempStatsStore;create=true</value>
+ <description>The default connection string for the database that
+ stores temporary hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.publisher</name>
+ <value></value>
+ <description>The Java class (implementing the StatsPublisher
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.aggregator</name>
+ <value></value>
+ <description>The Java class (implementing the StatsAggregator
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbc.timeout</name>
+ <value>30</value>
+ <description>Timeout value (number of seconds) used by JDBC
+ connection and statements.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.max</name>
+ <value>0</value>
+		<description>Maximum number of retries when the stats
+		publisher/aggregator gets an exception while updating the
+		intermediate database. Default is no retries on failures.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.wait</name>
+ <value>3000</value>
+		<description>The base waiting window (in milliseconds) before the
+		next retry. The actual wait time is calculated by baseWindow *
+		failures + baseWindow * (failures + 1) * (random number between
+		[0.0,1.0]).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.reliable</name>
+ <value>false</value>
+ <description>Whether queries will fail because stats cannot be
+ collected completely accurately.
+ If this is set to true,
+ reading/writing from/into a partition may fail
+		because the stats
+ could not be computed accurately.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.tablekeys</name>
+ <value>false</value>
+ <description>Whether join and group by keys on tables are derived and
+ maintained in the QueryPlan.
+ This is useful to identify how tables
+ are accessed and to determine if
+ they should be bucketed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.scancols</name>
+ <value>false</value>
+ <description>Whether column accesses are tracked in the QueryPlan.
+ This is useful to identify how tables are accessed and to determine
+ if there are wasted columns that can be trimmed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.ndv.error</name>
+ <value>20.0</value>
+ <description>Standard error expressed in percentage. Provides a
+		tradeoff between accuracy and compute cost. A lower value for error
+ indicates higher accuracy and a higher compute cost.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.key.prefix.max.length</name>
+ <value>200</value>
+ <description>
+ Determines if when the prefix of the key used for
+ intermediate stats
+ collection
+ exceeds a certain length, a hash of the
+ key is used instead. If the value
+		&lt; 0 then hashing
+ is never used,
+		if the value &gt;= 0 then hashing is used only when the
+ key prefixes
+ length
+ exceeds that value. The key prefix is defined as everything
+ preceding the
+ task ID in the key.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.support.concurrency</name>
+ <value>false</value>
+ <description>Whether hive supports concurrency or not. A zookeeper
+ instance must be up and running for the default hive lock manager to
+ support read-write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.numretries</name>
+ <value>100</value>
+ <description>The number of times you want to try to get all the locks
+ </description>
+ </property>
+
+ <property>
+ <name>hive.unlock.numretries</name>
+ <value>10</value>
+ <description>The number of times you want to retry to do one unlock
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.sleep.between.retries</name>
+ <value>60</value>
+ <description>The sleep time (in seconds) between various retries
+ </description>
+ </property>
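+
+ <!-- Worked arithmetic (illustrative): with hive.lock.numretries=100 and
+ a 60 second sleep between retries, a query can wait roughly
+ 100 * 60 = 6000 seconds (about 100 minutes) for its locks before
+ giving up. -->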
+
+ <property>
+ <name>hive.zookeeper.quorum</name>
+ <value></value>
+ <description>The list of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.client.port</name>
+ <value>2181</value>
+ <description>The port of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.session.timeout</name>
+ <value>600000</value>
+ <description>Zookeeper client's session timeout. The client is
+ disconnected, and as a result, all locks released, if a heartbeat is
+ not sent in the timeout.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.namespace</name>
+ <value>hive_zookeeper_namespace</value>
+ <description>The parent node under which all zookeeper nodes are
+ created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.clean.extra.nodes</name>
+ <value>false</value>
+ <description>Clean extra nodes at the end of the session.
+ </description>
+ </property>
+
+ <property>
+ <name>fs.har.impl</name>
+ <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
+ <description>The implementation for accessing Hadoop Archives. Note
+ that this won't be applicable to Hadoop vers less than 0.20
+ </description>
+ </property>
+
+ <property>
+ <name>hive.archive.enabled</name>
+ <value>false</value>
+ <description>Whether archiving operations are permitted</description>
+ </property>
+
+ <property>
+ <name>hive.fetch.output.serde</name>
+ <value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value>
+ <description>The serde used by FetchTask to serialize the fetch
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.mode.local.auto</name>
+ <value>false</value>
+ <description> Let hive determine whether to run in local mode
+ automatically
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.drop.ignorenonexistent</name>
+ <value>true</value>
+ <description>
+ Do not report an error if DROP TABLE/VIEW specifies a
+ non-existent
+ table/view
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.show.job.failure.debug.info</name>
+ <value>true</value>
+ <description>
+ If a job fails, whether to provide a link in the CLI to
+ the task with
+ the
+ most failures, along with debugging hints if
+ applicable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.progress.timeout</name>
+ <value>0</value>
+ <description>
+ How long to run autoprogressor for the script/UDTF
+ operators (in
+ seconds).
+ Set to 0 for forever.
+ </description>
+ </property>
+
+ <!-- HBase Storage Handler Parameters -->
+
+ <property>
+ <name>hive.hbase.wal.enabled</name>
+ <value>true</value>
+ <description>Whether writes to HBase should be forced to the
+ write-ahead log. Disabling this improves HBase write performance at
+ the risk of lost writes in case of a crash.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.table.parameters.default</name>
+ <value></value>
+ <description>Default property values for newly created tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.entity.separator</name>
+ <value>@</value>
+ <description>Separator used to construct names of tables and
+ partitions. For example, dbname@tablename@partitionname
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.createtablelike.properties.whitelist</name>
+ <value></value>
+ <description>Table Properties to copy over when executing a Create
+ Table Like.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.variable.substitute</name>
+ <value>true</value>
+ <description>This enables substitution using syntax like ${var}
+ ${system:var} and ${env:var}.
+ </description>
+ </property>
+
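+ <!-- Illustrative sketch, assuming the substitution defaults above: a CLI
+ session could set a variable and reference it in a query, e.g.
+ SET mydate=2013-01-01;
+ SELECT * FROM src WHERE ds = '${hiveconf:mydate}';
+ where "src" and "mydate" are hypothetical names; the reference is expanded
+ before the query is compiled. -->
+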
+ <property>
+ <name>hive.variable.substitute.depth</name>
+ <value>40</value>
+ <description>The maximum replacements the substitution engine will
+ do.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.conf.validation</name>
+ <value>true</value>
+ <description>Enables type checking for registered hive configurations
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.enabled</name>
+ <value>false</value>
+ <description>enable or disable the hive client authorization
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.user.grants</name>
+ <value></value>
+ <description>the privileges automatically granted to some users
+ whenever a table gets created.
+ An example like
+ "userX,userY:select;userZ:create" will grant select
+ privilege to
+ userX and userY,
+ and grant create privilege to userZ whenever a new
+ table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.group.grants</name>
+ <value></value>
+ <description>the privileges automatically granted to some groups
+ whenever a table gets created.
+ An example like
+ "groupX,groupY:select;groupZ:create" will grant select
+ privilege to
+ groupX and groupY,
+ and grant create privilege to groupZ whenever a
+ new table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.role.grants</name>
+ <value></value>
+ <description>the privileges automatically granted to some roles
+ whenever a table gets created.
+ An example like
+ "roleX,roleY:select;roleZ:create" will grant select
+ privilege to
+ roleX and roleY,
+ and grant create privilege to roleZ whenever a new
+ table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.owner.grants</name>
+ <value></value>
+ <description>the privileges automatically granted to the owner
+ whenever a table gets created.
+ An example like "select,drop" will
+ grant select and drop privilege to
+ the owner of the table
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.authorization.storage.checks</name>
+ <value>false</value>
+ <description>Should the metastore do authorization checks against the
+ underlying storage
+ for operations like drop-partition (disallow the
+ drop-partition if the
+ user in
+ question doesn't have permissions to
+ delete the corresponding directory
+ on the storage).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.error.on.empty.partition</name>
+ <value>false</value>
+ <description>Whether to throw an exception if dynamic partition
+ insert generates empty results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.file.ignore.hdfs</name>
+ <value>false</value>
+ <description>If true, the hdfs location stored in the index file will be
+ ignored at runtime.
+ If the data got moved or the name of the cluster
+ got changed, the
+ index data should still be usable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.minsize</name>
+ <value>5368709120</value>
+ <description>Minimum size (in bytes) of the inputs on which a compact
+ index is automatically used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.maxsize</name>
+ <value>-1</value>
+ <description>Maximum size (in bytes) of the inputs on which a compact
+ index is automatically used.
+ A negative number is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.size</name>
+ <value>10737418240</value>
+ <description>The maximum number of bytes that a query using the
+ compact index can read. Negative value is equivalent to infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.entries</name>
+ <value>10000000</value>
+ <description>The maximum number of index entries to read during a
+ query that uses the compact index. Negative value is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.binary.search</name>
+ <value>true</value>
+ <description>Whether or not to use a binary search to find the
+ entries in an index table that match the filter, where possible
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exim.uri.scheme.whitelist</name>
+ <value>hdfs,pfile</value>
+ <description>A comma separated list of acceptable URI schemes for
+ import and export.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.mapred.only.operation</name>
+ <value>false</value>
+ <description>This parameter controls whether to only acquire locks on
+ queries
+ that need to execute at least one mapred job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.row.max.size</name>
+ <value>100000</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ the minimum size in bytes that each row is
+ guaranteed to have.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.limit.file</name>
+ <value>10</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ maximum number of files we can
+ sample.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.enable</name>
+ <value>false</value>
+ <description>Whether to enable the optimization of trying a smaller
+ subset of data for simple LIMIT first.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.fetch.max</name>
+ <value>50000</value>
+ <description>Maximum number of rows allowed for a smaller subset of
+ data for simple LIMIT, if it is a fetch query.
+ Insert queries are not
+ restricted by this limit.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.rework.mapredwork</name>
+ <value>false</value>
+ <description>Whether to rework the mapred work or not.
+ This is first
+ introduced by SymlinkTextInputFormat to replace symlink
+ files with
+ real paths at compile time.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.concatenate.check.index</name>
+ <value>true</value>
+ <description>If this is set to true, hive will throw an error when doing
+ 'alter table tbl_name [partSpec] concatenate' on a table/partition
+ that has indexes on it. The reason a user would want to set this to true
+ is that it helps avoid handling all of the index drop,
+ recreation,
+ rebuild work. This is very helpful for tables with
+ thousands of partitions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.sample.seednumber</name>
+ <value>0</value>
+ <description>A number used for percentage sampling. By changing this
+ number, the user will change the subsets
+ of data sampled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.io.exception.handlers</name>
+ <value></value>
+ <description>A list of io exception handler class names. This is used
+ to construct a list of exception handlers to handle exceptions thrown
+ by record readers
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.label</name>
+ <value>_c</value>
+ <description>String used as a prefix when auto generating column
+ alias.
+ By default the prefix label will be appended with a column
+ position
+ number to form the column alias. Auto generation would
+ happen if an
+ aggregate function is used in a select clause without an
+ explicit
+ alias.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.includefuncname</name>
+ <value>false</value>
+ <description>Whether to include function name in the column alias
+ auto generated by hive.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.perf.logger</name>
+ <value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
+ <description>The class responsible for logging client-side performance
+ metrics. Must be a subclass of
+ org.apache.hadoop.hive.ql.log.PerfLogger
+ </description>
+ </property>
+
+ <property>
+ <name>hive.start.cleanup.scratchdir</name>
+ <value>false</value>
+ <description>To cleanup the hive scratchdir while starting the hive
+ server
+ </description>
+ </property>
+
+ <property>
+ <name>hive.output.file.extension</name>
+ <value></value>
+ <description>String used as a file extension for output files. If not
+ set, defaults to the codec extension for text files (e.g. ".gz"), or
+ no extension otherwise.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.insert.into.multilevel.dirs</name>
+ <value>false</value>
+ <description>Whether to insert into multilevel directories like
+ "insert
+ directory '/HIVEFT25686/chinna/' from table"
+ </description>
+ </property>
+
+ <property>
+ <name>hive.warehouse.subdir.inherit.perms</name>
+ <value>false</value>
+ <description>Set this to true if the table directories should
+ inherit the
+ permission of the warehouse or database directory instead
+ of being created
+ with the permissions derived from dfs umask
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.job.debug.capture.stacktraces</name>
+ <value>true</value>
+ <description>Whether or not stack traces parsed from the task logs of
+ a sampled failed task for
+ each failed job should be stored in the
+ SessionState
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.driver.run.hooks</name>
+ <value></value>
+ <description>A comma separated list of hooks which implement
+ HiveDriverRunHook and will be run at the
+ beginning and end of
+ Driver.run, these will be run in the order specified
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.output.format</name>
+ <value>text</value>
+ <description>
+ The data format to use for DDL output. One of "text"
+ (for human
+ readable text) or "json" (for a json object).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.transform.escape.input</name>
+ <value>false</value>
+ <description>
+ This adds an option to escape special chars (newlines,
+ carriage returns
+ and
+ tabs) when they are passed to the user script.
+ This is useful if the hive
+ tables
+ can contain data that contains
+ special characters.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.rcfile.use.explicit.header</name>
+ <value>true</value>
+ <description>
+ If this is set the header for RC Files will simply be
+ RCF. If this is
+ not
+ set the header will be that borrowed from sequence
+ files, e.g. SEQ-
+ followed
+ by the input and output RC File formats.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multi.insert.move.tasks.share.dependencies</name>
+ <value>false</value>
+ <description>
+ If this is set all move tasks for tables/partitions (not
+ directories)
+ at the end of a
+ multi-insert query will only begin once
+ the dependencies for all these move
+ tasks have been
+ met.
+ Advantages: If
+ concurrency is enabled, the locks will only be released once the
+ query has
+ finished, so with this config enabled, the time when the
+ table/partition is
+ generated will be much closer to when the lock on
+ it is released.
+ Disadvantages: If concurrency is not enabled, with
+ this disabled,
+ the tables/partitions which
+ are produced by this query
+ and finish earlier will be available for
+ querying
+ much earlier. Since
+ the locks are only released once the query finishes,
+ this
+ does not
+ apply if concurrency is enabled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.fetch.task.conversion</name>
+ <value>minimal</value>
+ <description>
+ Some select queries can be converted to single FETCH
+ task minimizing
+ latency.
+ Currently the query should be single sourced
+ not having any subquery and
+ should not have
+ any aggregations or
+ distincts (which incur RS), lateral views and
+ joins.
+ 1. minimal :
+ SELECT STAR, FILTER on partition columns, LIMIT only
+ 2. more :
+ SELECT, FILTER, LIMIT only (TABLESAMPLE, virtual columns)
+ </description>
+ </property>
+
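+ <!-- Illustrative sketch for hive.fetch.task.conversion: with the default
+ value "minimal", a query such as
+ SELECT * FROM src LIMIT 10;
+ can be answered by a single FETCH task, while
+ SELECT upper(key) FROM src LIMIT 10;
+ still launches a MapReduce job unless the value is raised to "more".
+ The table name "src" is hypothetical. -->
+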
+ <property>
+ <name>hive.hmshandler.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry a HMSHandler call if there
+ was a connection error
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hmshandler.retry.interval</name>
+ <value>1000</value>
+ <description>The number of milliseconds between HMSHandler retry
+ attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.read.socket.timeout</name>
+ <value>10</value>
+ <description>Timeout for the HiveServer to close the connection if no
+ response from the client in N seconds, defaults to 10 seconds.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the Hive server.
+ Keepalive will prevent accumulation of half-open connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.decode.partition.name</name>
+ <value>false</value>
+ <description>Whether to show the unquoted partition names in query
+ results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file.
+ If the property is not
+ set, then logging will be initialized using
+ hive-log4j.properties
+ found on the classpath.
+ If the property is set, the value must be a
+ valid URI (java.net.URI,
+ e.g. "file:///tmp/my-logging.properties"),
+ which you can then
+ extract a URL from and pass to
+ PropertyConfigurator.configure(URL).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file for execution mode(sub
+ command).
+ If the property is not set, then logging will be
+ initialized using
+ hive-exec-log4j.properties found on the classpath.
+ If the property is set, the value must be a valid URI (java.net.URI,
+ e.g. "file:///tmp/my-logging.properties"), which you can then
+ extract a URL from and pass to PropertyConfigurator.configure(URL).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort</name>
+ <value>false</value>
+ <description>
+ If this is set, when writing partitions, the metadata
+ will include the
+ bucketing/sorting
+ properties with which the data was
+ written if any (this will not overwrite the
+ metadata
+ inherited from
+ the table if the table is bucketed/sorted)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort.num.buckets.power.two</name>
+ <value>false</value>
+ <description>
+ If this is set, when setting the number of reducers for
+ the map reduce
+ task which writes the
+ final output files, it will
+ choose a number which is a power of two,
+ unless the user specifies
+ the number of reducers to use using mapred.reduce.tasks. The number
+ of
+ reducers
+ may be set to a power of two, only to be followed by a
+ merge task
+ meaning preventing
+ anything from being inferred.
+ With
+ hive.exec.infer.bucket.sort set to true:
+ Advantages: If this is not
+ set, the number of buckets for partitions will seem
+ arbitrary,
+ which
+ means that the number of mappers used for optimized joins, for
+ example, will
+ be very low. With this set, since the number of buckets
+ used for any
+ partition is
+ a power of two, the number of mappers used
+ for optimized joins will
+ be the least
+ number of buckets used by any
+ partition being joined.
+ Disadvantages: This may mean a much larger or
+ much smaller number of reducers
+ being used in the
+ final map reduce
+ job, e.g. if a job was originally going to take 257
+ reducers,
+ it will
+ now take 512 reducers, similarly if the max number of reducers
+ is
+ 511,
+ and a job was going to use this many, it will now use 256
+ reducers.
+
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.orderby.position.alias</name>
+ <value>false</value>
+ <description>Whether to enable using Column Position Alias in Group
+ By or Order By
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.min.worker.threads</name>
+ <value>5</value>
+ <description>Minimum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.max.worker.threads</name>
+ <value>100</value>
+ <description>Maximum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.port</name>
+ <value>10000</value>
+ <description>Port number of HiveServer2 Thrift interface.
+ Can be
+ overridden by setting $HIVE_SERVER2_THRIFT_PORT
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.bind.host</name>
+ <value>localhost</value>
+ <description>Bind host on which to run the HiveServer2 Thrift
+ interface.
+ Can be overridden by setting
+ $HIVE_SERVER2_THRIFT_BIND_HOST
+ </description>
+ </property>
+
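+ <!-- Illustrative sketch: as the two descriptions above note, both settings
+ can also be overridden from the environment before starting HiveServer2,
+ for example
+ export HIVE_SERVER2_THRIFT_PORT=10001
+ export HIVE_SERVER2_THRIFT_BIND_HOST=0.0.0.0
+ -->
+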
+ <property>
+ <name>hive.server2.authentication</name>
+ <value>NONE</value>
+ <description>
+ Client authentication types.
+ NONE: no authentication
+ check
+ LDAP: LDAP/AD based authentication
+ KERBEROS: Kerberos/GSSAPI
+ authentication
+ CUSTOM: Custom authentication provider
+ (Use with
+ property hive.server2.custom.authentication.class)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.custom.authentication.class</name>
+ <value></value>
+ <description>
+ Custom authentication class. Used when property
+ 'hive.server2.authentication' is set to 'CUSTOM'. Provided class
+ must be a proper implementation of the interface
+ org.apache.hive.service.auth.PasswdAuthenticationProvider.
+ HiveServer2
+ will call its Authenticate(user, password) method to
+ authenticate
+ requests.
+ The implementation may optionally extend the
+ Hadoop's
+ org.apache.hadoop.conf.Configured class to grab Hive's
+ Configuration
+ object.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.kerberos.principal</name>
+ <value></value>
+ <description>
+ Kerberos server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.kerberos.keytab</name>
+ <value></value>
+ <description>
+ Kerberos keytab file for server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.url</name>
+ <value></value>
+ <description>
+ LDAP connection URL
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.baseDN</name>
+ <value></value>
+ <description>
+ LDAP base DN
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.enable.doAs</name>
+ <value>true</value>
+ <description>
+ Setting this property to true will have hive server2
+ execute
+ hive operations as the user making the calls to it.
+ </description>
+ </property>
+
+
+ </configuration>
+
+ <!-- Hive Execution Parameters -->
+ <property>
+ <name>mapred.reduce.tasks</name>
+ <value>-1</value>
+ <description>The default number of reduce tasks per job. Typically set
+ to a prime close to the number of available hosts. Ignored when
+ mapred.job.tracker is "local". Hadoop set this to 1 by default,
+ whereas hive uses -1 as its default value.
+ By setting this property to
+ -1, Hive will automatically figure out what
+ should be the number of
+ reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.bytes.per.reducer</name>
+ <value>1000000000</value>
+ <description>Size per reducer. The default is 1G, i.e. if the input size
+ is 10G, it will use 10 reducers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.reducers.max</name>
+ <value>999</value>
+ <description>Max number of reducers that will be used. If the value
+ specified
+ in the configuration parameter mapred.reduce.tasks is
+ negative, hive
+ will use this one as the max number of reducers when
+ automatically
+ determining the number of reducers.
+ </description>
+ </property>
+
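+ <!-- Worked example for the two settings above: with
+ hive.exec.reducers.bytes.per.reducer = 1000000000 (1 GB) and a 25 GB input,
+ Hive estimates ceil(25000000000 / 1000000000) = 25 reducers; the estimate
+ is capped by hive.exec.reducers.max (999 here), and both are ignored when
+ mapred.reduce.tasks is set to a non-negative value. -->
+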
+ <property>
+ <name>hive.cli.print.header</name>
+ <value>false</value>
+ <description>Whether to print the names of the columns in query
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.print.current.db</name>
+ <value>false</value>
+ <description>Whether to include the current database in the hive
+ prompt.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.prompt</name>
+ <value>hive</value>
+ <description>Command line prompt configuration value. Other hiveconf
+ can be used in
+ this configuration value. Variable substitution will
+ only be invoked at
+ the hive
+ cli startup.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cli.pretty.output.num.cols</name>
+ <value>-1</value>
+ <description>The number of columns to use when formatting output
+ generated
+ by the DESCRIBE PRETTY table_name command. If the value of
+ this
+ property
+ is -1, then hive will use the auto-detected terminal
+ width.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.scratchdir</name>
+ <value>/tmp/hive-${user.name}</value>
+ <description>Scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.exec.local.scratchdir</name>
+ <value>/tmp/${user.name}</value>
+ <description>Local scratch space for Hive jobs</description>
+ </property>
+
+ <property>
+ <name>hive.test.mode</name>
+ <value>false</value>
+ <description>whether hive is running in test mode. If yes, it turns on
+ sampling and prefixes the output tablename
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.prefix</name>
+ <value>test_</value>
+ <description>if hive is running in test mode, prefixes the output
+ table by this string
+ </description>
+ </property>
+
+ <!-- If the input table is not bucketed, the denominator of the tablesample
+ is determined by the parameter below -->
+ <!-- For example, the following query: -->
+ <!-- INSERT OVERWRITE TABLE dest -->
+ <!-- SELECT col1 from src -->
+ <!-- would be converted to -->
+ <!-- INSERT OVERWRITE TABLE test_dest -->
+ <!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
+ <property>
+ <name>hive.test.mode.samplefreq</name>
+ <value>32</value>
+ <description>if hive is running in test mode and table is not
+ bucketed, sampling frequency
+ </description>
+ </property>
+
+ <property>
+ <name>hive.test.mode.nosamplelist</name>
+ <value></value>
+ <description>if hive is running in test mode, don't sample the above
+ comma-separated list of tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.uris</name>
+ <value></value>
+ <description>Thrift uri for the remote metastore. Used by metastore
+ client to connect to remote metastore.
+ </description>
+ </property>
+
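+ <!-- Illustrative sketch: a remote metastore is addressed with a Thrift URI,
+ typically of the form thrift://metastore-host:9083 (host name hypothetical).
+ Leaving the value empty, as above, keeps the metastore embedded. -->
+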
+ <property>
+ <name>javax.jdo.option.ConnectionURL</name>
+ <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
+ <description>JDBC connect string for a JDBC metastore</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionDriverName</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>Driver class name for a JDBC metastore</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.PersistenceManagerFactoryClass</name>
+ <value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
+ <description>class implementing the jdo persistence</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.DetachAllOnCommit</name>
+ <value>true</value>
+ <description>detaches all objects from session so that they can be
+ used after transaction is committed
+ </description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.NonTransactionalRead</name>
+ <value>true</value>
+ <description>reads outside of transactions</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionUserName</name>
+ <value>APP</value>
+ <description>username to use against metastore database</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.ConnectionPassword</name>
+ <value>mine</value>
+ <description>password to use against metastore database</description>
+ </property>
+
+ <property>
+ <name>javax.jdo.option.Multithreaded</name>
+ <value>true</value>
+ <description>Set this to true if multiple threads access metastore
+ through JDO concurrently.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.connectionPoolingType</name>
+ <value>DBCP</value>
+ <description>Uses a DBCP connection pool for JDBC metastore
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateTables</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateColumns</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.validateConstraints</name>
+ <value>false</value>
+ <description>validates existing schema against code. turn this on if
+ you want to verify existing schema
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.storeManagerType</name>
+ <value>rdbms</value>
+ <description>metadata store type</description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoCreateSchema</name>
+ <value>true</value>
+ <description>creates necessary schema on a startup if one doesn't
+ exist. set this to false, after creating it once
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.autoStartMechanismMode</name>
+ <value>checked</value>
+ <description>throw exception if metadata tables are incorrect
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.transactionIsolation</name>
+ <value>read-committed</value>
+ <description>Default transaction isolation level for identity
+ generation.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2</name>
+ <value>false</value>
+ <description>Use a level 2 cache. Turn this off if metadata is changed
+ independently of hive metastore server
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.cache.level2.type</name>
+ <value>SOFT</value>
+ <description>SOFT=soft reference based cache, WEAK=weak reference
+ based cache.
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.identifierFactory</name>
+ <value>datanucleus</value>
+ <description>Name of the identifier factory to use when generating
+ table/column names etc. 'datanucleus' is used for backward
+ compatibility
+ </description>
+ </property>
+
+ <property>
+ <name>datanucleus.plugin.pluginRegistryBundleCheck</name>
+ <value>LOG</value>
+ <description>Defines what happens when plugin bundles are found and
+ are duplicated [EXCEPTION|LOG|NONE]
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.warehouse.dir</name>
+ <value>/user/hive/warehouse</value>
+ <description>location of default database for the warehouse
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.execute.setugi</name>
+ <value>false</value>
+ <description>In unsecure mode, setting this property to true will
+ cause the metastore to execute DFS operations using the client's
+ reported user and group permissions. Note that this property must be
+ set on both the client and server sides. Further note that it is best
+ effort. If the client sets it to true and the server sets it to false,
+ the client setting will be ignored.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.listeners</name>
+ <value></value>
+ <description>list of comma-separated listeners for metastore events.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.inherit.table.properties</name>
+ <value></value>
+ <description>list of comma-separated keys occurring in table
+ properties which will get inherited to newly created partitions. *
+ implies all the keys will get inherited.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.export.location</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, it is the location to which the metadata will be exported.
+ The default is an empty string, which results in the metadata being
+ exported to the current user's home directory on HDFS.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metadata.move.exported.metadata.to.trash</name>
+ <value></value>
+ <description>When used in conjunction with the
+ org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event
+ listener, this setting determines if the metadata that is exported
+ will subsequently be moved to the user's trash directory alongside
+ the dropped table data. This ensures that the metadata will be
+ cleaned up along with the dropped table data.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.partition.name.whitelist.pattern</name>
+ <value></value>
+ <description>Partition names will be checked against this regex
+ pattern and rejected if not matched.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.end.function.listeners</name>
+ <value></value>
+ <description>list of comma separated listeners for the end of
+ metastore functions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.expiry.duration</name>
+ <value>0</value>
+ <description>Duration after which events expire from events table (in
+ seconds)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.event.clean.freq</name>
+ <value>0</value>
+ <description>Frequency at which timer task runs to purge expired
+ events in the metastore (in seconds).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.connect.retries</name>
+ <value>5</value>
+ <description>Number of retries while opening a connection to metastore
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.failure.retries</name>
+ <value>3</value>
+ <description>Number of retries upon failure of Thrift metastore calls
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.connect.retry.delay</name>
+ <value>1</value>
+ <description>Number of seconds for the client to wait between
+ consecutive connection attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.client.socket.timeout</name>
+ <value>20</value>
+ <description>MetaStore Client socket timeout in seconds</description>
+ </property>
+
+ <property>
+ <name>hive.metastore.rawstore.impl</name>
+ <value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+ <description>Name of the class that implements
+ org.apache.hadoop.hive.metastore.rawstore interface. This class is
+ used for storage and retrieval of raw metadata objects such as table,
+ database
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.max</name>
+ <value>300</value>
+ <description>Maximum number of objects (tables/partitions) that can be
+ retrieved from the metastore in one batch. The higher the number, the
+ fewer round trips are needed to the Hive metastore
+ server, but it may also cause higher memory requirement at the client
+ side.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.batch.retrieve.table.partition.max</name>
+ <value>1000</value>
+ <description>Maximum number of table partitions that metastore
+ internally retrieves in one batch.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.default.fileformat</name>
+ <value>TextFile</value>
+ <description>Default file format for CREATE TABLE statement. Options
+ are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
+ ... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description>
+ </property>
+
+ <property>
+ <name>hive.fileformat.check</name>
+ <value>true</value>
+ <description>Whether to check file format or not when loading data
+ files
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr</name>
+ <value>true</value>
+ <description>Whether to use map-side aggregation in Hive Group By
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.skewindata</name>
+ <value>false</value>
+ <description>Whether there is skew in data to optimize group by
+ queries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.multigroupby.common.distincts</name>
+ <value>true</value>
+ <description>Whether to optimize a multi-groupby query with the same
+ distinct.
+ Consider a query like:
+
+ from src
+ insert overwrite table dest1
+ select col1, count(distinct colx) group by
+ col1
+ insert overwrite table
+ dest2 select col2, count(distinct colx) group by
+ col2;
+
+ With this
+ parameter set to true, first we spray by the distinct value
+ (colx),
+ and then
+ perform the 2 group bys. This makes sense if map-side
+ aggregation is turned
+ off. However,
+ with map-side aggregation, it
+ might be useful in some cases to treat the
+ 2 inserts independently,
+ thereby performing the query above in 2MR jobs instead of 3 (due to
+ spraying
+ by distinct key first).
+ If this parameter is turned off, we
+ don't consider the fact that the
+ distinct key is the same across
+ different MR jobs.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.mapaggr.checkinterval</name>
+ <value>100000</value>
+ <description>Number of rows after which the size check of the grouping
+ keys/aggregation classes is performed
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.local.mem</name>
+ <value>0</value>
+ <description>For local mode, memory of the mappers/reducers
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.map.aggr.hash.percentmemory</name>
+ <value>0.3</value>
+ <description>Portion of total memory to be used by map-side group
+ aggregation hash table, when this group by is followed by map join
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.force.flush.memory.threshold</name>
+ <value>0.9</value>
+ <description>The max memory to be used by map-side group aggregation
+ hash table, if the memory usage is higher than this number, force to
+ flush data
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.percentmemory</name>
+ <value>0.5</value>
+ <description>Portion of total memory to be used by map-side group
+ aggregation hash table
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.aggr.hash.min.reduction</name>
+ <value>0.5</value>
+ <description>Hash aggregation will be turned off if the ratio between
+ hash
+ table size and input rows is bigger than this number. Set to 1 to
+ make
+ sure
+ hash aggregation is never turned off.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.cp</name>
+ <value>true</value>
+ <description>Whether to enable column pruner</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter</name>
+ <value>false</value>
+ <description>Whether to enable automatic use of indexes</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.groupby</name>
+ <value>false</value>
+ <description>Whether to enable optimization of group-by queries using
+ Aggregate indexes.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd</name>
+ <value>true</value>
+ <description>Whether to enable predicate pushdown</description>
+ </property>
+
+ <property>
+ <name>hive.optimize.ppd.storage</name>
+ <value>true</value>
+ <description>Whether to push predicates down into storage handlers.
+ Ignored when hive.optimize.ppd is false.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ppd.recognizetransivity</name>
+ <value>true</value>
+ <description>Whether to transitively replicate predicate filters over
+ equijoin conditions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.groupby</name>
+ <value>true</value>
+ <description>Whether to enable the bucketed group by from bucketed
+ partitions/tables.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin.compiletime</name>
+ <value>false</value>
+ <description>Whether to create a separate plan for skewed keys for the
+ tables in the join.
+ This is based on the skewed keys stored in the
+ metadata. At compile time,
+ the plan is broken
+ into different joins: one
+ for the skewed keys, and the other for the
+ remaining keys. And then,
+ a
+ union is performed for the 2 joins generated above. So unless the
+ same skewed key is present
+ in both the joined tables, the join for the
+ skewed key will be
+ performed as a map-side join.
+
+ The main difference
+ between this parameter and hive.optimize.skewjoin is
+ that this
+ parameter
+ uses the skew information stored in the metastore to
+ optimize the plan at
+ compile time itself.
+ If there is no skew
+ information in the metadata, this parameter will
+ not have any effect.
+ Both hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin
+ should
+ be set to true.
+ Ideally, hive.optimize.skewjoin should be
+ renamed as
+ hive.optimize.skewjoin.runtime, but not doing
+ so for
+ backward compatibility.
+
+ If the skew information is correctly stored in
+ the metadata,
+ hive.optimize.skewjoin.compiletime
+ would change the query
+ plan to take care of it, and hive.optimize.skewjoin
+ will be a no-op.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.union.remove</name>
+ <value>false</value>
+ <description>
+ Whether to remove the union and push the operators
+ between union and the
+ filesink above
+ union. This avoids an extra scan
+ of the output by union. This is
+ independently useful for union
+ queries, and specially useful when hive.optimize.skewjoin.compiletime
+ is set
+ to true, since an
+ extra union is inserted.
+
+ The merge is triggered
+ if either of hive.merge.mapfiles or
+ hive.merge.mapredfiles is set to
+ true.
+ If the user has set hive.merge.mapfiles to true and
+ hive.merge.mapredfiles to false, the idea was the
+ number of reducers
+ are few, so the number of files anyway are small.
+ However, with this
+ optimization,
+ we are increasing the number of files possibly by a big
+ margin. So, we
+ merge aggressively.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.supports.subdirectories</name>
+ <value>false</value>
+ <description>Whether the version of hadoop which is running supports
+ sub-directories for tables/partitions.
+ Many hive optimizations can be
+ applied if the hadoop version supports
+ sub-directories for
+ tables/partitions. It was added by MAPREDUCE-1501
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multigroupby.singlemr</name>
+ <value>false</value>
+ <description>Whether to optimize multi group by query to generate
+ single M/R
+ job plan. If the multi group by query has common group by
+ keys, it will
+ be
+ optimized to generate single M/R job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to
+ perform the group by in the mapper
+ by using BucketizedHiveInputFormat. The
+ only downside to this
+ is that
+ it limits the number of mappers to the number of files.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.map.groupby.sorted.testmode</name>
+ <value>false</value>
+ <description>If the bucketing/sorting properties of the table exactly
+ match the grouping key, whether to
+ perform the group by in the mapper
+ by using BucketizedHiveInputFormat. If
+ the test mode is set, the plan
+ is not converted, but a query property is set to denote the same.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.new.job.grouping.set.cardinality</name>
+ <value>30</value>
+ <description>
+ Whether a new map-reduce job should be launched for
+ grouping
+ sets/rollups/cubes.
+ For a query like: select a, b, c, count(1)
+ from T group by a, b, c with
+ rollup;
+ 4 rows are created per row: (a, b,
+ c), (a, b, null), (a, null, null),
+ (null, null, null).
+ This can lead to
+ explosion across map-reduce boundary if the cardinality
+ of T is very
+ high,
+ and map-side aggregation does not do a very good job.
+
+ This
+ parameter decides if hive should add an additional map-reduce job.
+ If
+ the grouping set
+ cardinality (4 in the example above), is more than
+ this value, a new MR job is
+ added under the
+ assumption that the original
+ group by will reduce the data size.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.join.emit.interval</name>
+ <value>1000</value>
+ <description>How many rows in the right-most join operand Hive should
+ buffer before emitting the join result.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.join.cache.size</name>
+ <value>25000</value>
+ <description>How many rows in the joining tables (except the streaming
+ table) should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.bucket.cache.size</name>
+ <value>100</value>
+ <description>How many values for each key in the map-joined table
+ should be cached in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.cache.numrows</name>
+ <value>25000</value>
+ <description>How many rows should be cached by jdbm for map join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.skewjoin</name>
+ <value>false</value>
+ <description>Whether to enable skew join optimization.
+ The algorithm is
+ as follows: At runtime, detect the keys with a large
+ skew. Instead of
+ processing those keys, store them temporarily in a hdfs directory. In
+ a
+ follow-up map-reduce
+ job, process those skewed keys. The same key
+ need not be skewed for all
+ the tables, and so,
+ the follow-up map-reduce
+ job (for the skewed keys) would be much faster,
+ since it would be a
+ map-join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.key</name>
+ <value>100000</value>
+ <description>Determine if we get a skew key in join. If we see more
+ than the specified number of rows with the same key in join operator,
+ we treat the key as a skew join key.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.mapjoin.map.tasks</name>
+ <value>10000</value>
+ <description> Determine the number of map tasks used in the follow up
+ map join job
+ for a skew join. It should be used together with
+ hive.skewjoin.mapjoin.min.split
+ to perform a fine grained control.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.skewjoin.mapjoin.min.split</name>
+ <value>33554432</value>
+ <description> Determine the maximum number of map tasks used in the
+ follow up map join job
+ for a skew join by specifying the minimum split
+ size. It should be used
+ together with
+ hive.skewjoin.mapjoin.map.tasks
+ to perform a fine grained control.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.mode</name>
+ <value>nonstrict</value>
+ <description>The mode in which the hive operations are being
+ performed.
+ In strict mode, some risky queries are not allowed to run.
+ They
+ include:
+ Cartesian Product.
+ No partition being picked up for a
+ query.
+ Comparing bigints and strings.
+ Comparing bigints and doubles.
+ Orderby without limit.
+ </description>
+ </property>
+
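+ <!-- Illustrative sketch of strict mode: with hive.mapred.mode set to strict,
+ SELECT * FROM src ORDER BY key;
+ is rejected because it orders without a LIMIT, while
+ SELECT * FROM src ORDER BY key LIMIT 100;
+ is allowed. The table name "src" is hypothetical. -->
+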
+ <property>
+ <name>hive.enforce.bucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for bucketed map-side join, and it
+ cannot be performed,
+ should the query fail or not? For example, if the
+ buckets in the tables being
+ joined are
+ not a multiple of each other,
+ bucketed map-side join cannot be
+ performed, and the
+ query will fail if
+ hive.enforce.bucketmapjoin is set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.maxerrsize</name>
+ <value>100000</value>
+ <description>Maximum number of bytes a script is allowed to emit to
+ standard error (per map-reduce task). This prevents runaway scripts
+ from filling log partitions to capacity
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.script.allow.partial.consumption</name>
+ <value>false</value>
+ <description> When enabled, this option allows a user script to exit
+ successfully without consuming all the data from the standard input.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.id.env.var</name>
+ <value>HIVE_SCRIPT_OPERATOR_ID</value>
+ <description> Name of the environment variable that holds the unique
+ script operator ID in the user's transform function (the custom
+ mapper/reducer that the user has specified in the query)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.operator.truncate.env</name>
+ <value>false</value>
+ <description>Truncate each environment variable for external script in
+ scripts operator to 20KB (to fit system limits)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.compress.output</name>
+ <value>false</value>
+ <description> This controls whether the final outputs of a query (to a
+ local/hdfs file or a hive table) is compressed. The compression codec
+ and other options are determined from hadoop config variables
+ mapred.output.compress*
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.compress.intermediate</name>
+ <value>false</value>
+ <description> This controls whether intermediate files produced by
+ hive between multiple map-reduce jobs are compressed. The compression
+ codec and other options are determined from hadoop config variables
+ mapred.output.compress*
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel</name>
+ <value>false</value>
+ <description>Whether to execute jobs in parallel</description>
+ </property>
+
+ <property>
+ <name>hive.exec.parallel.thread.number</name>
+ <value>8</value>
+ <description>How many jobs at most can be executed in parallel
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.rowoffset</name>
+ <value>false</value>
+ <description>Whether to provide the row offset virtual column
+ </description>
+ </property>
+
+ <property>
+ <name>hive.task.progress</name>
+ <value>false</value>
+ <description>Whether Hive should periodically update task progress
+ counters during execution. Enabling this allows task progress to be
+ monitored more closely in the job tracker, but may impose a
+ performance penalty. This flag is automatically set to true for jobs
+ with hive.exec.dynamic.partition set to true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.war.file</name>
+ <value>lib/hive-hwi-@VERSION@.war</value>
+ <description>This sets the path to the HWI war file, relative to
+ ${HIVE_HOME}.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.host</name>
+ <value>0.0.0.0</value>
+ <description>This is the host address the Hive Web Interface will
+ listen on
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hwi.listen.port</name>
+ <value>9999</value>
+ <description>This is the port the Hive Web Interface will listen on
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.pre.hooks</name>
+ <value></value>
+ <description>Comma-separated list of pre-execution hooks to be invoked
+ for each statement. A pre-execution hook is specified as the name of
+ a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.post.hooks</name>
+ <value></value>
+ <description>Comma-separated list of post-execution hooks to be
+ invoked for each statement. A post-execution hook is specified as the
+ name of a Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.failure.hooks</name>
+ <value></value>
+ <description>Comma-separated list of on-failure hooks to be invoked
+ for each statement. An on-failure hook is specified as the name of
+ Java class which implements the
+ org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.init.hooks</name>
+ <value></value>
+ <description>A comma separated list of hooks to be invoked at the
+ beginning of HMSHandler initialization. An init hook is specified as
+ the name of Java class which extends
+ org.apache.hadoop.hive.metastore.MetaStoreInitListener.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.publishers</name>
+ <value></value>
+ <description>Comma-separated list of statistics publishers to be
+ invoked on counters on each job. A client stats publisher is
+ specified as the name of a Java class which implements the
+ org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.client.stats.counters</name>
+ <value></value>
+ <description>Subset of counters that should be of interest for
+ hive.client.stats.publishers (when one wants to limit their
+ publishing). Non-display names should be used
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapfiles</name>
+ <value>true</value>
+ <description>Merge small files at the end of a map-only job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.mapredfiles</name>
+ <value>false</value>
+ <description>Merge small files at the end of a map-reduce job
+ </description>
+ </property>
+
+ <property>
+ <name>hive.heartbeat.interval</name>
+ <value>1000</value>
+ <description>Send a heartbeat after this interval - used by mapjoin
+ and filter operators
+ </description>
+ </property>
+
+ <property>
+ <name>hive.merge.size.per.task</name>
+ <value>256000000</value>
+ <description>Size of merged files at the end of the job</description>
+ </property>
+
+ <property>
+ <name>hive.merge.smallfiles.avgsize</name>
+ <value>16000000</value>
+ <description>When the average output file size of a job is less than
+ this number, Hive will start an additional map-reduce job to merge
+ the output files into bigger files. This is only done for map-only
+ jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
+ hive.merge.mapredfiles is true.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.smalltable.filesize</name>
+ <value>25000000</value>
+ <description>The threshold for the input file size of the small
+ tables; if the file size is smaller than this threshold, it will try
+ to convert the common join into map join
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ignore.mapjoin.hint</name>
+ <value>true</value>
+ <description>Ignore the mapjoin hint</description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.localtask.max.memory.usage</name>
+ <value>0.90</value>
+ <description>This number means how much memory the local task can take
+ to hold the key/value pairs in the in-memory hash table; if the local task's
+ memory usage is more than this number, the local task will abort
+ itself. It means the data of the small table is too large to be held
+ in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.followby.gby.localtask.max.memory.usage</name>
+ <value>0.55</value>
+ <description>This number means how much memory the local task can take
+ to hold the key/value pairs in the in-memory hash table when this map join is
+ followed by a group by; if the local task's memory usage is more than
+ this number, the local task will abort itself. It means the
+ data of the small table is too large to be held in memory.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapjoin.check.memory.rows</name>
+ <value>100000</value>
+ <description>The number of processed rows after which the local task
+ checks its memory usage
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join</name>
+ <value>false</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file size
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask</name>
+ <value>true</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file
+ size. If this
+ parameter is on, and the sum of size for n-1 of the
+ tables/partitions
+ for an n-way join is smaller than the
+ specified size, the join is
+ directly converted to a mapjoin (there is no
+ conditional task).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask.size</name>
+ <value>10000000</value>
+ <description>If hive.auto.convert.join.noconditionaltask is off, this
+ parameter does not take effect. However, if it
+ is on, and the sum of
+ size for n-1 of the tables/partitions for an n-way
+ join is smaller than
+ this size, the join is directly
+ converted to a mapjoin (there is no
+ conditional task). The default is 10MB
+ </description>
+ </property>
+
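+ <!-- Worked example for the size threshold above: in a 3-way join where the
+ two smaller tables total 4 MB + 5 MB = 9 MB, which is under the default of
+ 10000000 bytes, the join is compiled directly as a map join with no
+ conditional task; at 6 MB + 6 MB = 12 MB it is not. -->
+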
+ <property>
+ <name>hive.optimize.mapjoin.mapreduce</name>
+ <value>false</value>
+ <description>If hive.auto.convert.join is off, this parameter does not
+ take
+ effect. If it is on, and if there are map-join jobs followed by a
+ map-reduce
+ job (e.g. a group by), each map-only job is merged with
+ the
+ following
+ map-reduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.auto.progress</name>
+ <value>false</value>
+ <description>Whether Hive Transform/Map/Reduce Clause should
+ automatically send progress information to TaskTracker to avoid the
+ task getting killed because of inactivity. Hive sends progress
+ information when the script is outputting to stderr. This option
+ removes the need of periodically producing stderr messages, but users
+ should be cautious because this may prevent infinite loops in the
+ scripts from being killed by the TaskTracker.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.serde</name>
+ <value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
+ <description>The default serde for transmitting input data to and
+ reading output data from the user scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.binary.record.max.length</name>
+ <value>1000</value>
+ <description>Read from a binary stream and treat each
+ hive.binary.record.max.length bytes as a record.
+ The last record
+ before the end of stream can have less than
+ hive.binary.record.max.length bytes
+ </description>
+ </property>
+
+
+ <property>
+ <name>hive.script.recordreader</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
+ <description>The default record reader for reading data from the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.script.recordwriter</name>
+ <value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
+ <description>The default record writer for writing data to the user
+ scripts.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.input.format</name>
+ <value>org.apache.hadoop.hive.ql.io.CombineHiveInputFormat</value>
+ <description>The default input format. Set this to HiveInputFormat if
+ you encounter problems with CombineHiveInputFormat.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.udtf.auto.progress</name>
+ <value>false</value>
+ <description>Whether Hive should automatically send progress
+ information to TaskTracker when using UDTF's to prevent the task
+ getting killed because of inactivity. Users should be cautious
+ because this may prevent TaskTracker from killing tasks with infinite
+ loops.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.mapred.reduce.tasks.speculative.execution</name>
+ <value>true</value>
+ <description>Whether speculative execution for reducers should be
+ turned on.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.counters.pull.interval</name>
+ <value>1000</value>
+ <description>The interval at which to poll the JobTracker for the
+ counters of the running job. The smaller it is, the more load there
+ will be on the JobTracker; the higher it is, the less granular the
+ captured counters will be.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.location</name>
+ <value>/tmp/${user.name}</value>
+ <description>
+ Location of Hive run time structured log file
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.enable.plan.progress</name>
+ <value>true</value>
+ <description>
+ Whether to log the plan's progress every time a job's
+ progress is checked.
+ These logs are written to the location specified
+ by
+ hive.querylog.location
+ </description>
+ </property>
+
+ <property>
+ <name>hive.querylog.plan.progress.interval</name>
+ <value>60000</value>
+ <description>
+ The interval to wait between logging the plan's progress, in
+ milliseconds. If there is a whole-number percentage change in the
+ progress of the mappers or the reducers, the progress is logged
+ regardless of this value. The actual interval will be the ceiling of
+ (this value divided by the value of hive.exec.counters.pull.interval)
+ multiplied by the value of hive.exec.counters.pull.interval, i.e. if it
+ does not divide evenly by the value of hive.exec.counters.pull.interval
+ it will be logged less frequently than specified. This only has an
+ effect if hive.querylog.enable.plan.progress is set to true.
+ </description>
+ </property>
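+
+ <!-- Worked example of the rounding rule above (values are illustrative):
+ with hive.exec.counters.pull.interval=1000 ms and
+ hive.querylog.plan.progress.interval=60000 ms, the effective interval is
+ ceil(60000/1000)*1000 = 60000 ms; if the plan progress interval were set to
+ 61500 ms instead, the effective interval would be
+ ceil(61500/1000)*1000 = 62000 ms. -->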
+
+ <property>
+ <name>hive.enforce.bucketing</name>
+ <value>false</value>
+ <description>Whether bucketing is enforced. If true, while inserting
+ into the table, bucketing is enforced.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.sorting</name>
+ <value>false</value>
+ <description>Whether sorting is enforced. If true, while inserting
+ into the table, sorting is enforced.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.bucketingsorting</name>
+ <value>true</value>
+ <description>If hive.enforce.bucketing or hive.enforce.sorting is
+ true, don't create a reducer for enforcing bucketing/sorting for
+ queries of the form:
+ insert overwrite table T2 select * from T1;
+ where T1 and T2 are bucketed/sorted by the same keys into the same
+ number of buckets.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.enforce.sortmergebucketmapjoin</name>
+ <value>false</value>
+ <description>If the user asked for a sort-merge bucketed map-side join
+ and it cannot be performed, should the query fail or not?
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join</name>
+ <value>false</value>
+ <description>Whether the join will be automatically converted to a
+ sort-merge join if the joined tables pass the criteria for a
+ sort-merge join.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.sortmerge.join.bigtable.selection.policy
+ </name>
+ <value>org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ
+ </value>
+ <description>The policy to choose the big table for automatic
+ conversion to sort-merge join.
+ By default, the table with the largest partitions is chosen as the big
+ table. All policies are:
+ . based on position of the table - the leftmost table is selected
+ org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.
+ . based on total size (all the partitions selected in the query) of
+ the table
+ org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.
+ . based on average size (all the partitions selected in the query) of
+ the table
+ org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.
+ New policies can be added in the future.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.connection.url.hook</name>
+ <value></value>
+ <description>Name of the hook to use for retrieving the JDO connection
+ URL. If empty, the value in javax.jdo.option.ConnectionURL is used
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry a metastore call if there
+ is a connection error.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.ds.retry.interval</name>
+ <value>1000</value>
+ <description>The number of milliseconds between metastore retry
+ attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.min.threads</name>
+ <value>200</value>
+ <description>Minimum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.max.threads</name>
+ <value>100000</value>
+ <description>Maximum number of worker threads in the Thrift server's
+ pool.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the metastore server.
+ Keepalive will prevent accumulation of half-open connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.sasl.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will be secured
+ with SASL. Clients must authenticate with Kerberos.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.thrift.framed.transport.enabled</name>
+ <value>false</value>
+ <description>If true, the metastore thrift interface will use
+ TFramedTransport. When false (default) a standard TTransport is used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.keytab.file</name>
+ <value></value>
+ <description>The path to the Kerberos Keytab file containing the
+ metastore thrift server's service principal.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.kerberos.principal</name>
+ <value>hive-metastore/_HOST@EXAMPLE.COM</value>
+ <description>The service principal for the metastore thrift server.
+ The special string _HOST will be replaced automatically with the
+ correct host name.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.class</name>
+ <value>org.apache.hadoop.hive.thrift.MemoryTokenStore</value>
+ <description>The delegation token store implementation. Set to
+ org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced
+ cluster.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.connectString
+ </name>
+ <value>localhost:2181</value>
+ <description>The ZooKeeper token store connect string.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
+ <value>/hive/cluster/delegation</value>
+ <description>The root path for token store data.</description>
+ </property>
+
+ <property>
+ <name>hive.cluster.delegation.token.store.zookeeper.acl</name>
+ <value>sasl:hive/host1@EXAMPLE.COM:cdrwa,sasl:hive/host2@EXAMPLE.COM:cdrwa
+ </value>
+ <description>ACL for token store entries. Comma-separated list of all
+ server principals for the cluster.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.cache.pinobjtypes</name>
+ <value>Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order
+ </value>
+ <description>List of comma separated metastore object types that
+ should be pinned in the cache
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication</name>
+ <value>true</value>
+ <description>Remove extra map-reduce jobs if the data is already
+ clustered by the same key which needs to be used again. This should
+ always be set to true. Since it is a new feature, it has been made
+ configurable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.reducededuplication.min.reducer</name>
+ <value>4</value>
+ <description>Reduce deduplication merges two RSs (reduce sinks) by
+ moving the key/parts/reducer-num of the child RS to the parent RS.
+ That means if the reducer-num of the child RS is fixed (order by or
+ forced bucketing) and small, it can result in a single, very slow MR
+ job. The optimization will be disabled if the number of reducers is
+ less than the specified value.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition</name>
+ <value>true</value>
+ <description>Whether or not to allow dynamic partitions in DML/DDL.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.dynamic.partition.mode</name>
+ <value>strict</value>
+ <description>In strict mode, the user must specify at least one static
+ partition in case the user accidentally overwrites all partitions.
+ </description>
+ </property>
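+
+ <!-- Example of a dynamic-partition insert that satisfies strict mode by
+ supplying at least one static partition (table and column names are
+ hypothetical):
+ set hive.exec.dynamic.partition=true;
+ INSERT OVERWRITE TABLE sales PARTITION (country='US', state)
+ SELECT col1, col2, state FROM staging_sales;
+ Here country is static and state is dynamic; with
+ hive.exec.dynamic.partition.mode=nonstrict the static part could be
+ omitted. -->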
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions</name>
+ <value>1000</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in total.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.dynamic.partitions.pernode</name>
+ <value>100</value>
+ <description>Maximum number of dynamic partitions allowed to be
+ created in each mapper/reducer node.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.max.created.files</name>
+ <value>100000</value>
+ <description>Maximum number of HDFS files created by all
+ mappers/reducers in a MapReduce job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.default.partition.name</name>
+ <value>__HIVE_DEFAULT_PARTITION__</value>
+ <description>The default partition name in case the dynamic partition
+ column value is a null/empty string or any other value that cannot be
+ escaped. This value must not contain any special character used in
+ HDFS URIs (e.g., ':', '%', '/', etc.). The user has to be aware that
+ the dynamic partition value should not contain this value, to avoid
+ confusion.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.dbclass</name>
+ <value>jdbc:derby</value>
+ <description>The default database that stores temporary hive
+ statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.autogather</name>
+ <value>true</value>
+ <description>A flag to gather statistics automatically during the
+ INSERT OVERWRITE command.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbcdriver</name>
+ <value>org.apache.derby.jdbc.EmbeddedDriver</value>
+ <description>The JDBC driver for the database that stores temporary
+ hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.dbconnectionstring</name>
+ <value>jdbc:derby:;databaseName=TempStatsStore;create=true</value>
+ <description>The default connection string for the database that
+ stores temporary hive statistics.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.publisher</name>
+ <value></value>
+ <description>The Java class (implementing the StatsPublisher
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.default.aggregator</name>
+ <value></value>
+ <description>The Java class (implementing the StatsAggregator
+ interface) that is used by default if hive.stats.dbclass is not JDBC
+ or HBase.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.jdbc.timeout</name>
+ <value>30</value>
+ <description>Timeout value (number of seconds) used by JDBC connection
+ and statements.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.max</name>
+ <value>0</value>
+ <description>Maximum number of retries when the stats
+ publisher/aggregator gets an exception updating the intermediate
+ database. The default is no retries on failures.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.retries.wait</name>
+ <value>3000</value>
+ <description>The base waiting window (in milliseconds) before the next
+ retry. The actual wait time is calculated by baseWindow * failures +
+ baseWindow * (failures + 1) * (random number between [0.0,1.0]).
+ </description>
+ </property>
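+
+ <!-- Worked example of the backoff formula above (numbers are illustrative):
+ with baseWindow = 3000 ms and failures = 2, the wait is
+ 3000 * 2 + 3000 * 3 * r, i.e. between 6000 ms and 15000 ms depending on
+ the random factor r drawn from [0.0,1.0]. -->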
+
+ <property>
+ <name>hive.stats.reliable</name>
+ <value>false</value>
+ <description>Whether queries will fail because stats cannot be
+ collected completely accurately. If this is set to true,
+ reading/writing from/into a partition may fail because the stats
+ could not be computed accurately.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.tablekeys</name>
+ <value>false</value>
+ <description>Whether join and group by keys on tables are derived and
+ maintained in the QueryPlan.
+ This is useful to identify how tables are
+ accessed and to determine if
+ they should be bucketed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.collect.scancols</name>
+ <value>false</value>
+ <description>Whether column accesses are tracked in the QueryPlan.
+ This is useful to identify how tables are accessed and to determine
+ if there are wasted columns that can be trimmed.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.ndv.error</name>
+ <value>20.0</value>
+ <description>Standard error expressed in percentage. Provides a
+ tradeoff between accuracy and compute cost. A lower value for error
+ indicates higher accuracy and a higher compute cost.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.stats.key.prefix.max.length</name>
+ <value>200</value>
+ <description>
+ Determines whether, when the prefix of the key used for intermediate
+ stats collection exceeds a certain length, a hash of the key is used
+ instead. If the value is less than 0 then hashing is never used; if
+ the value is greater than or equal to 0 then hashing is used only when
+ the key prefix length exceeds that value. The key prefix is defined as
+ everything preceding the task ID in the key.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.support.concurrency</name>
+ <value>false</value>
+ <description>Whether hive supports concurrency or not. A zookeeper
+ instance must be up and running for the default hive lock manager to
+ support read-write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.numretries</name>
+ <value>100</value>
+ <description>The number of times you want to try to get all the locks
+ </description>
+ </property>
+
+ <property>
+ <name>hive.unlock.numretries</name>
+ <value>10</value>
+ <description>The number of times you want to retry a single unlock
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.sleep.between.retries</name>
+ <value>60</value>
+ <description>The sleep time (in seconds) between various retries
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.quorum</name>
+ <value></value>
+ <description>The list of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.client.port</name>
+ <value>2181</value>
+ <description>The port of zookeeper servers to talk to. This is only
+ needed for read/write locks.
+ </description>
+ </property>
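+
+ <!-- Minimal sketch of enabling ZooKeeper-backed locking with the properties
+ described here; these are normally placed in hive-site.xml, though they can
+ also be set for a session (the host name is a placeholder):
+ set hive.support.concurrency=true;
+ set hive.zookeeper.quorum=zk1.example.com;
+ set hive.zookeeper.client.port=2181;
+ A running ZooKeeper ensemble is required, as noted under
+ hive.support.concurrency. -->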
+
+ <property>
+ <name>hive.zookeeper.session.timeout</name>
+ <value>600000</value>
+ <description>ZooKeeper client's session timeout. The client is
+ disconnected, and as a result all locks are released, if a heartbeat
+ is not sent within the timeout.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.namespace</name>
+ <value>hive_zookeeper_namespace</value>
+ <description>The parent node under which all zookeeper nodes are
+ created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.zookeeper.clean.extra.nodes</name>
+ <value>false</value>
+ <description>Clean extra nodes at the end of the session.
+ </description>
+ </property>
+
+ <property>
+ <name>fs.har.impl</name>
+ <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
+ <description>The implementation for accessing Hadoop Archives. Note
+ that this won't be applicable to Hadoop versions less than 0.20
+ </description>
+ </property>
+
+ <property>
+ <name>hive.archive.enabled</name>
+ <value>false</value>
+ <description>Whether archiving operations are permitted</description>
+ </property>
+
+ <property>
+ <name>hive.fetch.output.serde</name>
+ <value>org.apache.hadoop.hive.serde2.DelimitedJSONSerDe</value>
+ <description>The serde used by FetchTask to serialize the fetch
+ output.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.mode.local.auto</name>
+ <value>false</value>
+ <description> Let hive determine whether to run in local mode
+ automatically
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.drop.ignorenonexistent</name>
+ <value>true</value>
+ <description>
+ Do not report an error if DROP TABLE/VIEW specifies a
+ non-existent
+ table/view
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.show.job.failure.debug.info</name>
+ <value>true</value>
+ <description>
+ If a job fails, whether to provide a link in the CLI to
+ the task with
+ the
+ most failures, along with debugging hints if
+ applicable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.progress.timeout</name>
+ <value>0</value>
+ <description>
+ How long to run autoprogressor for the script/UDTF
+ operators (in
+ seconds).
+ Set to 0 for forever.
+ </description>
+ </property>
+
+ <!-- HBase Storage Handler Parameters -->
+
+ <property>
+ <name>hive.hbase.wal.enabled</name>
+ <value>true</value>
+ <description>Whether writes to HBase should be forced to the
+ write-ahead log. Disabling this improves HBase write performance at
+ the risk of lost writes in case of a crash.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.table.parameters.default</name>
+ <value></value>
+ <description>Default property values for newly created tables
+ </description>
+ </property>
+
+ <property>
+ <name>hive.entity.separator</name>
+ <value>@</value>
+ <description>Separator used to construct names of tables and
+ partitions. For example, dbname@tablename@partitionname
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.createtablelike.properties.whitelist</name>
+ <value></value>
+ <description>Table Properties to copy over when executing a Create
+ Table Like.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.variable.substitute</name>
+ <value>true</value>
+ <description>This enables substitution using syntax like ${var}
+ ${system:var} and ${env:var}.
+ </description>
+ </property>
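+
+ <!-- Example of the substitution syntax described above (the variable name,
+ value, and table are made up):
+ set hivevar:region=asia;
+ SELECT * FROM orders WHERE region = '${hivevar:region}';
+ System and environment values can be referenced the same way via
+ ${system:var} and ${env:var}. -->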
+
+ <property>
+ <name>hive.variable.substitute.depth</name>
+ <value>40</value>
+ <description>The maximum replacements the substitution engine will do.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.conf.validation</name>
+ <value>true</value>
+ <description>Enables type checking for registered Hive configurations
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.enabled</name>
+ <value>false</value>
+ <description>Enable or disable Hive client authorization.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.user.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some users
+ whenever a table gets created. An example like
+ "userX,userY:select;userZ:create" will grant select privilege to userX
+ and userY, and grant create privilege to userZ whenever a new table is
+ created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.group.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some groups
+ whenever a table gets created. An example like
+ "groupX,groupY:select;groupZ:create" will grant select privilege to
+ groupX and groupY, and grant create privilege to groupZ whenever a new
+ table is created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.role.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to some roles
+ whenever a table gets created. An example like
+ "roleX,roleY:select;roleZ:create" will grant select privilege to roleX
+ and roleY, and grant create privilege to roleZ whenever a new table is
+ created.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.security.authorization.createtable.owner.grants</name>
+ <value></value>
+ <description>The privileges automatically granted to the owner
+ whenever a table gets created. An example like "select,drop" will
+ grant select and drop privileges to the owner of the table.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.metastore.authorization.storage.checks</name>
+ <value>false</value>
+ <description>Should the metastore do authorization checks against the
+ underlying storage for operations like drop-partition (disallow the
+ drop-partition if the user in question doesn't have permissions to
+ delete the corresponding directory on the storage).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.error.on.empty.partition</name>
+ <value>false</value>
+ <description>Whether to throw an exception if dynamic partition insert
+ generates empty results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.file.ignore.hdfs</name>
+ <value>false</value>
+ <description>If true, the HDFS location stored in the index file will
+ be ignored at runtime. If the data got moved or the name of the
+ cluster got changed, the index data should still be usable.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.minsize</name>
+ <value>5368709120</value>
+ <description>Minimum size (in bytes) of the inputs on which a compact
+ index is automatically used.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.optimize.index.filter.compact.maxsize</name>
+ <value>-1</value>
+ <description>Maximum size (in bytes) of the inputs on which a compact
+ index is automatically used.
+ A negative number is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.size</name>
+ <value>10737418240</value>
+ <description>The maximum number of bytes that a query using the
+ compact index can read. Negative value is equivalent to infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.query.max.entries</name>
+ <value>10000000</value>
+ <description>The maximum number of index entries to read during a
+ query that uses the compact index. Negative value is equivalent to
+ infinity.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.index.compact.binary.search</name>
+ <value>true</value>
+ <description>Whether or not to use a binary search to find the entries
+ in an index table that match the filter, where possible
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exim.uri.scheme.whitelist</name>
+ <value>hdfs,pfile</value>
+ <description>A comma separated list of acceptable URI schemes for
+ import and export.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.lock.mapred.only.operation</name>
+ <value>false</value>
+ <description>This parameter controls whether to acquire locks only for
+ queries that need to execute at least one mapred job.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.row.max.size</name>
+ <value>100000</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ the minimum size that each row is guaranteed to have.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.limit.file</name>
+ <value>10</value>
+ <description>When trying a smaller subset of data for simple LIMIT,
+ maximum number of files we can
+ sample.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.enable</name>
+ <value>false</value>
+ <description>Whether to enable the optimization of trying a smaller
+ subset of data for simple LIMIT first.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.limit.optimize.fetch.max</name>
+ <value>50000</value>
+ <description>Maximum number of rows allowed for a smaller subset of
+ data for simple LIMIT, if it is a fetch query.
+ Insert queries are not
+ restricted by this limit.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.rework.mapredwork</name>
+ <value>false</value>
+ <description>Whether to rework the mapred work or not. This was first
+ introduced by SymlinkTextInputFormat to replace symlink files with
+ real paths at compile time.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.concatenate.check.index</name>
+ <value>true</value>
+ <description>If this is set to true, Hive will throw an error when
+ doing 'alter table tbl_name [partSpec] concatenate' on a
+ table/partition that has indexes on it. The reason to set this to true
+ is that it can help the user avoid handling all the index drop,
+ recreation, and rebuild work. This is very helpful for tables with
+ thousands of partitions.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.sample.seednumber</name>
+ <value>0</value>
+ <description>A number used for percentage sampling. By changing this
+ number, the user will change the subsets of data sampled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.io.exception.handlers</name>
+ <value></value>
+ <description>A list of IO exception handler class names. This is used
+ to construct a list of exception handlers to handle exceptions thrown
+ by record readers.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.label</name>
+ <value>_c</value>
+ <description>String used as a prefix when auto-generating column
+ aliases. By default the prefix label will be appended with a column
+ position number to form the column alias. Auto-generation happens if
+ an aggregate function is used in a select clause without an explicit
+ alias.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.autogen.columnalias.prefix.includefuncname</name>
+ <value>false</value>
+ <description>Whether to include function name in the column alias auto
+ generated by hive.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.perf.logger</name>
+ <value>org.apache.hadoop.hive.ql.log.PerfLogger</value>
+ <description>The class responsible for logging client-side performance
+ metrics. Must be a subclass of
+ org.apache.hadoop.hive.ql.log.PerfLogger.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.start.cleanup.scratchdir</name>
+ <value>false</value>
+ <description>Whether to clean up the Hive scratch directory while
+ starting the Hive server.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.output.file.extension</name>
+ <value></value>
+ <description>String used as a file extension for output files. If not
+ set, defaults to the codec extension for text files (e.g. ".gz"), or
+ no extension otherwise.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.insert.into.multilevel.dirs</name>
+ <value>false</value>
+ <description>Whether to allow inserting into multilevel directories
+ like
+ "insert directory '/HIVEFT25686/chinna/' from table"
+ </description>
+ </property>
+
+ <property>
+ <name>hive.warehouse.subdir.inherit.perms</name>
+ <value>false</value>
+ <description>Set this to true if the table directories should inherit
+ the permission of the warehouse or database directory instead of being
+ created with the permissions derived from the dfs umask.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.job.debug.capture.stacktraces</name>
+ <value>true</value>
+ <description>Whether or not stack traces parsed from the task logs of
+ a sampled failed task for
+ each failed job should be stored in the
+ SessionState
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.driver.run.hooks</name>
+ <value></value>
+ <description>A comma-separated list of hooks which implement
+ HiveDriverRunHook and will be run at the beginning and end of
+ Driver.run; these will be run in the order specified.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.ddl.output.format</name>
+ <value>text</value>
+ <description>
+ The data format to use for DDL output. One of "text" (for
+ human
+ readable text) or "json" (for a json object).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.transform.escape.input</name>
+ <value>false</value>
+ <description>
+ This adds an option to escape special characters (newlines, carriage
+ returns, and tabs) when they are passed to the user script. This is
+ useful if the Hive tables can contain data that contains special
+ characters.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.rcfile.use.explicit.header</name>
+ <value>true</value>
+ <description>
+ If this is set, the header for RC Files will simply be RCF. If this
+ is not set, the header will be the one borrowed from sequence files,
+ e.g. SEQ- followed by the input and output RC File formats.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.multi.insert.move.tasks.share.dependencies</name>
+ <value>false</value>
+ <description>
+ If this is set, all move tasks for tables/partitions (not directories)
+ at the end of a multi-insert query will only begin once the
+ dependencies for all these move tasks have been met.
+ Advantages: If concurrency is enabled, the locks will only be released
+ once the query has finished, so with this config enabled, the time
+ when the table/partition is generated will be much closer to when the
+ lock on it is released.
+ Disadvantages: If concurrency is not enabled, with this disabled, the
+ tables/partitions which are produced by this query and finish earlier
+ will be available for querying much earlier. Since the locks are only
+ released once the query finishes, this does not apply if concurrency
+ is enabled.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.fetch.task.conversion</name>
+ <value>minimal</value>
+ <description>
+ Some select queries can be converted to a single FETCH task,
+ minimizing latency. Currently the query should be single-sourced, not
+ have any subquery, and should not have any aggregations or distincts
+ (which incur an RS), lateral views, or joins.
+ 1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only
+ 2. more : SELECT, FILTER, LIMIT only (TABLESAMPLE, virtual columns)
+ </description>
+ </property>
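+
+ <!-- Illustrative queries for the conversion levels above (the logs table and
+ its columns are hypothetical; dt is assumed to be a partition column):
+ with the default value "minimal", a query such as
+ SELECT * FROM logs WHERE dt = '2013-01-01' LIMIT 10;
+ (star projection, filter on a partition column, LIMIT) can run as a
+ single FETCH task, while with "more" a projecting/filtering query such as
+ SELECT host, status FROM logs WHERE status = 200 LIMIT 10;
+ also qualifies. -->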
+
+ <property>
+ <name>hive.hmshandler.retry.attempts</name>
+ <value>1</value>
+ <description>The number of times to retry an HMSHandler call if there
+ is a connection error.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.hmshandler.retry.interval</name>
+ <value>1000</value>
+ <description>The number of milliseconds between HMSHandler retry
+ attempts
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.read.socket.timeout</name>
+ <value>10</value>
+ <description>Timeout for the HiveServer to close the connection if no
+ response from the client in N seconds, defaults to 10 seconds.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server.tcp.keepalive</name>
+ <value>true</value>
+ <description>Whether to enable TCP keepalive for the Hive server.
+ Keepalive will prevent accumulation of half-open connections.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.decode.partition.name</name>
+ <value>false</value>
+ <description>Whether to show the unquoted partition names in query
+ results.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file.
+ If the property is not set,
+ then logging will be initialized using
+ hive-log4j.properties found on
+ the classpath.
+ If the property is set, the value must be a valid URI
+ (java.net.URI,
+ e.g. "file:///tmp/my-logging.properties"), which you
+ can then extract
+ a URL from and pass to
+ PropertyConfigurator.configure(URL).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.log4j.file</name>
+ <value></value>
+ <description>Hive log4j configuration file for execution mode(sub
+ command).
+ If the property is not set, then logging will be initialized
+ using
+ hive-exec-log4j.properties found on the classpath.
+ If the
+ property is set, the value must be a valid URI (java.net.URI,
+ e.g.
+ "file:///tmp/my-logging.properties"), which you can then extract
+ a URL
+ from and pass to PropertyConfigurator.configure(URL).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort</name>
+ <value>false</value>
+ <description>
+ If this is set, when writing partitions, the metadata
+ will include the
+ bucketing/sorting
+ properties with which the data was
+ written if any (this will not overwrite the
+ metadata
+ inherited from the
+ table if the table is bucketed/sorted)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.exec.infer.bucket.sort.num.buckets.power.two</name>
+ <value>false</value>
+ <description>
+ If this is set, when setting the number of reducers for the map-reduce
+ task which writes the final output files, it will choose a number
+ which is a power of two, unless the user specifies the number of
+ reducers to use via mapred.reduce.tasks. The number of reducers may be
+ set to a power of two only to be followed by a merge task, in which
+ case nothing ends up being inferred.
+ With hive.exec.infer.bucket.sort set to true:
+ Advantages: If this is not set, the number of buckets for partitions
+ will seem arbitrary, which means that the number of mappers used for
+ optimized joins, for example, will be very low. With this set, since
+ the number of buckets used for any partition is a power of two, the
+ number of mappers used for optimized joins will be the least number of
+ buckets used by any partition being joined.
+ Disadvantages: This may mean a much larger or much smaller number of
+ reducers being used in the final map-reduce job, e.g. if a job was
+ originally going to take 257 reducers, it will now take 512 reducers;
+ similarly, if the max number of reducers is 511 and a job was going to
+ use this many, it will now use 256 reducers.
+
+ </description>
+ </property>
+
+ <property>
+ <name>hive.groupby.orderby.position.alias</name>
+ <value>false</value>
+ <description>Whether to enable using Column Position Alias in Group By
+ or Order By
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.min.worker.threads</name>
+ <value>5</value>
+ <description>Minimum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.max.worker.threads</name>
+ <value>100</value>
+ <description>Maximum number of Thrift worker threads</description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.port</name>
+ <value>10000</value>
+ <description>Port number of HiveServer2 Thrift interface.
+ Can be
+ overridden by setting $HIVE_SERVER2_THRIFT_PORT
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.thrift.bind.host</name>
+ <value>localhost</value>
+ <description>Bind host on which to run the HiveServer2 Thrift
+ interface.
+ Can be overridden by setting $HIVE_SERVER2_THRIFT_BIND_HOST
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication</name>
+ <value>NONE</value>
+ <description>
+ Client authentication types.
+ NONE: no authentication check
+ LDAP: LDAP/AD based authentication
+ KERBEROS: Kerberos/GSSAPI authentication
+ CUSTOM: Custom authentication provider (use with property
+ hive.server2.custom.authentication.class)
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.custom.authentication.class</name>
+ <value></value>
+ <description>
+ Custom authentication class. Used when property
+ 'hive.server2.authentication' is set to 'CUSTOM'. The provided class
+ must be a proper implementation of the interface
+ org.apache.hive.service.auth.PasswdAuthenticationProvider. HiveServer2
+ will call its Authenticate(user, password) method to authenticate
+ requests. The implementation may optionally extend Hadoop's
+ org.apache.hadoop.conf.Configured class to grab Hive's Configuration
+ object.
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.kerberos.principal</name>
+ <value></value>
+ <description>
+ Kerberos server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.kerberos.keytab</name>
+ <value></value>
+ <description>
+ Kerberos keytab file for server principal
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.url</name>
+ <value></value>
+ <description>
+ LDAP connection URL
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.authentication.ldap.baseDN</name>
+ <value></value>
+ <description>
+ LDAP base DN
+ </description>
+ </property>
+
+ <property>
+ <name>hive.server2.enable.doAs</name>
+ <value>true</value>
+ <description>
+ Setting this property to true will have HiveServer2 execute
+ Hive operations as the user making the calls to it.
+ </description>
+ </property>
+
+
+</configuration>
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/copylog.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/copylog.sh
new file mode 100644
index 0000000..7767b2d
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/copylog.sh
@@ -0,0 +1,7 @@
+. conf/cluster.properties
+
+NODEID=`hostname | cut -d '.' -f 1`
+#echo $NODEID
+
+#echo "rsync ${NCLOGS_DIR}/${NODEID}.log ${1}:${2}"
+rsync ${NCLOGS_DIR}/${NODEID}.log ${1}:${2}
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/dumpAll.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/dumpAll.sh
new file mode 100644
index 0000000..e7d45e8
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/dumpAll.sh
@@ -0,0 +1,12 @@
+. conf/cluster.properties
+PREGELIX_PATH=`pwd`
+LOG_PATH=$PREGELIX_PATH/logs/
+rm -rf $LOG_PATH
+mkdir $LOG_PATH
+ccname=`hostname`
+
+for i in `cat conf/slaves`
+do
+ ssh $i "cd ${PREGELIX_PATH}; bin/dumptrace.sh; bin/copylog.sh ${ccname} ${LOG_PATH}"
+done
+
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/dumptrace.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/dumptrace.sh
new file mode 100644
index 0000000..9fe55f0
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/dumptrace.sh
@@ -0,0 +1,15 @@
+echo `hostname`
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=pregelixnc'|awk '{print $2}'`
+
+if [ "$PID" == "" ]; then
+ PID=`ps -ef|grep ${USER}|grep java|grep 'hyracks'|awk '{print $2}'`
+fi
+
+if [ "$PID" == "" ]; then
+ USERID=`id | sed 's/^uid=//;s/(.*$//'`
+ PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=pregelixnc'|awk '{print $2}'`
+fi
+
+echo $PID
+kill -QUIT $PID
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/hive b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive
index 38a9e33..d2ef909 100755
--- a/hivesterix/hivesterix-dist/src/main/resources/scripts/hive
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive
@@ -175,13 +175,6 @@
exit 5
fi
-if [ $hadoop_minor_ver -ne 20 -o $hadoop_patch_ver -eq 0 ]; then
- echo "Hive requires Hadoop 0.20.x (x >= 1)."
- echo "'hadoop version' returned:"
- echo `$HADOOP version`
- exit 6
-fi
-
if [ "${AUX_PARAM}" != "" ]; then
HIVE_OPTS="$HIVE_OPTS -hiveconf hive.aux.jars.path=${AUX_PARAM}"
AUX_JARS_CMD_LINE="-libjars ${AUX_PARAM}"
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
index b024269..5fdfb3a 100644
--- a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
@@ -12,77 +12,77 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.test.runtimefunction;
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
-
-public class RuntimeFunctionTestSuite extends AbstractTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
-
- public static Test suite() throws Exception {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- RuntimeFunctionTestSuite testSuite = new RuntimeFunctionTestSuite();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile()) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new RuntimeFunctionTestCase(qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
+package edu.uci.ics.hivesterix.test.runtimefunction;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
+
+public class RuntimeFunctionTestSuite extends AbstractTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+ public static Test suite() throws Exception {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ RuntimeFunctionTestSuite testSuite = new RuntimeFunctionTestSuite();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile()) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new RuntimeFunctionTestCase(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
index 3f1214a..bb07665 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
@@ -13,9 +13,6 @@
-- create the result table
create table q10_returned_item (c_custkey int, c_name string, revenue double, c_acctbal string, n_name string, c_address string, c_phone string, c_comment string);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
insert overwrite table q10_returned_item
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
index 062f7b9..ae2fa3a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
@@ -9,8 +9,6 @@
-- create the result table
create table q12_shipping(l_shipmode string, high_line_count double, low_line_count double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-- the query
insert overwrite table q12_shipping
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
index 988f400..4644d23 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q14_promotion_effect(promo_revenue double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-
-- the query
insert overwrite table q14_promotion_effect
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
index 04064ed..8fa333e 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
@@ -14,8 +14,6 @@
create table q15_top_supplier(s_suppkey int, s_name string, s_address string, s_phone string, total_revenue double);
-set mapred.min.split.size=536870912;
-
-- the query
insert overwrite table revenue
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
index 76d0475..c617c26 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
@@ -13,8 +13,6 @@
create table q18_tmp(l_orderkey int, t_sum_quantity double);
create table q18_large_volume_customer(c_name string, c_custkey int, o_orderkey int, o_orderdate string, o_totalprice double, sum_quantity double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1164000000;
-- the query
insert overwrite table q18_tmp
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
index fd330cd..a7a0a0a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
@@ -9,8 +9,6 @@
-- create the result table
create table q19_discounted_revenue(revenue double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-- the query
insert overwrite table q19_discounted_revenue
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
index a002068..af64a4f 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
@@ -7,8 +7,6 @@
-- create the target table
CREATE TABLE q1_pricing_summary_report ( L_RETURNFLAG STRING, L_LINESTATUS STRING, SUM_QTY DOUBLE, SUM_BASE_PRICE DOUBLE, SUM_DISC_PRICE DOUBLE, SUM_CHARGE DOUBLE, AVE_QTY DOUBLE, AVE_PRICE DOUBLE, AVE_DISC DOUBLE, COUNT_ORDER INT);
-set mapred.min.split.size=536870912;
-
-- the query
INSERT OVERWRITE TABLE q1_pricing_summary_report
SELECT
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
index 63297e6..3149962 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
@@ -22,7 +22,6 @@
create table q20_tmp4(ps_suppkey int);
create table q20_potential_part_promotion(s_name string, s_address string);
-set mapred.min.split.size=536870912;
-- the query
insert overwrite table q20_tmp1
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
index 0049eb3..67f6dc4 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
@@ -11,9 +11,6 @@
-- create the target table
create table q3_shipping_priority (l_orderkey int, revenue double, o_orderdate string, o_shippriority int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
Insert overwrite table q3_shipping_priority
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
index aa828e9..efbcff2 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
@@ -11,7 +11,6 @@
CREATE TABLE q4_order_priority_tmp (O_ORDERKEY INT);
CREATE TABLE q4_order_priority (O_ORDERPRIORITY STRING, ORDER_COUNT INT);
-set mapred.min.split.size=536870912;
-- the query
INSERT OVERWRITE TABLE q4_order_priority_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
index a975ce1..091f000 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
@@ -17,7 +17,6 @@
-- create the target table
create table q5_local_supplier_volume (N_NAME STRING, REVENUE DOUBLE);
-set mapred.min.split.size=536870912;
-- the query
insert overwrite table q5_local_supplier_volume
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
index 3dfb22a..444644f 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
@@ -17,8 +17,6 @@
create table q7_volume_shipping (supp_nation string, cust_nation string, l_year int, revenue double);
create table q7_volume_shipping_tmp(supp_nation string, cust_nation string, s_nationkey int, c_nationkey int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-- the query
insert overwrite table q7_volume_shipping_tmp
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
index 586779c..a9bb58b 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
@@ -17,8 +17,6 @@
-- create the result table
create table q9_product_type_profit (nation string, o_year string, sum_profit double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-- the query
insert overwrite table q9_product_type_profit
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
index 2891c56..70b3538 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
@@ -6,4 +6,7 @@
create external table orders (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/orders';
create external table customer (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/customer';
-select l_linenumber, o_orderkey, o_totalprice, o_orderdate, o_shippriority from customer c join orders o on c.c_custkey = o.o_custkey join lineitem l on o.o_orderkey = l.l_orderkey where c.c_custkey<5 and o.o_totalprice<30000;
+select l_linenumber, o_orderkey, o_totalprice, o_orderdate, o_shippriority from
+ customer c join orders o on c.c_custkey = o.o_custkey
+ join lineitem l on o.o_orderkey = l.l_orderkey
+ where c.c_custkey<5 and o.o_totalprice<30000;
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
index 05b3718..bda1113 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
@@ -1,5 +1,5 @@
write [%0->$$38, %0->$$39, %0->$$45, %0->$$40, %0->$$42, %0->$$43, %0->$$41, %0->$$44]
--- SINK_WRITE |UNPARTITIONED|
+-- SINK_WRITE |PARTITIONED|
project ([$$38, $$39, $$45, $$40, $$42, $$43, $$41, $$44])
-- STREAM_PROJECT |PARTITIONED|
limit 20
@@ -9,11 +9,11 @@
exchange
-- SORT_MERGE_EXCHANGE [$$45(DESC) ] |PARTITIONED|
limit 20
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (DESC, %0->$$45)
- -- STABLE_SORT [$$45(DESC)] |LOCAL|
+ -- STABLE_SORT [$$45(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$38 := %0->$$48; $$39 := %0->$$49; $$40 := %0->$$50; $$41 := %0->$$51; $$42 := %0->$$52; $$43 := %0->$$53; $$44 := %0->$$54]) decor ([]) {
@@ -31,7 +31,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$21, $$22, $$26, $$25, $$18, $$23, $$28] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$21, $$22, $$26, $$25, $$18, $$23, $$28] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$21, $$22, $$23, $$25, $$26, $$28, $$18, $$6, $$7])
@@ -82,7 +82,7 @@
-- HASH_PARTITION_EXCHANGE [$$30] |PARTITIONED|
project ([$$30, $$29])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$33, 1994-01-01], function-call: algebricks:ge, Args:[%0->$$33, 1993-10-01], function-call: algebricks:lt, Args:[%0->$$33, 1994-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$33, 1993-10-01], function-call: algebricks:lt, Args:[%0->$$33, 1994-01-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -99,4 +99,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan
index 70ad7ee..8195ef0 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan
@@ -19,7 +19,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$1] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$1] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$1, $$3, $$4])
@@ -85,7 +85,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$2]<-[$$1, $$2] <- default.q11_part_tmp
@@ -99,7 +99,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$3(DESC) ] |PARTITIONED|
order (DESC, %0->$$3)
- -- STABLE_SORT [$$3(DESC)] |LOCAL|
+ -- STABLE_SORT [$$3(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$2, $$3])
@@ -123,4 +123,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
index 5c240e2..d976bba 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
@@ -7,7 +7,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$26(ASC) ] |PARTITIONED|
order (ASC, %0->$$26)
- -- STABLE_SORT [$$26(ASC)] |LOCAL|
+ -- STABLE_SORT [$$26(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$26 := %0->$$34]) decor ([]) {
@@ -25,7 +25,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$24] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$24] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$6, $$24])
@@ -38,7 +38,7 @@
-- HASH_PARTITION_EXCHANGE [$$10] |PARTITIONED|
project ([$$10, $$24])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$22, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$22, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$20, %0->$$21], function-call: algebricks:lt, Args:[%0->$$21, %0->$$22], function-call: algebricks:lt, Args:[%0->$$20, %0->$$21], function-call: algebricks:ge, Args:[%0->$$22, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$22, 1995-01-01], function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$24, MAIL], function-call: algebricks:eq, Args:[%0->$$24, SHIP]], function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$24, MAIL], function-call: algebricks:eq, Args:[%0->$$24, SHIP]]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$21, %0->$$22], function-call: algebricks:lt, Args:[%0->$$20, %0->$$21], function-call: algebricks:ge, Args:[%0->$$22, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$22, 1995-01-01], function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$24, MAIL], function-call: algebricks:eq, Args:[%0->$$24, SHIP]]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -55,4 +55,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
index 19bcd24..40cedd6 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
@@ -7,74 +7,65 @@
exchange
-- SORT_MERGE_EXCHANGE [$$21(DESC), $$20(DESC) ] |PARTITIONED|
order (DESC, %0->$$21) (DESC, %0->$$20)
- -- STABLE_SORT [$$21(DESC), $$20(DESC)] |LOCAL|
+ -- STABLE_SORT [$$21(DESC), $$20(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- group by ([$$20 := %0->$$28]) decor ([]) {
- aggregate [$$21] <- [function-call: hive:count(FINAL), Args:[%0->$$27]]
+ group by ([$$20 := %0->$$26]) decor ([]) {
+ aggregate [$$21] <- [function-call: hive:count(FINAL), Args:[%0->$$25]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$28] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$26] |PARTITIONED|
exchange
- -- HASH_PARTITION_EXCHANGE [$$28] |PARTITIONED|
- group by ([$$28 := %0->$$19]) decor ([]) {
- aggregate [$$27] <- [function-call: hive:count(PARTIAL1), Args:[1]]
+ -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
+ group by ([$$26 := %0->$$19]) decor ([]) {
+ aggregate [$$25] <- [function-call: hive:count(PARTIAL1), Args:[1]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$19] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$19] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$19])
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- group by ([$$18 := %0->$$26]) decor ([]) {
- aggregate [$$19] <- [function-call: hive:count(FINAL), Args:[%0->$$25]]
+ group by ([$$18 := %0->$$10]) decor ([]) {
+ aggregate [$$19] <- [function-call: hive:count(COMPLETE), Args:[%0->$$1]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$26] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$10] |PARTITIONED|
exchange
- -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
- group by ([$$26 := %0->$$10]) decor ([]) {
- aggregate [$$25] <- [function-call: hive:count(PARTIAL1), Args:[%0->$$1]]
- -- AGGREGATE |LOCAL|
- nested tuple source
- -- NESTED_TUPLE_SOURCE |LOCAL|
- }
- -- EXTERNAL_GROUP_BY[$$10] |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$10, $$1])
+ -- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- project ([$$10, $$1])
- -- STREAM_PROJECT |PARTITIONED|
+ left outer join (function-call: algebricks:eq, Args:[%0->$$2, %0->$$10])
+ -- HYBRID_HASH_JOIN [$$10][$$2] |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- left outer join (function-call: algebricks:eq, Args:[%0->$$2, %0->$$10])
- -- HYBRID_HASH_JOIN [$$10][$$2] |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$10] |PARTITIONED|
+ data-scan [$$10]<-[$$10, $$11, $$12, $$13, $$14, $$15, $$16, $$17] <- default.customer
+ -- DATASOURCE_SCAN |PARTITIONED|
exchange
- -- HASH_PARTITION_EXCHANGE [$$10] |PARTITIONED|
- data-scan [$$10]<-[$$10, $$11, $$12, $$13, $$14, $$15, $$16, $$17] <- default.customer
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
- exchange
- -- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
- project ([$$2, $$1])
- -- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$9, %special%requests%]])
- -- STREAM_SELECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
+ project ([$$2, $$1])
+ -- STREAM_PROJECT |PARTITIONED|
+ select (function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$9, %special%requests%]])
+ -- STREAM_SELECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan [$$1, $$2, $$9]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.orders
+ -- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$1, $$2, $$9]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.orders
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
index 21b90bd..e6d1c1d 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
@@ -21,7 +21,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$5, $$15, $$16])
@@ -34,7 +34,7 @@
-- HASH_PARTITION_EXCHANGE [$$11] |PARTITIONED|
project ([$$11, $$15, $$16])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$20, 1995-10-01], function-call: algebricks:ge, Args:[%0->$$20, 1995-09-01], function-call: algebricks:lt, Args:[%0->$$20, 1995-10-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$20, 1995-09-01], function-call: algebricks:lt, Args:[%0->$$20, 1995-10-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -51,4 +51,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
index a5bd27a..c61ed37 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
@@ -19,12 +19,12 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$3] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$3] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$3, $$6, $$7])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1996-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1996-04-01], function-call: algebricks:ge, Args:[%0->$$11, 1996-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1996-04-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1996-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1996-04-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -53,7 +53,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- PRE_CLUSTERED_GROUP_BY[] |LOCAL|
+ -- PRE_CLUSTERED_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$2]<-[$$1, $$2] <- default.revenue
@@ -67,7 +67,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$2(ASC) ] |PARTITIONED|
order (ASC, %0->$$2)
- -- STABLE_SORT [$$2(ASC)] |LOCAL|
+ -- STABLE_SORT [$$2(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$2, $$3, $$4, $$6, $$10])
@@ -107,4 +107,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
index 9835346..c986cb4 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
@@ -38,7 +38,7 @@
-- HYBRID_HASH_JOIN [$$2][$$11] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$6, MEDIUM POLISHED%]], function-call: algebricks:neq, Args:[%0->$$5, Brand#45], function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$6, MEDIUM POLISHED%]]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:neq, Args:[%0->$$5, Brand#45], function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$6, MEDIUM POLISHED%]]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -65,7 +65,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$13(DESC), $$10(ASC), $$11(ASC), $$12(ASC) ] |PARTITIONED|
order (DESC, %0->$$13) (ASC, %0->$$10) (ASC, %0->$$11) (ASC, %0->$$12)
- -- STABLE_SORT [$$13(DESC), $$10(ASC), $$11(ASC), $$12(ASC)] |LOCAL|
+ -- STABLE_SORT [$$13(DESC), $$10(ASC), $$11(ASC), $$12(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$10 := %0->$$5; $$11 := %0->$$6; $$12 := %0->$$7]) decor ([]) {
@@ -95,4 +95,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
index a827007..8a75f64 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
@@ -21,7 +21,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$2] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$2] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$2, $$5]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9, $$10, $$11, $$12, $$13, $$14, $$15, $$16] <- default.lineitem
@@ -53,13 +53,13 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$17])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:lt, Args:[%0->$$16, %0->$$2])
- -- STREAM_SELECT |UNPARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:eq, Args:[%0->$$13, %0->$$1])
@@ -84,7 +84,7 @@
-- HASH_PARTITION_EXCHANGE [$$3] |PARTITIONED|
project ([$$3])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$9, MED BOX], function-call: algebricks:eq, Args:[%0->$$6, Brand#23], function-call: algebricks:eq, Args:[%0->$$9, MED BOX]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$6, Brand#23], function-call: algebricks:eq, Args:[%0->$$9, MED BOX]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -101,4 +101,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
index ea47ea0..eb78f1d 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
@@ -19,7 +19,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$1] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$1] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$1, $$5]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9, $$10, $$11, $$12, $$13, $$14, $$15, $$16] <- default.lineitem
@@ -29,7 +29,7 @@
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
write [%0->$$36, %0->$$37, %0->$$38, %0->$$39, %0->$$40, %0->$$41]
--- SINK_WRITE |UNPARTITIONED|
+-- SINK_WRITE |PARTITIONED|
project ([$$36, $$37, $$38, $$39, $$40, $$41])
-- STREAM_PROJECT |PARTITIONED|
limit 100
@@ -39,11 +39,11 @@
exchange
-- SORT_MERGE_EXCHANGE [$$40(DESC), $$39(ASC) ] |PARTITIONED|
limit 100
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (DESC, %0->$$40) (ASC, %0->$$39)
- -- STABLE_SORT [$$40(DESC), $$39(ASC)] |LOCAL|
+ -- STABLE_SORT [$$40(DESC), $$39(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$36 := %0->$$44; $$37 := %0->$$45; $$38 := %0->$$46; $$39 := %0->$$47; $$40 := %0->$$48]) decor ([]) {
@@ -61,7 +61,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$20, $$19, $$27, $$31, $$30] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$20, $$19, $$27, $$31, $$30] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$19, $$20, $$27, $$30, $$31, $$7])
@@ -90,7 +90,7 @@
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:gt, Args:[%0->$$2, 300])
+ select (function-call: algebricks:gt, Args:[%0->$$2, 300.0])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -123,4 +123,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
index 1827729..9e97b7a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
@@ -17,13 +17,13 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$15, $$16])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:or, Args:[function-call: algebricks:or, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#12], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, SM CASE||SM BOX||SM PACK||SM PKG]], function-call: algebricks:ge, Args:[%0->$$14, 1]], function-call: algebricks:le, Args:[%0->$$14, 11]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 5]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#23], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, MED BAG||MED BOX||MED PKG||MED PACK]], function-call: algebricks:ge, Args:[%0->$$14, 10]], function-call: algebricks:le, Args:[%0->$$14, 20]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 10]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#34], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, LG CASE||LG BOX||LG PACK||LG PKG]], function-call: algebricks:ge, Args:[%0->$$14, 20]], function-call: algebricks:le, Args:[%0->$$14, 30]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 15]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]])
- -- STREAM_SELECT |UNPARTITIONED|
+ select (function-call: algebricks:or, Args:[function-call: algebricks:or, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#12], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, SM CASE||SM BOX||SM PACK||SM PKG]], function-call: algebricks:ge, Args:[%0->$$14, 1.0]], function-call: algebricks:le, Args:[%0->$$14, 11.0]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 5]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#23], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, MED BAG||MED BOX||MED PKG||MED PACK]], function-call: algebricks:ge, Args:[%0->$$14, 10.0]], function-call: algebricks:le, Args:[%0->$$14, 20.0]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 10]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#34], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, LG CASE||LG BOX||LG PACK||LG PKG]], function-call: algebricks:ge, Args:[%0->$$14, 20.0]], function-call: algebricks:le, Args:[%0->$$14, 30.0]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 15]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]])
+ -- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:eq, Args:[%0->$$11, %0->$$1])
@@ -43,4 +43,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
index 0e9c90f..de964ac 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
@@ -7,7 +7,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$17(ASC), $$18(ASC) ] |PARTITIONED|
order (ASC, %0->$$17) (ASC, %0->$$18)
- -- STABLE_SORT [$$17(ASC), $$18(ASC)] |LOCAL|
+ -- STABLE_SORT [$$17(ASC), $$18(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$17 := %0->$$37; $$18 := %0->$$38]) decor ([]) {
@@ -25,7 +25,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$9, $$10] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$9, $$10] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$9, $$10, $$5, $$6, $$7, $$8])
@@ -39,4 +39,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
index eddfca5..ded599c 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
@@ -5,9 +5,9 @@
distinct ([%0->$$1])
-- PRE_SORTED_DISTINCT_BY |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$1)
- -- STABLE_SORT [$$1(ASC)] |LOCAL|
+ -- STABLE_SORT [$$1(ASC)] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1])
@@ -45,12 +45,12 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$2, $$3] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$2, $$3] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$2, $$3, $$5])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -107,9 +107,9 @@
distinct ([%0->$$1])
-- PRE_SORTED_DISTINCT_BY |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$1)
- -- STABLE_SORT [$$1(ASC)] |LOCAL|
+ -- STABLE_SORT [$$1(ASC)] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1])
@@ -129,7 +129,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$3(ASC) ] |PARTITIONED|
order (ASC, %0->$$3)
- -- STABLE_SORT [$$3(ASC)] |LOCAL|
+ -- STABLE_SORT [$$3(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$3, $$4])
@@ -175,4 +175,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
index cc47cf3..a22bf53 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
@@ -23,9 +23,9 @@
}
-- PRE_CLUSTERED_GROUP_BY[$$1, $$3] |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$1) (ASC, %0->$$3)
- -- STABLE_SORT [$$1(ASC), $$3(ASC)] |LOCAL|
+ -- STABLE_SORT [$$1(ASC), $$3(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$1, $$3]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9, $$10, $$11, $$12, $$13, $$14, $$15, $$16] <- default.lineitem
@@ -59,9 +59,9 @@
}
-- PRE_CLUSTERED_GROUP_BY[$$1, $$3] |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$1) (ASC, %0->$$3)
- -- STABLE_SORT [$$1(ASC), $$3(ASC)] |LOCAL|
+ -- STABLE_SORT [$$1(ASC), $$3(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$1, $$3])
@@ -89,11 +89,11 @@
exchange
-- SORT_MERGE_EXCHANGE [$$44(DESC), $$43(ASC) ] |PARTITIONED|
limit 100
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (DESC, %0->$$44) (ASC, %0->$$43)
- -- STABLE_SORT [$$44(DESC), $$43(ASC)] |LOCAL|
+ -- STABLE_SORT [$$44(DESC), $$43(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$43 := %0->$$48]) decor ([]) {
@@ -111,7 +111,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$37] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$37] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$37])
@@ -127,7 +127,7 @@
project ([$$37, $$4, $$18])
-- STREAM_PROJECT |PARTITIONED|
select (function-call: algebricks:or, Args:[function-call: algebricks:gt, Args:[%0->$$5, 1], function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$5, 1], function-call: algebricks:neq, Args:[%0->$$18, %0->$$6]]])
- -- STREAM_SELECT |UNPARTITIONED|
+ -- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (function-call: algebricks:eq, Args:[%0->$$16, %0->$$4])
@@ -150,9 +150,9 @@
-- HYBRID_HASH_JOIN [$$18][$$36] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$18] |PARTITIONED|
- project ([$$16, $$18])
+ project ([$$18, $$16])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:gt, Args:[%0->$$28, %0->$$27], function-call: algebricks:gt, Args:[%0->$$28, %0->$$27]])
+ select (function-call: algebricks:gt, Args:[%0->$$28, %0->$$27])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -221,4 +221,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
index 591576b..c5897f7 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
@@ -33,7 +33,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
select (function-call: algebricks:gt, Args:[%0->$$1, 0.0])
@@ -53,9 +53,9 @@
distinct ([%0->$$2])
-- PRE_SORTED_DISTINCT_BY |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$2)
- -- STABLE_SORT [$$2(ASC)] |LOCAL|
+ -- STABLE_SORT [$$2(ASC)] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
data-scan [$$2]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.orders
@@ -73,7 +73,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$6(ASC) ] |PARTITIONED|
order (ASC, %0->$$6)
- -- STABLE_SORT [$$6(ASC)] |LOCAL|
+ -- STABLE_SORT [$$6(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$6 := %0->$$13]) decor ([]) {
@@ -91,7 +91,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$5] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$5] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$5, $$3])
@@ -133,4 +133,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
index 151f34d..6138f7a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
@@ -10,7 +10,7 @@
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1, $$3])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$5, %BRASS], function-call: algebricks:eq, Args:[%0->$$6, 15], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$5, %BRASS]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$6, 15], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$5, %BRASS]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -103,11 +103,11 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- PRE_CLUSTERED_GROUP_BY[$$4] |LOCAL|
+ -- PRE_CLUSTERED_GROUP_BY[$$4] |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$4)
- -- STABLE_SORT [$$4(ASC)] |LOCAL|
+ -- STABLE_SORT [$$4(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
data-scan [$$4, $$5]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.q2_minimum_cost_supplier_tmp1
@@ -125,11 +125,11 @@
exchange
-- SORT_MERGE_EXCHANGE [$$3(DESC), $$5(ASC), $$4(ASC), $$6(ASC) ] |PARTITIONED|
limit 100
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (DESC, %0->$$3) (ASC, %0->$$5) (ASC, %0->$$4) (ASC, %0->$$6)
- -- STABLE_SORT [$$3(DESC), $$5(ASC), $$4(ASC), $$6(ASC)] |LOCAL|
+ -- STABLE_SORT [$$3(DESC), $$5(ASC), $$4(ASC), $$6(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$3, $$4, $$5, $$6, $$8, $$9, $$10, $$11])
@@ -153,4 +153,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan
index a1b8e42..31c4210 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan
@@ -7,11 +7,11 @@
exchange
-- SORT_MERGE_EXCHANGE [$$34(DESC) ] |PARTITIONED|
limit 10
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (DESC, %0->$$34)
- -- STABLE_SORT [$$34(DESC)] |LOCAL|
+ -- STABLE_SORT [$$34(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$1, $$34, $$29, $$32])
@@ -67,4 +67,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
index 435fd7c..be5a66a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
@@ -5,9 +5,9 @@
distinct ([%0->$$1])
-- PRE_SORTED_DISTINCT_BY |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$1)
- -- STABLE_SORT [$$1(ASC)] |LOCAL|
+ -- STABLE_SORT [$$1(ASC)] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1])
@@ -31,7 +31,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$11(ASC) ] |PARTITIONED|
order (ASC, %0->$$11)
- -- STABLE_SORT [$$11(ASC)] |LOCAL|
+ -- STABLE_SORT [$$11(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$11 := %0->$$16]) decor ([]) {
@@ -49,7 +49,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$7] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$7] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$7])
@@ -70,7 +70,7 @@
-- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
project ([$$2, $$7])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$6, 1993-10-01], function-call: algebricks:ge, Args:[%0->$$6, 1993-07-01], function-call: algebricks:lt, Args:[%0->$$6, 1993-10-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$6, 1993-07-01], function-call: algebricks:lt, Args:[%0->$$6, 1993-10-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -79,4 +79,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
index 177d24c..383e550 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
@@ -5,7 +5,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$49(DESC) ] |PARTITIONED|
order (DESC, %0->$$49)
- -- STABLE_SORT [$$49(DESC)] |LOCAL|
+ -- STABLE_SORT [$$49(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$48 := %0->$$52]) decor ([]) {
@@ -23,7 +23,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$42] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$42] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$42, $$23, $$24])
@@ -106,7 +106,7 @@
-- HASH_PARTITION_EXCHANGE [$$9] |PARTITIONED|
project ([$$9, $$10])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$13, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$13, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$13, 1995-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$13, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$13, 1995-01-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -123,4 +123,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
index cd9ffcd..aac9a5b 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
@@ -17,12 +17,12 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[] |LOCAL|
+ -- EXTERNAL_GROUP_BY[] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$6, $$7])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$7, 0.05], function-call: algebricks:le, Args:[%0->$$7, 0.07], function-call: algebricks:lt, Args:[%0->$$5, 24], function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$7, 0.05], function-call: algebricks:le, Args:[%0->$$7, 0.07], function-call: algebricks:lt, Args:[%0->$$5, 24]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$7, 0.05], function-call: algebricks:le, Args:[%0->$$7, 0.07], function-call: algebricks:lt, Args:[%0->$$5, 24.0]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -31,4 +31,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
index 39f8301..c1d5b26 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
@@ -5,9 +5,9 @@
union ($$6, $$10, $$17) ($$2, $$14, $$18) ($$5, $$9, $$19) ($$1, $$13, $$20)
-- UNION_ALL |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$6, $$2, $$5, $$1])
- -- STREAM_PROJECT |UNPARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (true)
@@ -31,9 +31,9 @@
select (function-call: algebricks:eq, Args:[%0->$$6, FRANCE])
-- STREAM_SELECT |PARTITIONED|
project ([$$5, $$6])
- -- STREAM_PROJECT |UNPARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
assign [$$5, $$6] <- [%0->$$9, %0->$$10]
- -- ASSIGN |UNPARTITIONED|
+ -- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
replicate
@@ -47,9 +47,9 @@
empty-tuple-source
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$10, $$14, $$9, $$13])
- -- STREAM_PROJECT |UNPARTITIONED|
+ -- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
join (true)
@@ -91,7 +91,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$47(ASC), $$48(ASC), $$49(ASC) ] |PARTITIONED|
order (ASC, %0->$$47) (ASC, %0->$$48) (ASC, %0->$$49)
- -- STABLE_SORT [$$47(ASC), $$48(ASC), $$49(ASC)] |LOCAL|
+ -- STABLE_SORT [$$47(ASC), $$48(ASC), $$49(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$47 := %0->$$53; $$48 := %0->$$54; $$49 := %0->$$55]) decor ([]) {
@@ -109,7 +109,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$1, $$2, $$45] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$1, $$2, $$45] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$1, $$2, $$45, $$46])
@@ -148,7 +148,7 @@
-- HYBRID_HASH_JOIN [$$20][$$36] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$20] |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:le, Args:[%0->$$30, 1996-12-31], function-call: algebricks:ge, Args:[%0->$$30, 1995-01-01], function-call: algebricks:le, Args:[%0->$$30, 1996-12-31]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$30, 1995-01-01], function-call: algebricks:le, Args:[%0->$$30, 1996-12-31]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -189,4 +189,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
index b807a24..b9916e2 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
@@ -3,7 +3,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$63(ASC) ] |PARTITIONED|
order (ASC, %0->$$63)
- -- STABLE_SORT [$$63(ASC)] |LOCAL|
+ -- STABLE_SORT [$$63(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$63, $$66])
@@ -27,7 +27,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$61] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$61] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$61, $$62, $$2])
@@ -138,7 +138,7 @@
-- HASH_PARTITION_EXCHANGE [$$38] |PARTITIONED|
project ([$$38, $$37, $$41])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$41, 1996-12-31], function-call: algebricks:ge, Args:[%0->$$41, 1995-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$41, 1995-01-01], function-call: algebricks:lt, Args:[%0->$$41, 1996-12-31]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
@@ -175,7 +175,7 @@
project ([$$1, $$2])
-- STREAM_PROJECT |PARTITIONED|
assign [$$1, $$2, $$3, $$4] <- [%0->$$54, %0->$$55, %0->$$56, %0->$$57]
- -- ASSIGN |UNPARTITIONED|
+ -- ASSIGN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
replicate
@@ -187,4 +187,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan
index f57f4a3..ecf4acb 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan
@@ -5,7 +5,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$53(ASC), $$54(DESC) ] |PARTITIONED|
order (ASC, %0->$$53) (DESC, %0->$$54)
- -- STABLE_SORT [$$53(ASC), $$54(DESC)] |LOCAL|
+ -- STABLE_SORT [$$53(ASC), $$54(DESC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
group by ([$$53 := %0->$$58; $$54 := %0->$$59]) decor ([]) {
@@ -23,7 +23,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$48, $$51] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$48, $$51] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$48, $$51, $$52])
@@ -121,4 +121,4 @@
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan
index 188aa6d..bec1353 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan
@@ -19,7 +19,7 @@
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$9] |LOCAL|
+ -- EXTERNAL_GROUP_BY[$$9] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$9, $$5, $$6, $$7, $$8])
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
index 48e624e..2cbea4a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
@@ -6,7 +6,7 @@
-- UNNEST |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$1, $$2, $$3]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7] <- default.supplier
+ data-scan [$$2, $$3, $$1]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7] <- default.supplier
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
index b5ed12f..6a0b125 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
@@ -7,14 +7,14 @@
exchange
-- SORT_MERGE_EXCHANGE [$$4(ASC) ] |PARTITIONED|
limit 4
- -- STREAM_LIMIT |LOCAL|
+ -- STREAM_LIMIT |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
order (ASC, %0->$$4)
- -- STABLE_SORT [$$4(ASC)] |LOCAL|
+ -- STABLE_SORT [$$4(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- select (function-call: algebricks:lt, Args:[%0->$$4, 10000])
+ select (function-call: algebricks:lt, Args:[%0->$$4, 10000.0])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
index ab55181..b5f1dc2 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
@@ -16,37 +16,35 @@
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$25] |PARTITIONED|
- project ([$$32, $$25, $$29, $$28])
+ project ([$$25, $$28, $$29, $$32])
-- STREAM_PROJECT |PARTITIONED|
- project ([$$25, $$17, $$28, $$29, $$32])
- -- STREAM_PROJECT |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- join (function-call: algebricks:eq, Args:[%0->$$26, %0->$$17])
- -- HYBRID_HASH_JOIN [$$26][$$17] |PARTITIONED|
- exchange
- -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$28, 30000], function-call: algebricks:lt, Args:[%0->$$28, 30000]])
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ join (function-call: algebricks:eq, Args:[%0->$$26, %0->$$17])
+ -- HYBRID_HASH_JOIN [$$26][$$17] |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
+ select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$28, 30000.0], function-call: algebricks:lt, Args:[%0->$$26, 5]])
+ -- STREAM_SELECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan [$$32, $$25, $$26, $$29, $$28]<-[$$25, $$26, $$27, $$28, $$29, $$30, $$31, $$32, $$33] <- default.orders
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$17] |PARTITIONED|
+ project ([$$17])
+ -- STREAM_PROJECT |PARTITIONED|
+ select (function-call: algebricks:lt, Args:[%0->$$17, 5])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$32, $$25, $$26, $$29, $$28]<-[$$25, $$26, $$27, $$28, $$29, $$30, $$31, $$32, $$33] <- default.orders
+ data-scan [$$17]<-[$$17, $$18, $$19, $$20, $$21, $$22, $$23, $$24] <- default.customer
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
- exchange
- -- HASH_PARTITION_EXCHANGE [$$17] |PARTITIONED|
- project ([$$17])
- -- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$17, 5], function-call: algebricks:lt, Args:[%0->$$17, 5]])
- -- STREAM_SELECT |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$17]<-[$$17, $$18, $$19, $$20, $$21, $$22, $$23, $$24] <- default.customer
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan
index 7370fcf..344898d 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan
@@ -3,7 +3,7 @@
exchange
-- SORT_MERGE_EXCHANGE [$$2(ASC) ] |PARTITIONED|
order (ASC, %0->$$2)
- -- STABLE_SORT [$$2(ASC)] |LOCAL|
+ -- STABLE_SORT [$$2(ASC)] |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
select (function-call: algebricks:lt, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFOPMultiply, Args:[%0->$$1, 2], 20])
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
index a4ee677..49cdedf 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
@@ -1,22 +1,66 @@
<?xml version="1.0"?>
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- !
- ! http://www.apache.org/licenses/LICENSE-2.0
- !
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
+<!-- ! Copyright 2009-2013 by The Regents of the University of California
+ ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may
+ not use this file except in compliance with the License. ! you may obtain
+ a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0
+ ! ! Unless required by applicable law or agreed to in writing, software !
+ distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the
+ License for the specific language governing permissions and ! limitations
+ under the License. ! -->
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
+ <!-- Hivesterix Execution Parameters -->
+ <property>
+ <name>hive.hyracks.connectorpolicy</name>
+ <value>PIPELINING</value>
+ </property>
+
+ <property>
+ <name>hive.hyracks.parrallelism</name>
+ <value>4</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external</name>
+ <value>true</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.groupby.external.memory</name>
+ <value>3072</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.sort.memory</name>
+ <value>3072</value>
+ </property>
+
+ <property>
+ <name>hive.algebricks.framesize</name>
+ <value>768</value>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join</name>
+ <value>false</value>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join.noconditionaltask</name>
+ <value>false</value>
+ <description>Whether Hive enables the optimization of converting a
+ common join into a mapjoin based on the input file
+ size. If this parameter is on, and the sum of sizes for n-1 of the
+ tables/partitions in an n-way join is smaller than the
+ specified size, the join is directly converted to a mapjoin (there is no
+ conditional task).
+ </description>
+ </property>
+
+
<!-- Hive Configuration can either be stored in this file or in the hadoop
configuration files -->
<!-- that are implied by Hadoop setup variables. -->
@@ -42,52 +86,6 @@
</property>
<property>
- <name>hive.hyracks.connectorpolicy</name>
- <value>SEND_SIDE_MAT_PIPELINING</value>
- </property>
-
- <property>
- <name>hive.hyracks.host</name>
- <value>127.0.0.1</value>
- </property>
-
- <property>
- <name>hive.hyracks.port</name>
- <value>13099</value>
- </property>
-
- <property>
- <name>hive.hyracks.app</name>
- <value>hivesterix</value>
- </property>
-
-
- <property>
- <name>hive.hyracks.parrallelism</name>
- <value>2</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external</name>
- <value>true</value>
- </property>
-
- <property>
- <name>hive.algebricks.groupby.external.memory</name>
- <value>3072</value>
- </property>
-
- <property>
- <name>hive.algebricks.sort.memory</name>
- <value>3072</value>
- </property>
-
- <property>
- <name>hive.algebricks.framesize</name>
- <value>768</value>
- </property>
-
- <property>
<name>hive.exec.reducers.bytes.per.reducer</name>
<value>1000000000</value>
<description>size per reducer.The default is 1G, i.e if the input size
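
For orientation only, a minimal Java sketch of the size-based decision that the hive.auto.convert.join.noconditionaltask description added in the hive-default.xml hunk above refers to. The class name, method name, sizes, and threshold below are illustrative assumptions for this sketch, not Hive internals and not anything introduced by this patch.

// Sketch: an n-way join is eligible for a map join when the n-1 smallest
// inputs together fit under the configured size threshold.
import java.util.Arrays;

public final class MapJoinDecisionSketch {

    /** True if all inputs except the largest fit within thresholdBytes. */
    static boolean convertToMapJoin(long[] inputSizesBytes, long thresholdBytes) {
        long[] sorted = inputSizesBytes.clone();
        Arrays.sort(sorted);
        long sumOfSmallest = 0;
        for (int i = 0; i < sorted.length - 1; i++) { // skip the largest input
            sumOfSmallest += sorted[i];
        }
        return sumOfSmallest <= thresholdBytes;
    }

    public static void main(String[] args) {
        long[] sizes = { 900_000_000L, 40_000_000L, 8_000_000L }; // a 3-way join
        long threshold = 100_000_000L;
        // The two smaller inputs (48 MB) fit under the 100 MB threshold.
        System.out.println(convertToMapJoin(sizes, threshold)); // prints: true
    }
}

Run standalone, this prints true: the two smaller inputs fit under the threshold, so the join would be converted directly without a conditional task, which is exactly the behavior the test configuration above switches off.
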
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
index a7d8d9c..f886a44 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
@@ -43,7 +43,7 @@
# Note that the ConsoleHandler also has a separate level
# setting to limit messages printed to the console.
-.level= WARNING
+.level= INFO
# .level= INFO
# .level= FINE
# .level = FINEST
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
index 3f1214a..bb07665 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
@@ -13,9 +13,6 @@
-- create the result table
create table q10_returned_item (c_custkey int, c_name string, revenue double, c_acctbal string, n_name string, c_address string, c_phone string, c_comment string);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
insert overwrite table q10_returned_item
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
index 062f7b9..8546365 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q12_shipping(l_shipmode string, high_line_count double, low_line_count double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-
-- the query
insert overwrite table q12_shipping
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
index 988f400..4644d23 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q14_promotion_effect(promo_revenue double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-
-- the query
insert overwrite table q14_promotion_effect
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
index 04064ed..8fa333e 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
@@ -14,8 +14,6 @@
create table q15_top_supplier(s_suppkey int, s_name string, s_address string, s_phone string, total_revenue double);
-set mapred.min.split.size=536870912;
-
-- the query
insert overwrite table revenue
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
index ac2902c..d1eaacc 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
@@ -13,9 +13,6 @@
create table q18_tmp(l_orderkey int, t_sum_quantity double);
create table q18_large_volume_customer(c_name string, c_custkey int, o_orderkey int, o_orderdate string, o_totalprice double, sum_quantity double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1164000000;
-
-- the query
insert overwrite table q18_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
index 2002e1e..6badfcf 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q19_discounted_revenue(revenue double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-
-- the query
insert overwrite table q19_discounted_revenue
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
index a002068..af64a4f 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
@@ -7,8 +7,6 @@
-- create the target table
CREATE TABLE q1_pricing_summary_report ( L_RETURNFLAG STRING, L_LINESTATUS STRING, SUM_QTY DOUBLE, SUM_BASE_PRICE DOUBLE, SUM_DISC_PRICE DOUBLE, SUM_CHARGE DOUBLE, AVE_QTY DOUBLE, AVE_PRICE DOUBLE, AVE_DISC DOUBLE, COUNT_ORDER INT);
-set mapred.min.split.size=536870912;
-
-- the query
INSERT OVERWRITE TABLE q1_pricing_summary_report
SELECT
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
index 2bb90ea..32181bf 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
@@ -23,8 +23,6 @@
create table q20_tmp4(ps_suppkey int);
create table q20_potential_part_promotion(s_name string, s_address string);
-set mapred.min.split.size=536870912;
-
-- the query
insert overwrite table q20_tmp1
select distinct p_partkey
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
index 0049eb3..67f6dc4 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
@@ -11,9 +11,6 @@
-- create the target table
create table q3_shipping_priority (l_orderkey int, revenue double, o_orderdate string, o_shippriority int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
Insert overwrite table q3_shipping_priority
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
index aa828e9..efbcff2 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
@@ -11,7 +11,6 @@
CREATE TABLE q4_order_priority_tmp (O_ORDERKEY INT);
CREATE TABLE q4_order_priority (O_ORDERPRIORITY STRING, ORDER_COUNT INT);
-set mapred.min.split.size=536870912;
-- the query
INSERT OVERWRITE TABLE q4_order_priority_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
index 9af2dd2..838a1e8 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
@@ -17,7 +17,6 @@
-- create the target table
create table q5_local_supplier_volume (N_NAME STRING, REVENUE DOUBLE);
-set mapred.min.split.size=536870912;
-- the query
insert overwrite table q5_local_supplier_volume
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
index 2678f80..12ae8ae 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
@@ -17,9 +17,6 @@
create table q7_volume_shipping (supp_nation string, cust_nation string, l_year int, revenue double);
create table q7_volume_shipping_tmp(supp_nation string, cust_nation string, s_nationkey int, c_nationkey int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-
-- the query
insert overwrite table q7_volume_shipping_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
index 2e5b4a1..c491997 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
@@ -17,9 +17,6 @@
-- create the result table
create table q9_product_type_profit (nation string, o_year string, sum_profit double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
insert overwrite table q9_product_type_profit
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u8_order_by.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u8_order_by.hive
new file mode 100644
index 0000000..6efd2ae
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u8_order_by.hive
@@ -0,0 +1,8 @@
+drop table IF EXISTS nation;
+drop table IF EXISTS u8_non_mapred;
+
+create external table nation (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/nation';
+create table u8_order_by (N_NATIONKEY INT, N_NAME STRING, N_REGIONKEY INT, N_COMMENT STRING);
+
+insert overwrite table u8_order_by
+select * FROM nation order by N_NATIONKEY;
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u8_order_by.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u8_order_by.result
new file mode 100644
index 0000000..719b246
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u8_order_by.result
@@ -0,0 +1,25 @@
+0ALGERIA0 haggle. carefully final deposits detect slyly agai
+1ARGENTINA1al foxes promise slyly according to the regular accounts. bold requests alon
+2BRAZIL1y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special
+3CANADA1eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold
+4EGYPT4y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d
+5ETHIOPIA0ven packages wake quickly. regu
+6FRANCE3refully final requests. regular, ironi
+7GERMANY3l platelets. regular accounts x-ray: unusual, regular acco
+8INDIA2ss excuses cajole slyly across the packages. deposits print aroun
+9INDONESIA2 slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull
+10IRAN4efully alongside of the slyly final dependencies.
+11IRAQ4nic deposits boost atop the quickly final requests? quickly regula
+12JAPAN2ously. final, express gifts cajole a
+13JORDAN4ic deposits are blithely about the carefully regular pa
+14KENYA0 pending excuses haggle furiously deposits. pending, express pinto beans wake fluffily past t
+15MOROCCO0rns. blithely bold courts among the closely regular packages use furiously bold platelets?
+16MOZAMBIQUE0s. ironic, unusual asymptotes wake blithely r
+17PERU1platelets. blithely pending dependencies use fluffily across the even pinto beans. carefully silent accoun
+18CHINA2c dependencies. furiously express notornis sleep slyly regular accounts. ideas sleep. depos
+19ROMANIA3ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account
+20SAUDI ARABIA4ts. silent requests haggle. closely express packages sleep across the blithely
+21VIETNAM2hely enticingly express accounts. even, final
+22RUSSIA3 requests against the platelets use never according to the quickly regular pint
+23UNITED KINGDOM3eans boost carefully special requests. accounts are. carefull
+24UNITED STATES1y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be
diff --git a/hivesterix/hivesterix-optimizer/pom.xml b/hivesterix/hivesterix-optimizer/pom.xml
index ba7c7ad..858507e 100644
--- a/hivesterix/hivesterix-optimizer/pom.xml
+++ b/hivesterix/hivesterix-optimizer/pom.xml
@@ -18,7 +18,7 @@
<parent>
<artifactId>hivesterix</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<artifactId>hivesterix-optimizer</artifactId>
@@ -44,14 +44,14 @@
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-translator</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
index 959e73e..12b5986 100644
--- a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
@@ -12,115 +12,116 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.optimizer.rulecollections;
-
-import java.util.LinkedList;
-
-import edu.uci.ics.hivesterix.optimizer.rules.InsertProjectBeforeWriteRule;
-import edu.uci.ics.hivesterix.optimizer.rules.IntroduceEarlyProjectRule;
-import edu.uci.ics.hivesterix.optimizer.rules.LocalGroupByRule;
-import edu.uci.ics.hivesterix.optimizer.rules.RemoveRedundantSelectRule;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.BreakSelectIntoConjunctsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ComplexJoinInferenceRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateAssignsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateSelectsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.EliminateSubplanRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.EnforceStructuralPropertiesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractCommonOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractGbyExpressionsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.FactorRedundantGroupAndDecorVarsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InferTypesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InlineVariablesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InsertProjectBeforeUnionRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceAggregateCombinerRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceGroupByCombinerRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IsolateHyracksOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PullSelectOutOfEqJoin;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushLimitDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectIntoDataSourceScanRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectIntoJoinRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ReinferAllTypesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveRedundantProjectionRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveUnusedAssignAndAggregateRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetAlgebricksPhysicalOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetExecutionModeRule;
-
-public final class HiveRuleCollections {
-
- public final static LinkedList<IAlgebraicRewriteRule> NORMALIZATION = new LinkedList<IAlgebraicRewriteRule>();
- static {
- NORMALIZATION.add(new EliminateSubplanRule());
- NORMALIZATION.add(new BreakSelectIntoConjunctsRule());
- NORMALIZATION.add(new PushSelectIntoJoinRule());
- NORMALIZATION.add(new ExtractGbyExpressionsRule());
- NORMALIZATION.add(new RemoveRedundantSelectRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> COND_PUSHDOWN_AND_JOIN_INFERENCE = new LinkedList<IAlgebraicRewriteRule>();
- static {
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new PushSelectDownRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new InlineVariablesRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new FactorRedundantGroupAndDecorVarsRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new EliminateSubplanRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> LOAD_FIELDS = new LinkedList<IAlgebraicRewriteRule>();
- static {
- // should LoadRecordFieldsRule be applied in only one pass over the
- // plan?
- LOAD_FIELDS.add(new InlineVariablesRule());
- // LOAD_FIELDS.add(new RemoveUnusedAssignAndAggregateRule());
- LOAD_FIELDS.add(new ComplexJoinInferenceRule());
- LOAD_FIELDS.add(new InferTypesRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> OP_PUSHDOWN = new LinkedList<IAlgebraicRewriteRule>();
- static {
- OP_PUSHDOWN.add(new PushProjectDownRule());
- OP_PUSHDOWN.add(new PushSelectDownRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> DATA_EXCHANGE = new LinkedList<IAlgebraicRewriteRule>();
- static {
- DATA_EXCHANGE.add(new SetExecutionModeRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> CONSOLIDATION = new LinkedList<IAlgebraicRewriteRule>();
- static {
- CONSOLIDATION.add(new RemoveRedundantProjectionRule());
- CONSOLIDATION.add(new ConsolidateSelectsRule());
- CONSOLIDATION.add(new IntroduceEarlyProjectRule());
- CONSOLIDATION.add(new ConsolidateAssignsRule());
- CONSOLIDATION.add(new IntroduceGroupByCombinerRule());
- CONSOLIDATION.add(new IntroduceAggregateCombinerRule());
- CONSOLIDATION.add(new RemoveUnusedAssignAndAggregateRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> PHYSICAL_PLAN_REWRITES = new LinkedList<IAlgebraicRewriteRule>();
- static {
- PHYSICAL_PLAN_REWRITES.add(new PullSelectOutOfEqJoin());
- PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
- PHYSICAL_PLAN_REWRITES.add(new EnforceStructuralPropertiesRule());
- PHYSICAL_PLAN_REWRITES.add(new PushProjectDownRule());
- PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
- PHYSICAL_PLAN_REWRITES.add(new PushLimitDownRule());
- PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeWriteRule());
- PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeUnionRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> prepareJobGenRules = new LinkedList<IAlgebraicRewriteRule>();
- static {
- prepareJobGenRules.add(new ReinferAllTypesRule());
- prepareJobGenRules.add(new IsolateHyracksOperatorsRule(
- HeuristicOptimizer.hyraxOperatorsBelowWhichJobGenIsDisabled));
- prepareJobGenRules.add(new ExtractCommonOperatorsRule());
- prepareJobGenRules.add(new LocalGroupByRule());
- prepareJobGenRules.add(new PushProjectIntoDataSourceScanRule());
- prepareJobGenRules.add(new ReinferAllTypesRule());
- }
-
-}
+package edu.uci.ics.hivesterix.optimizer.rulecollections;
+
+import java.util.LinkedList;
+
+import edu.uci.ics.hivesterix.optimizer.rules.InsertProjectBeforeWriteRule;
+import edu.uci.ics.hivesterix.optimizer.rules.IntroduceEarlyProjectRule;
+import edu.uci.ics.hivesterix.optimizer.rules.LocalGroupByRule;
+import edu.uci.ics.hivesterix.optimizer.rules.RemoveRedundantSelectRule;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.BreakSelectIntoConjunctsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ComplexJoinInferenceRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateAssignsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateSelectsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.EliminateSubplanRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.EnforceStructuralPropertiesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractCommonOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractGbyExpressionsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.FactorRedundantGroupAndDecorVarsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InferTypesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InlineVariablesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InsertProjectBeforeUnionRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceAggregateCombinerRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceGroupByCombinerRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IsolateHyracksOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PullSelectOutOfEqJoin;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushLimitDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectIntoDataSourceScanRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectIntoJoinRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ReinferAllTypesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveRedundantProjectionRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveUnusedAssignAndAggregateRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetAlgebricksPhysicalOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetExecutionModeRule;
+
+public final class HiveRuleCollections {
+
+ public final static LinkedList<IAlgebraicRewriteRule> NORMALIZATION = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ NORMALIZATION.add(new EliminateSubplanRule());
+ NORMALIZATION.add(new BreakSelectIntoConjunctsRule());
+ NORMALIZATION.add(new PushSelectIntoJoinRule());
+ NORMALIZATION.add(new ExtractGbyExpressionsRule());
+ NORMALIZATION.add(new RemoveRedundantSelectRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> COND_PUSHDOWN_AND_JOIN_INFERENCE = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new PushSelectDownRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new InlineVariablesRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new FactorRedundantGroupAndDecorVarsRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new EliminateSubplanRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> LOAD_FIELDS = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ // should LoadRecordFieldsRule be applied in only one pass over the
+ // plan?
+ LOAD_FIELDS.add(new InlineVariablesRule());
+ // LOAD_FIELDS.add(new RemoveUnusedAssignAndAggregateRule());
+ LOAD_FIELDS.add(new ComplexJoinInferenceRule());
+ LOAD_FIELDS.add(new InferTypesRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> OP_PUSHDOWN = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ OP_PUSHDOWN.add(new PushProjectDownRule());
+ OP_PUSHDOWN.add(new PushSelectDownRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> DATA_EXCHANGE = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ DATA_EXCHANGE.add(new SetExecutionModeRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> CONSOLIDATION = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ CONSOLIDATION.add(new RemoveRedundantProjectionRule());
+ CONSOLIDATION.add(new ConsolidateSelectsRule());
+ CONSOLIDATION.add(new IntroduceEarlyProjectRule());
+ CONSOLIDATION.add(new ConsolidateAssignsRule());
+ CONSOLIDATION.add(new IntroduceGroupByCombinerRule());
+ CONSOLIDATION.add(new IntroduceAggregateCombinerRule());
+ CONSOLIDATION.add(new RemoveUnusedAssignAndAggregateRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> PHYSICAL_PLAN_REWRITES = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ PHYSICAL_PLAN_REWRITES.add(new PullSelectOutOfEqJoin());
+ PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
+ PHYSICAL_PLAN_REWRITES.add(new EnforceStructuralPropertiesRule());
+ PHYSICAL_PLAN_REWRITES.add(new PushProjectDownRule());
+ PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
+ PHYSICAL_PLAN_REWRITES.add(new PushLimitDownRule());
+ PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeWriteRule());
+ PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeUnionRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> prepareJobGenRules = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ prepareJobGenRules.add(new ReinferAllTypesRule());
+ prepareJobGenRules.add(new IsolateHyracksOperatorsRule(
+ HeuristicOptimizer.hyraxOperatorsBelowWhichJobGenIsDisabled));
+ prepareJobGenRules.add(new ExtractCommonOperatorsRule());
+ prepareJobGenRules.add(new LocalGroupByRule());
+ prepareJobGenRules.add(new PushProjectIntoDataSourceScanRule());
+ prepareJobGenRules.add(new ReinferAllTypesRule());
+ prepareJobGenRules.add(new SetExecutionModeRule());
+ }
+
+}
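
As a reading aid for the HiveRuleCollections diff above: the class groups rewrite rules into ordered phases (NORMALIZATION, CONSOLIDATION, PHYSICAL_PLAN_REWRITES, and so on), and this patch additionally appends SetExecutionModeRule to prepareJobGenRules. The sketch below uses hypothetical interfaces, not the Algebricks API, to show how such phase-ordered rule collections are typically driven to a fixpoint, one phase at a time.

// Hypothetical sketch (not Algebricks): apply each phase's rules repeatedly
// until no rule fires, then move on to the next phase.
import java.util.Arrays;
import java.util.List;

interface RewriteRule {
    /** Returns true if the rule changed the plan. */
    boolean apply(StringBuilder plan);
}

public final class PhasedRewriterSketch {

    /** Runs each phase to a fixpoint before starting the next one. */
    static void rewrite(StringBuilder plan, List<List<RewriteRule>> phases) {
        for (List<RewriteRule> phase : phases) {
            boolean changed;
            do {
                changed = false;
                for (RewriteRule rule : phase) {
                    changed |= rule.apply(plan);
                }
            } while (changed);
        }
    }

    public static void main(String[] args) {
        // Toy rules standing in for collections like NORMALIZATION / CONSOLIDATION.
        RewriteRule dropDoubleSpaces = plan -> {
            int i = plan.indexOf("  ");
            if (i < 0) return false;
            plan.deleteCharAt(i);
            return true;
        };
        RewriteRule trim = plan -> {
            String t = plan.toString().trim();
            if (t.contentEquals(plan)) return false;
            plan.setLength(0);
            plan.append(t);
            return true;
        };

        StringBuilder plan = new StringBuilder("  select   *  from nation  ");
        rewrite(plan, Arrays.asList(
                Arrays.asList(dropDoubleSpaces),   // "normalization" phase
                Arrays.asList(trim)));             // "consolidation" phase
        System.out.println(plan); // prints: select * from nation
    }
}

The toy rules only illustrate the control flow; the real rule lists declared above are consumed by the Algebricks rewriter framework (HeuristicOptimizer and IAlgebraicRewriteRule, per the imports in the file).
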
diff --git a/hivesterix/hivesterix-runtime/pom.xml b/hivesterix/hivesterix-runtime/pom.xml
index e4f5416..6d10286 100644
--- a/hivesterix/hivesterix-runtime/pom.xml
+++ b/hivesterix/hivesterix-runtime/pom.xml
@@ -20,16 +20,14 @@
<parent>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<dependencies>
- <dependency>
- <groupId>javax.servlet</groupId>
- <artifactId>servlet-api</artifactId>
- <version>2.5</version>
- <type>jar</type>
- <scope>compile</scope>
+ <dependency>
+ <groupId>sqlline</groupId>
+ <artifactId>sqlline</artifactId>
+ <version>1_0_2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
@@ -38,285 +36,105 @@
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>args4j</groupId>
- <artifactId>args4j</artifactId>
- <version>2.0.12</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.json</groupId>
- <artifactId>json</artifactId>
- <version>20090211</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-servlet</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>jline</groupId>
- <artifactId>jline</artifactId>
- <version>0.9.94</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-core</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-connectionpool</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-enhancer</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-rdbms</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-dbcp</groupId>
- <artifactId>commons-dbcp</artifactId>
- <version>1.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-pool</groupId>
- <artifactId>commons-pool</artifactId>
- <version>1.5.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-collections</groupId>
- <artifactId>commons-collections</artifactId>
- <version>3.2.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- <version>2.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>javax</groupId>
- <artifactId>jdo2-api</artifactId>
- <version>2.3-ec</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.facebook</groupId>
- <artifactId>libfb303</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>cli</artifactId>
- <version>1.2</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache</groupId>
- <artifactId>log4j</artifactId>
- <version>1.2.15</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>antlr-runtime</artifactId>
- <version>3.0.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-cli</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-hwi</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-metastore</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-shims</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-cli</groupId>
- <artifactId>commons-cli</artifactId>
- <version>1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- <version>1.1.1</version>
- <type>jar</type>
- <classifier>api</classifier>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- <version>r06</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>stringtemplate</artifactId>
- <version>3.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.derby</groupId>
- <artifactId>derby</artifactId>
- <version>10.8.1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.90.3</version>
- <type>jar</type>
- <scope>compile</scope>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-hbase-handler</artifactId>
+ <version>0.11.0</version>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-cc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hyracks-control-nc</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-serde</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
@@ -384,5 +202,19 @@
<id>hyracks-public-release</id>
<url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-public-releases</url>
</repository>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>plugins-release</id>
+ <url>http://repo.springsource.org/plugins-release</url>
+ </repository>
</repositories>
</project>
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
index a5177c9..dd4fbe7 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
@@ -34,6 +34,7 @@
import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+@SuppressWarnings("deprecation")
public abstract class AbstractExpressionEvaluator implements ICopyEvaluator {
private List<ICopyEvaluator> children;
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
index d061b23..87d2221 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
@@ -36,6 +36,7 @@
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+@SuppressWarnings("deprecation")
public class AggregationFunctionEvaluator implements ICopyAggregateFunction {
/**
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
index f4b77b8..3f1cc27 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
@@ -35,6 +35,7 @@
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+@SuppressWarnings("deprecation")
public class AggregatuibFunctionSerializableEvaluator implements ICopySerializableAggregateFunction {
/**
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
index d91b806..b511d87 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
@@ -35,6 +35,7 @@
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunction;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+@SuppressWarnings("deprecation")
public class UDTFFunctionEvaluator implements ICopyUnnestingFunction, Collector {
/**
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
index 09f0cb6..d65dc24 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
@@ -12,370 +12,383 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
-import edu.uci.ics.hivesterix.runtime.evaluator.AggregationFunctionEvaluator;
-import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
-import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunctionFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class AggregationFunctionFactory implements ICopyAggregateFunctionFactory {
-
- private static final long serialVersionUID = 1L;
-
- /**
- * list of parameters' serialization
- */
- private List<String> parametersSerialization = new ArrayList<String>();
-
- /**
- * the name of the udf
- */
- private String genericUDAFName;
-
- /**
- * aggregation mode
- */
- private GenericUDAFEvaluator.Mode mode;
-
- /**
- * list of type info
- */
- private List<TypeInfo> types = new ArrayList<TypeInfo>();
-
- /**
- * distinct or not
- */
- private boolean distinct;
-
- /**
- * the schema of incoming rows
- */
- private Schema rowSchema;
-
- /**
- * list of parameters
- */
- private transient List<ExprNodeDesc> parametersOrigin;
-
- /**
- * row inspector
- */
- private transient ObjectInspector rowInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspectorPartial = null;
-
- /**
- * parameter inspectors
- */
- private transient ObjectInspector[] parameterInspectors = null;
-
- /**
- * expression desc
- */
- private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * aggregation function desc
- */
- private transient AggregationDesc aggregator;
-
- /**
- * @param aggregator
- * Algebricks function call expression
- * @param oi
- * schema
- */
- public AggregationFunctionFactory(AggregateFunctionCallExpression expression, Schema oi,
- IVariableTypeEnvironment env) throws AlgebricksException {
-
- try {
- aggregator = (AggregationDesc) ExpressionTranslator.getHiveExpression(expression, env);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- init(aggregator.getParameters(), aggregator.getGenericUDAFName(), aggregator.getMode(),
- aggregator.getDistinct(), oi);
- }
-
- /**
- * constructor of aggregation function factory
- *
- * @param inputs
- * @param name
- * @param udafMode
- * @param distinct
- * @param oi
- */
- private void init(List<ExprNodeDesc> inputs, String name, GenericUDAFEvaluator.Mode udafMode, boolean distinct,
- Schema oi) {
- parametersOrigin = inputs;
- genericUDAFName = name;
- mode = udafMode;
- this.distinct = distinct;
- rowSchema = oi;
-
- for (ExprNodeDesc input : inputs) {
- TypeInfo type = input.getTypeInfo();
- if (type instanceof StructTypeInfo) {
- types.add(TypeInfoFactory.doubleTypeInfo);
- } else
- types.add(type);
-
- String s = Utilities.serializeExpression(input);
- parametersSerialization.add(s);
- }
- }
-
- @Override
- public synchronized ICopyAggregateFunction createAggregateFunction(IDataOutputProvider provider)
- throws AlgebricksException {
- if (parametersOrigin == null) {
- Configuration config = new Configuration();
- config.setClassLoader(this.getClass().getClassLoader());
- /**
- * in case of class.forname(...) call in hive code
- */
- Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
-
- parametersOrigin = new ArrayList<ExprNodeDesc>();
- for (String serialization : parametersSerialization) {
- parametersOrigin.add(Utilities.deserializeExpression(serialization, config));
- }
- }
-
- /**
- * exprs
- */
- if (parameterExprs == null)
- parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- if (evaluators == null)
- evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- if (cachedParameters == null)
- cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- if (cachedRowObjects == null)
- cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- if (serDe == null)
- serDe = new HashMap<Long, SerDe>();
-
- /**
- * UDAF functions
- */
- if (udafsComplete == null)
- udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * UDAF functions
- */
- if (udafsPartial == null)
- udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- if (parameterInspectors == null)
- parameterInspectors = new ObjectInspector[parametersOrigin.size()];
-
- if (rowInspector == null)
- rowInspector = rowSchema.toObjectInspector();
-
- // get current thread id
- long threadId = Thread.currentThread().getId();
-
- /**
- * expressions, expressions are thread local
- */
- List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
- if (parameters == null) {
- parameters = new ArrayList<ExprNodeDesc>();
- for (ExprNodeDesc parameter : parametersOrigin)
- parameters.add(parameter.clone());
- parameterExprs.put(threadId, parameters);
- }
-
- /**
- * cached parameter objects
- */
- Object[] cachedParas = cachedParameters.get(threadId);
- if (cachedParas == null) {
- cachedParas = new Object[parameters.size()];
- cachedParameters.put(threadId, cachedParas);
- }
-
- /**
- * cached row object: one per thread
- */
- LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects.get(threadId);
- if (cachedRowObject == null) {
- cachedRowObject = LazyFactory.createLazyObject(rowInspector);
- cachedRowObjects.put(threadId, cachedRowObject);
- }
-
- /**
- * we only use lazy serde to do serialization
- */
- SerDe lazySer = serDe.get(threadId);
- if (lazySer == null) {
- lazySer = new LazySerDe();
- serDe.put(threadId, lazySer);
- }
-
- /**
- * evaluators
- */
- ExprNodeEvaluator[] evals = evaluators.get(threadId);
- if (evals == null) {
- evals = new ExprNodeEvaluator[parameters.size()];
- evaluators.put(threadId, evals);
- }
-
- GenericUDAFEvaluator udafPartial;
- GenericUDAFEvaluator udafComplete;
-
- // initialize object inspectors
- try {
- /**
- * evaluators, udf, object inpsectors are shared in one thread
- */
- for (int i = 0; i < evals.length; i++) {
- if (evals[i] == null) {
- evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
- if (parameterInspectors[i] == null) {
- parameterInspectors[i] = evals[i].initialize(rowInspector);
- } else {
- evals[i].initialize(rowInspector);
- }
- }
- }
-
- udafComplete = udafsComplete.get(threadId);
- if (udafComplete == null) {
- try {
- udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafsComplete.put(threadId, udafComplete);
- udafComplete.init(mode, parameterInspectors);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspector == null)
- outputInspector = udafComplete.init(mode, parameterInspectors);
-
- // initial partial gby udaf
- GenericUDAFEvaluator.Mode partialMode;
- // adjust mode for external groupby
- if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
- else if (mode == GenericUDAFEvaluator.Mode.FINAL)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
- else
- partialMode = mode;
- udafPartial = udafsPartial.get(threadId);
- if (udafPartial == null) {
- try {
- udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafPartial.init(partialMode, parameterInspectors);
- udafsPartial.put(threadId, udafPartial);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspectorPartial == null)
- outputInspectorPartial = udafPartial.init(partialMode, parameterInspectors);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e);
- }
-
- return new AggregationFunctionEvaluator(parameters, types, genericUDAFName, mode, distinct, rowInspector,
- provider.getDataOutput(), evals, parameterInspectors, cachedParas, lazySer, cachedRowObject,
- udafPartial, udafComplete, outputInspector, outputInspectorPartial);
- }
-
- public String toString() {
- return "aggregation function expression evaluator factory: " + this.genericUDAFName;
- }
-}
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.AggregationFunctionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunctionFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+@SuppressWarnings("deprecation")
+public class AggregationFunctionFactory implements ICopyAggregateFunctionFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * list of parameters' serialization
+ */
+ private List<String> parametersSerialization = new ArrayList<String>();
+
+ /**
+ * the name of the udf
+ */
+ private String genericUDAFName;
+
+ /**
+ * aggregation mode
+ */
+ private GenericUDAFEvaluator.Mode mode;
+
+ /**
+ * list of type info
+ */
+ private List<TypeInfo> types = new ArrayList<TypeInfo>();
+
+ /**
+ * distinct or not
+ */
+ private boolean distinct;
+
+ /**
+ * the schema of incoming rows
+ */
+ private Schema rowSchema;
+
+ /**
+ * list of parameters
+ */
+ private transient List<ExprNodeDesc> parametersOrigin;
+
+ /**
+ * row inspector
+ */
+ private transient ObjectInspector rowInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspectorPartial = null;
+
+ /**
+ * parameter inspectors
+ */
+ private transient ObjectInspector[] parameterInspectors = null;
+
+ /**
+ * expression desc
+ */
+ private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * udaf evaluators
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * complete-mode UDAF evaluators, keyed by thread id
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * aggregation function desc
+ */
+ private transient AggregationDesc aggregator;
+
+ /**
+ * @param expression
+ * Algebricks aggregate function call expression
+ * @param oi
+ * the schema of incoming rows
+ * @param env
+ * the variable type environment
+ */
+ public AggregationFunctionFactory(AggregateFunctionCallExpression expression, Schema oi,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+
+ try {
+ aggregator = (AggregationDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ throw new AlgebricksException(e);
+ }
+ init(aggregator.getParameters(), aggregator.getGenericUDAFName(), aggregator.getMode(),
+ aggregator.getDistinct(), oi);
+ }
+
+ /**
+ * initialize the aggregation function factory state
+ *
+ * @param inputs
+ * @param name
+ * @param udafMode
+ * @param distinct
+ * @param oi
+ */
+ private void init(List<ExprNodeDesc> inputs, String name, GenericUDAFEvaluator.Mode udafMode, boolean distinct,
+ Schema oi) {
+ parametersOrigin = inputs;
+ genericUDAFName = name;
+ mode = udafMode;
+ this.distinct = distinct;
+ rowSchema = oi;
+
+ for (ExprNodeDesc input : inputs) {
+ TypeInfo type = input.getTypeInfo();
+ if (type instanceof StructTypeInfo) {
+ types.add(TypeInfoFactory.doubleTypeInfo);
+ } else {
+ types.add(type);
+ }
+
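+ // keep a serialized copy of each parameter expression; parametersOrigin is
+ // transient, so only the serialized form travels with the factory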
+ String s = Utilities.serializeExpression(input);
+ parametersSerialization.add(s);
+ }
+ }
+
+ @Override
+ public synchronized ICopyAggregateFunction createAggregateFunction(IDataOutputProvider provider)
+ throws AlgebricksException {
+ /**
+ * list of object inspectors correlated to types
+ */
+ List<ObjectInspector> oiListForTypes = new ArrayList<ObjectInspector>();
+ for (TypeInfo type : types) {
+ oiListForTypes.add(LazyUtils.getLazyObjectInspectorFromTypeInfo(type, false));
+ }
+
+ if (parametersOrigin == null) {
+ Configuration config = new Configuration();
+ config.setClassLoader(this.getClass().getClassLoader());
+ /**
+ * ensure Class.forName(...) calls inside Hive code use this class loader
+ */
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+
+ parametersOrigin = new ArrayList<ExprNodeDesc>();
+ for (String serialization : parametersSerialization) {
+ parametersOrigin.add(Utilities.deserializeExpression(serialization, config));
+ }
+ }
+
+ /**
+ * exprs
+ */
+ if (parameterExprs == null)
+ parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ if (evaluators == null)
+ evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ if (cachedParameters == null)
+ cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ if (cachedRowObjects == null)
+ cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ if (serDe == null)
+ serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * complete-mode UDAF evaluators
+ */
+ if (udafsComplete == null)
+ udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * partial-mode UDAF evaluators
+ */
+ if (udafsPartial == null)
+ udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ if (parameterInspectors == null)
+ parameterInspectors = new ObjectInspector[parametersOrigin.size()];
+
+ if (rowInspector == null)
+ rowInspector = rowSchema.toObjectInspector();
+
+ // get current thread id
+ long threadId = Thread.currentThread().getId();
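+ // all of the maps below are keyed by thread id, so each task thread works
+ // with its own Hive evaluators, caches, and serde instances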
+
+ /**
+ * parameter expressions are thread local
+ */
+ List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
+ if (parameters == null) {
+ parameters = new ArrayList<ExprNodeDesc>();
+ for (ExprNodeDesc parameter : parametersOrigin)
+ parameters.add(parameter.clone());
+ parameterExprs.put(threadId, parameters);
+ }
+
+ /**
+ * cached parameter objects
+ */
+ Object[] cachedParas = cachedParameters.get(threadId);
+ if (cachedParas == null) {
+ cachedParas = new Object[parameters.size()];
+ cachedParameters.put(threadId, cachedParas);
+ }
+
+ /**
+ * cached row object: one per thread
+ */
+ LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects.get(threadId);
+ if (cachedRowObject == null) {
+ cachedRowObject = LazyFactory.createLazyObject(rowInspector);
+ cachedRowObjects.put(threadId, cachedRowObject);
+ }
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ SerDe lazySer = serDe.get(threadId);
+ if (lazySer == null) {
+ lazySer = new LazySerDe();
+ serDe.put(threadId, lazySer);
+ }
+
+ /**
+ * evaluators
+ */
+ ExprNodeEvaluator[] evals = evaluators.get(threadId);
+ if (evals == null) {
+ evals = new ExprNodeEvaluator[parameters.size()];
+ evaluators.put(threadId, evals);
+ }
+
+ GenericUDAFEvaluator udafPartial;
+ GenericUDAFEvaluator udafComplete;
+
+ // initialize object inspectors
+ try {
+ /**
+ * evaluators, UDFs, and object inspectors are shared within one thread
+ */
+ for (int i = 0; i < evals.length; i++) {
+ if (evals[i] == null) {
+ evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
+ if (parameterInspectors[i] == null) {
+ parameterInspectors[i] = evals[i].initialize(rowInspector);
+ } else {
+ evals[i].initialize(rowInspector);
+ }
+ }
+ }
+
+ udafComplete = udafsComplete.get(threadId);
+ if (udafComplete == null) {
+ try {
+ udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, oiListForTypes, distinct,
+ false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafsComplete.put(threadId, udafComplete);
+ udafComplete.init(mode, parameterInspectors);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspector == null)
+ outputInspector = udafComplete.init(mode, parameterInspectors);
+
+ // initialize the partial group-by UDAF
+ GenericUDAFEvaluator.Mode partialMode;
+ // adjust mode for external groupby
+ if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
+ else if (mode == GenericUDAFEvaluator.Mode.FINAL)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
+ else
+ partialMode = mode;
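+ // COMPLETE runs its partial evaluator as PARTIAL1 and FINAL as PARTIAL2, so the
+ // partial evaluator always produces mergeable partial aggregates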
+ udafPartial = udafsPartial.get(threadId);
+ if (udafPartial == null) {
+ try {
+ udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, oiListForTypes, distinct,
+ false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafPartial.init(partialMode, parameterInspectors);
+ udafsPartial.put(threadId, udafPartial);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspectorPartial == null)
+ outputInspectorPartial = udafPartial.init(partialMode, parameterInspectors);
+ } catch (Exception e) {
+ throw new AlgebricksException(e);
+ }
+
+ return new AggregationFunctionEvaluator(parameters, types, genericUDAFName, mode, distinct, rowInspector,
+ provider.getDataOutput(), evals, parameterInspectors, cachedParas, lazySer, cachedRowObject,
+ udafPartial, udafComplete, outputInspector, outputInspectorPartial);
+ }
+
+ @Override
+ public String toString() {
+ return "aggregation function expression evaluator factory: " + this.genericUDAFName;
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
index 71d11c0..c1ee814 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
@@ -39,12 +39,14 @@
import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunctionFactory;
+@SuppressWarnings("deprecation")
public class AggregationFunctionSerializableFactory implements ICopySerializableAggregateFunctionFactory {
private static final long serialVersionUID = 1L;
@@ -190,10 +192,19 @@
String s = Utilities.serializeExpression(input);
parametersSerialization.add(s);
}
+
}
@Override
public synchronized ICopySerializableAggregateFunction createAggregateFunction() throws AlgebricksException {
+ /**
+ * list of object inspectors correlated to types
+ */
+ List<ObjectInspector> oiListForTypes = new ArrayList<ObjectInspector>();
+ for (TypeInfo type : types) {
+ oiListForTypes.add(LazyUtils.getLazyObjectInspectorFromTypeInfo(type, false));
+ }
+
if (parametersOrigin == null) {
Configuration config = new Configuration();
config.setClassLoader(this.getClass().getClassLoader());
@@ -328,7 +339,8 @@
udafComplete = udafsComplete.get(threadId);
if (udafComplete == null) {
try {
- udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, oiListForTypes, distinct,
+ false);
} catch (HiveException e) {
throw new AlgebricksException(e);
}
@@ -352,7 +364,8 @@
udafPartial = udafsPartial.get(threadId);
if (udafPartial == null) {
try {
- udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, oiListForTypes, distinct,
+ false);
} catch (HiveException e) {
throw new AlgebricksException(e);
}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
index e89a4c4..4bbb21f 100644
--- a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
@@ -12,156 +12,176 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.runtime.operator.filewrite;
-
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.OperatorFactory;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
-import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
-import edu.uci.ics.hyracks.api.comm.IFrameWriter;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
-
-@SuppressWarnings("deprecation")
-public class HiveFileWritePushRuntime implements IPushRuntime {
-
- /**
- * frame tuple accessor to access byte buffer
- */
- private final FrameTupleAccessor accessor;
-
- /**
- * input object inspector
- */
- private final ObjectInspector inputInspector;
-
- /**
- * cachedInput
- */
- private final LazyColumnar cachedInput;
-
- /**
- * File sink operator of Hive
- */
- private final FileSinkDesc fileSink;
-
- /**
- * job configuration, which contain name node and other configuration
- * information
- */
- private JobConf conf;
-
- /**
- * input object inspector
- */
- private final Schema inputSchema;
-
- /**
- * a copy of hive schema representation
- */
- private RowSchema rowSchema;
-
- /**
- * the Hive file sink operator
- */
- private FileSinkOperator fsOp;
-
- /**
- * cached tuple object reference
- */
- private FrameTupleReference tuple = new FrameTupleReference();
-
- /**
- * @param spec
- * @param fsProvider
- */
- public HiveFileWritePushRuntime(IHyracksTaskContext context,
- RecordDescriptor inputRecordDesc, JobConf job, FileSinkDesc fs,
- RowSchema schema, Schema oi) {
- fileSink = fs;
- fileSink.setGatherStats(false);
-
- rowSchema = schema;
- conf = job;
- inputSchema = oi;
-
- accessor = new FrameTupleAccessor(context.getFrameSize(),
- inputRecordDesc);
- inputInspector = inputSchema.toObjectInspector();
- cachedInput = new LazyColumnar(
- (LazyColumnarObjectInspector) inputInspector);
- }
-
- @Override
- public void open() throws HyracksDataException {
- fsOp = (FileSinkOperator) OperatorFactory.get(fileSink, rowSchema);
- fsOp.setChildOperators(null);
- fsOp.setParentOperators(null);
- conf.setClassLoader(this.getClass().getClassLoader());
-
- ObjectInspector[] inspectors = new ObjectInspector[1];
- inspectors[0] = inputInspector;
- try {
- fsOp.initialize(conf, inspectors);
- fsOp.setExecContext(null);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- @Override
- public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
- accessor.reset(buffer);
- int n = accessor.getTupleCount();
- try {
- for (int i = 0; i < n; ++i) {
- tuple.reset(accessor, i);
- cachedInput.init(tuple);
- fsOp.process(cachedInput, 0);
- }
- } catch (HiveException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void close() throws HyracksDataException {
- try {
- Thread.currentThread().setContextClassLoader(
- this.getClass().getClassLoader());
- fsOp.closeOp(false);
- } catch (HiveException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void setFrameWriter(int index, IFrameWriter writer,
- RecordDescriptor recordDesc) {
- throw new IllegalStateException();
- }
-
- @Override
- public void setInputRecordDescriptor(int index,
- RecordDescriptor recordDescriptor) {
- }
-
- @Override
- public void fail() throws HyracksDataException {
-
- }
-
-}
+package edu.uci.ics.hivesterix.runtime.operator.filewrite;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.logging.Logger;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+
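+/**
+ * An Algebricks push runtime that feeds incoming Hyracks frames into a Hive
+ * FileSinkOperator, so query results are written out through Hive's file formats.
+ */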
+@SuppressWarnings("deprecation")
+public class HiveFileWritePushRuntime implements IPushRuntime {
+ private static final Logger LOGGER = Logger.getLogger(HiveFileWritePushRuntime.class.getName());
+
+ /**
+ * frame tuple accessor to access byte buffer
+ */
+ private final FrameTupleAccessor accessor;
+
+ /**
+ * input object inspector
+ */
+ private final ObjectInspector inputInspector;
+
+ /**
+ * cached input row, reused across tuples
+ */
+ private final LazyColumnar cachedInput;
+
+ /**
+ * File sink operator of Hive
+ */
+ private final FileSinkDesc fileSink;
+
+ /**
+ * job configuration, which contains the name node and other configuration information
+ */
+ private JobConf conf;
+
+ /**
+ * the schema of the input rows
+ */
+ private final Schema inputSchema;
+
+ /**
+ * a copy of hive schema representation
+ */
+ private RowSchema rowSchema;
+
+ /**
+ * the Hive file sink operator
+ */
+ private FileSinkOperator fsOp;
+
+ /**
+ * cached tuple object reference
+ */
+ private FrameTupleReference tuple = new FrameTupleReference();
+
+ /**
+ * @param context
+ * @param inputRecordDesc
+ * @param job
+ * @param fs
+ * @param schema
+ * @param oi
+ */
+ public HiveFileWritePushRuntime(IHyracksTaskContext context, RecordDescriptor inputRecordDesc, JobConf job,
+ FileSinkDesc fs, RowSchema schema, Schema oi) {
+ fileSink = fs;
+ fileSink.setGatherStats(false);
+
+ rowSchema = schema;
+ conf = job;
+ inputSchema = oi;
+
+ accessor = new FrameTupleAccessor(context.getFrameSize(), inputRecordDesc);
+ inputInspector = inputSchema.toObjectInspector();
+ cachedInput = new LazyColumnar((LazyColumnarObjectInspector) inputInspector);
+ }
+
+ @Override
+ public void open() throws HyracksDataException {
+ fsOp = (FileSinkOperator) OperatorFactory.get(fileSink, rowSchema);
+ fsOp.setChildOperators(null);
+ fsOp.setParentOperators(null);
+ conf.setClassLoader(this.getClass().getClassLoader());
+
+ ObjectInspector[] inspectors = new ObjectInspector[1];
+ inspectors[0] = inputInspector;
+ try {
+ fsOp.initialize(conf, inspectors);
+ fsOp.setExecContext(null);
+ createTempDir();
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+ accessor.reset(buffer);
+ int n = accessor.getTupleCount();
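+ // wrap each Hyracks tuple as a lazy columnar row and push it into the Hive file sink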
+ try {
+ for (int i = 0; i < n; ++i) {
+ tuple.reset(accessor, i);
+ cachedInput.init(tuple);
+ fsOp.process(cachedInput, 0);
+ }
+ } catch (HiveException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ try {
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+ fsOp.closeOp(false);
+ } catch (HiveException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void setFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc) {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public void setInputRecordDescriptor(int index, RecordDescriptor recordDescriptor) {
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+
+ }
+
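+ /**
+ * pre-create the temporary output directory that the Hive file sink writes into,
+ * and mark it delete-on-exit so it is cleaned up when the JVM exits
+ */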
+ private void createTempDir() throws IOException {
+ FileSinkDesc fdesc = fsOp.getConf();
+ String tempDir = fdesc.getDirName();
+ if (tempDir != null) {
+ Path tempPath = Utilities.toTempPath(new Path(tempDir));
+ FileSystem fs = tempPath.getFileSystem(conf);
+ if (!fs.exists(tempPath)) {
+ try {
+ fs.mkdirs(tempPath);
+ ShimLoader.getHadoopShims().fileSystemDeleteOnExit(fs, tempPath);
+ } catch (IOException e) {
+ // if the directory already exists that is fine, so only log a warning
+ LOGGER.warning("Failed to create the temp result directory " + tempPath + "; it may already exist.");
+ }
+ }
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/pom.xml b/hivesterix/hivesterix-serde/pom.xml
index 97c9174..b53661b 100644
--- a/hivesterix/hivesterix-serde/pom.xml
+++ b/hivesterix/hivesterix-serde/pom.xml
@@ -18,7 +18,7 @@
<parent>
<artifactId>hivesterix</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<artifactId>hivesterix-serde</artifactId>
@@ -42,37 +42,37 @@
<dependencies>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-serde</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
+ <groupId>org.apache.hive</groupId>
<artifactId>hive-common</artifactId>
- <version>0.7.0</version>
+ <version>0.11.0</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>hivesterix-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.10-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>3.8.1</version>
+ <version>4.8.1</version>
<scope>test</scope>
</dependency>
</dependencies>
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
index b5d64e8..2bbb1d5 100644
--- a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
@@ -46,6 +46,7 @@
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -74,6 +75,7 @@
* deserialized until required. Binary means a field is serialized in binary
* compact format.
*/
+@SuppressWarnings("deprecation")
public class LazySerDe implements SerDe {
public static final Log LOG = LogFactory.getLog(LazySerDe.class.getName());
@@ -140,7 +142,6 @@
/**
* Deserialize a table record to a Lazy struct.
*/
- @SuppressWarnings("deprecation")
@Override
public Object deserialize(Writable field) throws SerDeException {
if (byteArrayRef == null) {
@@ -471,4 +472,9 @@
}
}
}
+
+ @Override
+ public SerDeStats getSerDeStats() {
+ return null;
+ }
}
diff --git a/hivesterix/hivesterix-translator/pom.xml b/hivesterix/hivesterix-translator/pom.xml
index 8a24d5e..d8c205f 100644
--- a/hivesterix/hivesterix-translator/pom.xml
+++ b/hivesterix/hivesterix-translator/pom.xml
@@ -21,7 +21,7 @@
<parent>
<artifactId>hivesterix</artifactId>
<groupId>edu.uci.ics.hyracks</groupId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
</parent>
<build>
@@ -42,30 +42,23 @@
<dependencies>
<dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>algebricks-compiler</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-common</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-runtime</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
index f32d85b..76cc51d 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
@@ -12,810 +12,809 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.logical.plan;
-
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.LimitOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.UDF;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
-import edu.uci.ics.hivesterix.logical.expression.HiveAlgebricksBuiltInFunctionMap;
-import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
-import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
-import edu.uci.ics.hivesterix.logical.plan.visitor.ExtractVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.FilterVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.GroupByVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.JoinVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.LateralViewJoinVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.LimitVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.MapJoinVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.ProjectVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.SortVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.TableScanWriteVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.UnionVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Visitor;
-import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ReplicateOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
-import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor;
-import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.PlanPrettyPrinter;
-
-@SuppressWarnings("rawtypes")
-public class HiveAlgebricksTranslator implements Translator {
-
- private int currentVariable = 0;
-
- private List<Mutable<ILogicalOperator>> logicalOp = new ArrayList<Mutable<ILogicalOperator>>();
-
- private boolean continueTraverse = true;
-
- private IMetadataProvider<PartitionDesc, Object> metaData;
-
- /**
- * map variable name to the logical variable
- */
- private HashMap<String, LogicalVariable> nameToLogicalVariableMap = new HashMap<String, LogicalVariable>();
-
- /**
- * map field name to LogicalVariable
- */
- private HashMap<String, LogicalVariable> fieldToLogicalVariableMap = new HashMap<String, LogicalVariable>();
-
- /**
- * map logical variable to name
- */
- private HashMap<LogicalVariable, String> logicalVariableToFieldMap = new HashMap<LogicalVariable, String>();
-
- /**
- * asterix root operators
- */
- private List<Mutable<ILogicalOperator>> rootOperators = new ArrayList<Mutable<ILogicalOperator>>();
-
- /**
- * a list of visitors
- */
- private List<Visitor> visitors = new ArrayList<Visitor>();
-
- /**
- * output writer to print things out
- */
- private static PrintWriter outputWriter = new PrintWriter(new OutputStreamWriter(System.out));
-
- /**
- * map a logical variable to type info
- */
- private HashMap<LogicalVariable, TypeInfo> variableToType = new HashMap<LogicalVariable, TypeInfo>();
-
- @Override
- public LogicalVariable getVariable(String fieldName, TypeInfo type) {
- LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
- if (var == null) {
- currentVariable++;
- var = new LogicalVariable(currentVariable);
- fieldToLogicalVariableMap.put(fieldName, var);
- nameToLogicalVariableMap.put(var.toString(), var);
- variableToType.put(var, type);
- logicalVariableToFieldMap.put(var, fieldName);
- }
- return var;
- }
-
- @Override
- public LogicalVariable getNewVariable(String fieldName, TypeInfo type) {
- currentVariable++;
- LogicalVariable var = new LogicalVariable(currentVariable);
- fieldToLogicalVariableMap.put(fieldName, var);
- nameToLogicalVariableMap.put(var.toString(), var);
- variableToType.put(var, type);
- logicalVariableToFieldMap.put(var, fieldName);
- return var;
- }
-
- @Override
- public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar) {
- String name = this.logicalVariableToFieldMap.get(oldVar);
- if (name != null) {
- fieldToLogicalVariableMap.put(name, newVar);
- nameToLogicalVariableMap.put(newVar.toString(), newVar);
- nameToLogicalVariableMap.put(oldVar.toString(), newVar);
- logicalVariableToFieldMap.put(newVar, name);
- }
- }
-
- @Override
- public IMetadataProvider<PartitionDesc, Object> getMetadataProvider() {
- return metaData;
- }
-
- /**
- * only get an variable, without rewriting it
- *
- * @param fieldName
- * @return
- */
- private LogicalVariable getVariableOnly(String fieldName) {
- return fieldToLogicalVariableMap.get(fieldName);
- }
-
- private void updateVariable(String fieldName, LogicalVariable variable) {
- LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
- if (var == null) {
- fieldToLogicalVariableMap.put(fieldName, variable);
- nameToLogicalVariableMap.put(fieldName, variable);
- } else if (!var.equals(variable)) {
- fieldToLogicalVariableMap.put(fieldName, variable);
- nameToLogicalVariableMap.put(fieldName, variable);
- }
- }
-
- /**
- * get a list of logical variables from the schema
- *
- * @param schema
- * @return
- */
- @Override
- public List<LogicalVariable> getVariablesFromSchema(Schema schema) {
- List<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- List<String> names = schema.getNames();
-
- for (String name : names)
- variables.add(nameToLogicalVariableMap.get(name));
- return variables;
- }
-
- /**
- * get variable to typeinfo map
- *
- * @return
- */
- public HashMap<LogicalVariable, TypeInfo> getVariableContext() {
- return this.variableToType;
- }
-
- /**
- * get the number of variables s
- *
- * @return
- */
- public int getVariableCounter() {
- return currentVariable + 1;
- }
-
- /**
- * translate from hive operator tree to asterix operator tree
- *
- * @param hive
- * roots
- * @return Algebricks roots
- */
- public void translate(List<Operator> hiveRoot, ILogicalOperator parentOperator,
- HashMap<String, PartitionDesc> aliasToPathMap) throws AlgebricksException {
- /**
- * register visitors
- */
- visitors.add(new FilterVisitor());
- visitors.add(new GroupByVisitor());
- visitors.add(new JoinVisitor());
- visitors.add(new LateralViewJoinVisitor());
- visitors.add(new UnionVisitor());
- visitors.add(new LimitVisitor());
- visitors.add(new MapJoinVisitor());
- visitors.add(new ProjectVisitor());
- visitors.add(new SortVisitor());
- visitors.add(new ExtractVisitor());
- visitors.add(new TableScanWriteVisitor(aliasToPathMap));
-
- List<Mutable<ILogicalOperator>> refList = translate(hiveRoot, new MutableObject<ILogicalOperator>(
- parentOperator));
- insertReplicateOperator(refList);
- if (refList != null)
- rootOperators.addAll(refList);
- }
-
- /**
- * translate operator DAG
- *
- * @param hiveRoot
- * @param AlgebricksParentOperator
- * @return
- */
- private List<Mutable<ILogicalOperator>> translate(List<Operator> hiveRoot,
- Mutable<ILogicalOperator> AlgebricksParentOperator) throws AlgebricksException {
-
- for (Operator hiveOperator : hiveRoot) {
- continueTraverse = true;
- Mutable<ILogicalOperator> currentOperatorRef = null;
- if (hiveOperator.getType() == OperatorType.FILTER) {
- FilterOperator fop = (FilterOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.REDUCESINK) {
- ReduceSinkOperator fop = (ReduceSinkOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.JOIN) {
- JoinOperator fop = (JoinOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null) {
- continueTraverse = true;
- break;
- } else
- continueTraverse = false;
- }
- if (currentOperatorRef == null)
- return null;
- } else if (hiveOperator.getType() == OperatorType.LATERALVIEWJOIN) {
- LateralViewJoinOperator fop = (LateralViewJoinOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- if (currentOperatorRef == null)
- return null;
- } else if (hiveOperator.getType() == OperatorType.MAPJOIN) {
- MapJoinOperator fop = (MapJoinOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null) {
- continueTraverse = true;
- break;
- } else
- continueTraverse = false;
- }
- if (currentOperatorRef == null)
- return null;
- } else if (hiveOperator.getType() == OperatorType.SELECT) {
- SelectOperator fop = (SelectOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.EXTRACT) {
- ExtractOperator fop = (ExtractOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.GROUPBY) {
- GroupByOperator fop = (GroupByOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.TABLESCAN) {
- TableScanOperator fop = (TableScanOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.FILESINK) {
- FileSinkOperator fop = (FileSinkOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.LIMIT) {
- LimitOperator lop = (LimitOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.UDTF) {
- UDTFOperator lop = (UDTFOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.UNION) {
- UnionOperator lop = (UnionOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
- if (currentOperatorRef != null) {
- continueTraverse = true;
- break;
- } else
- continueTraverse = false;
- }
- } else
- ;
- if (hiveOperator.getChildOperators() != null && hiveOperator.getChildOperators().size() > 0
- && continueTraverse) {
- @SuppressWarnings("unchecked")
- List<Operator> children = hiveOperator.getChildOperators();
- if (currentOperatorRef == null)
- currentOperatorRef = AlgebricksParentOperator;
- translate(children, currentOperatorRef);
- }
- if (hiveOperator.getChildOperators() == null || hiveOperator.getChildOperators().size() == 0)
- logicalOp.add(currentOperatorRef);
- }
- return logicalOp;
- }
-
- /**
- * used in select, group by to get no-column-expression columns
- *
- * @param cols
- * @return
- */
- public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,
- ArrayList<LogicalVariable> variables) {
-
- ArrayList<Mutable<ILogicalExpression>> expressions = new ArrayList<Mutable<ILogicalExpression>>();
-
- /**
- * variables to be appended in the assign operator
- */
- ArrayList<LogicalVariable> appendedVariables = new ArrayList<LogicalVariable>();
-
- // one variable can only be assigned once
- for (ExprNodeDesc hiveExpr : cols) {
- rewriteExpression(hiveExpr);
-
- if (hiveExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc desc2 = (ExprNodeColumnDesc) hiveExpr;
- String fieldName = desc2.getTabAlias() + "." + desc2.getColumn();
-
- // System.out.println("project expr: " + fieldName);
-
- if (fieldName.indexOf("$$") < 0) {
- LogicalVariable var = getVariable(fieldName, hiveExpr.getTypeInfo());
- desc2.setColumn(var.toString());
- desc2.setTabAlias("");
- variables.add(var);
- } else {
- LogicalVariable var = nameToLogicalVariableMap.get(desc2.getColumn());
- String name = this.logicalVariableToFieldMap.get(var);
- var = this.getVariableOnly(name);
- variables.add(var);
- }
- } else {
- Mutable<ILogicalExpression> asterixExpr = translateScalarFucntion(hiveExpr);
- expressions.add(asterixExpr);
- LogicalVariable var = getVariable(hiveExpr.getExprString() + asterixExpr.hashCode(),
- hiveExpr.getTypeInfo());
- variables.add(var);
- appendedVariables.add(var);
- }
- }
-
- /**
- * create an assign operator to deal with appending
- */
- ILogicalOperator assignOp = null;
- if (appendedVariables.size() > 0) {
- assignOp = new AssignOperator(appendedVariables, expressions);
- assignOp.getInputs().add(parent);
- }
- return assignOp;
- }
-
- private ILogicalPlan plan;
-
- public ILogicalPlan genLogicalPlan() {
- plan = new ALogicalPlanImpl(rootOperators);
- return plan;
- }
-
- public void printOperators() throws AlgebricksException {
- LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
- StringBuilder buffer = new StringBuilder();
- PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
- outputWriter.println(buffer);
- outputWriter.println("rewritten variables: ");
- outputWriter.flush();
- printVariables();
-
- }
-
- public static void setOutputPrinter(PrintWriter writer) {
- outputWriter = writer;
- }
-
- private void printVariables() {
- Set<Entry<String, LogicalVariable>> entries = fieldToLogicalVariableMap.entrySet();
-
- for (Entry<String, LogicalVariable> entry : entries) {
- outputWriter.println(entry.getKey() + " -> " + entry.getValue());
- }
- outputWriter.flush();
- }
-
- /**
- * generate the object inspector for the output of an operator
- *
- * @param operator
- * The Hive operator
- * @return an ObjectInspector object
- */
- public Schema generateInputSchema(Operator operator) {
- List<String> variableNames = new ArrayList<String>();
- List<TypeInfo> typeList = new ArrayList<TypeInfo>();
- List<ColumnInfo> columns = operator.getSchema().getSignature();
-
- for (ColumnInfo col : columns) {
- // typeList.add();
- TypeInfo type = col.getType();
- typeList.add(type);
-
- String fieldName = col.getInternalName();
- variableNames.add(fieldName);
- }
-
- return new Schema(variableNames, typeList);
- }
-
- /**
- * rewrite the names of output columns for feature expression evaluators to
- * use
- *
- * @param operator
- */
- public void rewriteOperatorOutputSchema(Operator operator) {
- List<ColumnInfo> columns = operator.getSchema().getSignature();
-
- for (ColumnInfo column : columns) {
- String columnName = column.getTabAlias() + "." + column.getInternalName();
- if (columnName.indexOf("$$") < 0) {
- LogicalVariable var = getVariable(columnName, column.getType());
- column.setInternalName(var.toString());
- }
- }
- }
-
- @Override
- public void rewriteOperatorOutputSchema(List<LogicalVariable> variables, Operator operator) {
-
- //printOperatorSchema(operator);
- List<ColumnInfo> columns = operator.getSchema().getSignature();
- if (variables.size() != columns.size()) {
- throw new IllegalStateException("output cardinality error " + operator.getName() + " variable size: "
- + variables.size() + " expected " + columns.size());
- }
-
- for (int i = 0; i < variables.size(); i++) {
- LogicalVariable var = variables.get(i);
- ColumnInfo column = columns.get(i);
- String fieldName = column.getTabAlias() + "." + column.getInternalName();
- if (fieldName.indexOf("$$") < 0) {
- updateVariable(fieldName, var);
- column.setInternalName(var.toString());
- }
- }
- //printOperatorSchema(operator);
- }
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpression(ExprNodeDesc expr) {
- if (expr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
- String fieldName = desc.getTabAlias() + "." + desc.getColumn();
- if (fieldName.indexOf("$$") < 0) {
- LogicalVariable var = getVariableOnly(fieldName);
- if (var == null) {
- fieldName = "." + desc.getColumn();
- var = getVariableOnly(fieldName);
- if (var == null) {
- fieldName = "null." + desc.getColumn();
- var = getVariableOnly(fieldName);
- if (var == null) {
- throw new IllegalStateException(fieldName + " is wrong!!! ");
- }
- }
- }
- String name = this.logicalVariableToFieldMap.get(var);
- var = getVariableOnly(name);
- desc.setColumn(var.toString());
- }
- } else {
- if (expr.getChildren() != null && expr.getChildren().size() > 0) {
- List<ExprNodeDesc> children = expr.getChildren();
- for (ExprNodeDesc desc : children)
- rewriteExpression(desc);
- }
- }
- }
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpressionPartial(ExprNodeDesc expr) {
- if (expr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
- String fieldName = desc.getTabAlias() + "." + desc.getColumn();
- if (fieldName.indexOf("$$") < 0) {
- LogicalVariable var = getVariableOnly(fieldName);
- desc.setColumn(var.toString());
- }
- } else {
- if (expr.getChildren() != null && expr.getChildren().size() > 0) {
- List<ExprNodeDesc> children = expr.getChildren();
- for (ExprNodeDesc desc : children)
- rewriteExpressionPartial(desc);
- }
- }
- }
-
- // private void printOperatorSchema(Operator operator) {
- // // System.out.println(operator.getName());
- // // List<ColumnInfo> columns = operator.getSchema().getSignature();
- // // for (ColumnInfo column : columns) {
- // // System.out.print(column.getTabAlias() + "." +
- // // column.getInternalName() + " ");
- // // }
- // // System.out.println();
- // }
-
- /**
- * translate scalar function expression
- *
- * @param hiveExpr
- * @return
- */
- public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc hiveExpr) {
- ILogicalExpression AlgebricksExpr;
-
- if (hiveExpr instanceof ExprNodeGenericFuncDesc) {
- List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
- List<ExprNodeDesc> children = hiveExpr.getChildren();
-
- for (ExprNodeDesc child : children)
- arguments.add(translateScalarFucntion(child));
-
- ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) hiveExpr;
- GenericUDF genericUdf = funcExpr.getGenericUDF();
- UDF udf = null;
- if (genericUdf instanceof GenericUDFBridge) {
- GenericUDFBridge bridge = (GenericUDFBridge) genericUdf;
- try {
- udf = bridge.getUdfClass().newInstance();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- /**
- * set up the hive function
- */
- Object hiveFunction = genericUdf;
- if (udf != null)
- hiveFunction = udf;
-
- FunctionIdentifier funcId = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getAlgebricksFunctionId(hiveFunction
- .getClass());
- if (funcId == null) {
- funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, hiveFunction.getClass().getName());
- }
-
- Object functionInfo = null;
- if (genericUdf instanceof GenericUDFBridge) {
- functionInfo = funcExpr;
- }
-
- /**
- * generate the function call expression
- */
- ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
- funcId, functionInfo), arguments);
- AlgebricksExpr = AlgebricksFuncExpr;
-
- } else if (hiveExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc column = (ExprNodeColumnDesc) hiveExpr;
- LogicalVariable var = this.getVariable(column.getColumn());
- AlgebricksExpr = new VariableReferenceExpression(var);
-
- } else if (hiveExpr instanceof ExprNodeFieldDesc) {
- FunctionIdentifier funcId;
- funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.FIELDACCESS);
-
- ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
- funcId, hiveExpr));
- AlgebricksExpr = AlgebricksFuncExpr;
- } else if (hiveExpr instanceof ExprNodeConstantDesc) {
- ExprNodeConstantDesc hiveConst = (ExprNodeConstantDesc) hiveExpr;
- Object value = hiveConst.getValue();
- AlgebricksExpr = new ConstantExpression(new HivesterixConstantValue(value));
- } else if (hiveExpr instanceof ExprNodeNullDesc) {
- FunctionIdentifier funcId;
- funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.NULL);
-
- ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
- funcId, hiveExpr));
-
- AlgebricksExpr = AlgebricksFuncExpr;
- } else {
- throw new IllegalStateException("unknown hive expression");
- }
- return new MutableObject<ILogicalExpression>(AlgebricksExpr);
- }
-
- /**
- * translate aggregation function expression
- *
- * @param aggregateDesc
- * @return
- */
- public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc) {
-
- String UDAFName = aggregateDesc.getGenericUDAFName();
-
- List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
- List<ExprNodeDesc> children = aggregateDesc.getParameters();
-
- for (ExprNodeDesc child : children)
- arguments.add(translateScalarFucntion(child));
-
- FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDAFName + "("
- + aggregateDesc.getMode() + ")");
- HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, aggregateDesc);
- AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(funcInfo, false,
- arguments);
- return new MutableObject<ILogicalExpression>(aggregationExpression);
- }
-
- /**
- * translate aggregation function expression
- *
- * @param aggregator
- * @return
- */
- public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument) {
-
- String UDTFName = udtfDesc.getUDTFName();
-
- FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDTFName);
- UnnestingFunctionCallExpression unnestingExpression = new UnnestingFunctionCallExpression(new HiveFunctionInfo(
- funcId, udtfDesc));
- unnestingExpression.getArguments().add(argument);
- return new MutableObject<ILogicalExpression>(unnestingExpression);
- }
-
- /**
- * get typeinfo
- */
- @Override
- public TypeInfo getType(LogicalVariable var) {
- return variableToType.get(var);
- }
-
- /**
- * get variable from variable name
- */
- @Override
- public LogicalVariable getVariable(String name) {
- return nameToLogicalVariableMap.get(name);
- }
-
- @Override
- public LogicalVariable getVariableFromFieldName(String fieldName) {
- return this.getVariableOnly(fieldName);
- }
-
- /**
- * set the metadata provider
- */
- @Override
- public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata) {
- this.metaData = metadata;
- }
-
- /**
- * insert ReplicateOperator when necessary
- */
- private void insertReplicateOperator(List<Mutable<ILogicalOperator>> roots) {
- Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> childToParentsMap = new HashMap<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>>();
- buildChildToParentsMapping(roots, childToParentsMap);
- for (Entry<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> entry : childToParentsMap.entrySet()) {
- List<Mutable<ILogicalOperator>> pList = entry.getValue();
- if (pList.size() > 1) {
- ILogicalOperator rop = new ReplicateOperator(pList.size());
- Mutable<ILogicalOperator> ropRef = new MutableObject<ILogicalOperator>(rop);
- Mutable<ILogicalOperator> childRef = entry.getKey();
- rop.getInputs().add(childRef);
- for (Mutable<ILogicalOperator> parentRef : pList) {
- ILogicalOperator parentOp = parentRef.getValue();
- int index = parentOp.getInputs().indexOf(childRef);
- parentOp.getInputs().set(index, ropRef);
- }
- }
- }
- }
-
- /**
- * build the mapping from child to Parents
- *
- * @param roots
- * @param childToParentsMap
- */
- private void buildChildToParentsMapping(List<Mutable<ILogicalOperator>> roots,
- Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> map) {
- for (Mutable<ILogicalOperator> opRef : roots) {
- List<Mutable<ILogicalOperator>> childRefs = opRef.getValue().getInputs();
- for (Mutable<ILogicalOperator> childRef : childRefs) {
- List<Mutable<ILogicalOperator>> parentList = map.get(childRef);
- if (parentList == null) {
- parentList = new ArrayList<Mutable<ILogicalOperator>>();
- map.put(childRef, parentList);
- }
- if (!parentList.contains(opRef))
- parentList.add(opRef);
- }
- buildChildToParentsMapping(childRefs, map);
- }
- }
-}
+package edu.uci.ics.hivesterix.logical.plan;
+
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
+import edu.uci.ics.hivesterix.logical.expression.HiveAlgebricksBuiltInFunctionMap;
+import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
+import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
+import edu.uci.ics.hivesterix.logical.plan.visitor.ExtractVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.FilterVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.GroupByVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.JoinVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.LateralViewJoinVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.LimitVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.MapJoinVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.ProjectVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.SortVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.TableScanWriteVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.UnionVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Visitor;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ReplicateOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
+import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor;
+import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.PlanPrettyPrinter;
+
+@SuppressWarnings("rawtypes")
+public class HiveAlgebricksTranslator implements Translator {
+
+ private int currentVariable = 0;
+
+ private List<Mutable<ILogicalOperator>> logicalOp = new ArrayList<Mutable<ILogicalOperator>>();
+
+ private boolean continueTraverse = true;
+
+ private IMetadataProvider<PartitionDesc, Object> metaData;
+
+ /**
+ * map variable name to the logical variable
+ */
+ private HashMap<String, LogicalVariable> nameToLogicalVariableMap = new HashMap<String, LogicalVariable>();
+
+ /**
+ * map field name to LogicalVariable
+ */
+ private HashMap<String, LogicalVariable> fieldToLogicalVariableMap = new HashMap<String, LogicalVariable>();
+
+ /**
+ * map logical variable to name
+ */
+ private HashMap<LogicalVariable, String> logicalVariableToFieldMap = new HashMap<LogicalVariable, String>();
+
+ /**
+ * Algebricks root operators
+ */
+ private List<Mutable<ILogicalOperator>> rootOperators = new ArrayList<Mutable<ILogicalOperator>>();
+
+ /**
+ * a list of visitors
+ */
+ private List<Visitor> visitors = new ArrayList<Visitor>();
+
+ /**
+ * output writer to print things out
+ */
+ private static PrintWriter outputWriter = new PrintWriter(new OutputStreamWriter(System.out));
+
+ /**
+ * map a logical variable to type info
+ */
+ private HashMap<LogicalVariable, TypeInfo> variableToType = new HashMap<LogicalVariable, TypeInfo>();
+
+ @Override
+ public LogicalVariable getVariable(String fieldName, TypeInfo type) {
+ LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
+ if (var == null) {
+ currentVariable++;
+ var = new LogicalVariable(currentVariable);
+ fieldToLogicalVariableMap.put(fieldName, var);
+ nameToLogicalVariableMap.put(var.toString(), var);
+ variableToType.put(var, type);
+ logicalVariableToFieldMap.put(var, fieldName);
+ }
+ return var;
+ }
+
+ @Override
+ public LogicalVariable getNewVariable(String fieldName, TypeInfo type) {
+ currentVariable++;
+ LogicalVariable var = new LogicalVariable(currentVariable);
+ fieldToLogicalVariableMap.put(fieldName, var);
+ nameToLogicalVariableMap.put(var.toString(), var);
+ variableToType.put(var, type);
+ logicalVariableToFieldMap.put(var, fieldName);
+ return var;
+ }
+
+ @Override
+ public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar) {
+ String name = this.logicalVariableToFieldMap.get(oldVar);
+ if (name != null) {
+ fieldToLogicalVariableMap.put(name, newVar);
+ nameToLogicalVariableMap.put(newVar.toString(), newVar);
+ nameToLogicalVariableMap.put(oldVar.toString(), newVar);
+ logicalVariableToFieldMap.put(newVar, name);
+ }
+ }
+
+ @Override
+ public IMetadataProvider<PartitionDesc, Object> getMetadataProvider() {
+ return metaData;
+ }
+
+ /**
+ * only get a variable, without rewriting it
+ *
+ * @param fieldName
+ * @return the logical variable bound to fieldName, or null if there is none
+ */
+ private LogicalVariable getVariableOnly(String fieldName) {
+ return fieldToLogicalVariableMap.get(fieldName);
+ }
+
+ public void updateVariable(String fieldName, LogicalVariable variable) {
+ LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
+ if (var == null) {
+ fieldToLogicalVariableMap.put(fieldName, variable);
+ nameToLogicalVariableMap.put(fieldName, variable);
+ } else if (!var.equals(variable)) {
+ fieldToLogicalVariableMap.put(fieldName, variable);
+ nameToLogicalVariableMap.put(fieldName, variable);
+ }
+ }
+
+ /**
+ * get a list of logical variables from the schema
+ *
+ * @param schema
+ * @return
+ */
+ @Override
+ public List<LogicalVariable> getVariablesFromSchema(Schema schema) {
+ List<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ List<String> names = schema.getNames();
+
+ for (String name : names)
+ variables.add(nameToLogicalVariableMap.get(name));
+ return variables;
+ }
+
+ /**
+ * get variable to typeinfo map
+ *
+ * @return
+ */
+ public HashMap<LogicalVariable, TypeInfo> getVariableContext() {
+ return this.variableToType;
+ }
+
+ /**
+ * get the variable counter (the id of the next variable to be created)
+ *
+ * @return
+ */
+ public int getVariableCounter() {
+ return currentVariable + 1;
+ }
+
+ /**
+ * translate a Hive operator tree into an Algebricks operator tree
+ *
+ * @param hiveRoot
+ * the roots of the Hive operator DAG
+ * @param parentOperator
+ * the Algebricks parent operator
+ * @param aliasToPathMap
+ * the map from table alias to partition descriptor
+ */
+ public void translate(List<Operator> hiveRoot, ILogicalOperator parentOperator,
+ HashMap<String, PartitionDesc> aliasToPathMap) throws AlgebricksException {
+ /**
+ * register visitors
+ */
+ visitors.add(new FilterVisitor());
+ visitors.add(new GroupByVisitor());
+ visitors.add(new JoinVisitor());
+ visitors.add(new LateralViewJoinVisitor());
+ visitors.add(new UnionVisitor());
+ visitors.add(new LimitVisitor());
+ visitors.add(new MapJoinVisitor());
+ visitors.add(new ProjectVisitor());
+ visitors.add(new SortVisitor());
+ visitors.add(new ExtractVisitor());
+ visitors.add(new TableScanWriteVisitor(aliasToPathMap));
+
+ List<Mutable<ILogicalOperator>> refList = translate(hiveRoot, new MutableObject<ILogicalOperator>(
+ parentOperator));
+ if (refList != null) {
+ insertReplicateOperator(refList);
+ rootOperators.addAll(refList);
+ }
+ }
+
+ /**
+ * translate operator DAG
+ *
+ * @param hiveRoot
+ * @param AlgebricksParentOperator
+ * @return
+ */
+ private List<Mutable<ILogicalOperator>> translate(List<Operator> hiveRoot,
+ Mutable<ILogicalOperator> AlgebricksParentOperator) throws AlgebricksException {
+
+ for (Operator hiveOperator : hiveRoot) {
+ continueTraverse = true;
+ Mutable<ILogicalOperator> currentOperatorRef = null;
+ if (hiveOperator.getType() == OperatorType.FILTER) {
+ FilterOperator fop = (FilterOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.REDUCESINK) {
+ ReduceSinkOperator fop = (ReduceSinkOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.JOIN) {
+ JoinOperator fop = (JoinOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null) {
+ continueTraverse = true;
+ break;
+ } else
+ continueTraverse = false;
+ }
+ if (currentOperatorRef == null)
+ return null;
+ } else if (hiveOperator.getType() == OperatorType.LATERALVIEWJOIN) {
+ LateralViewJoinOperator fop = (LateralViewJoinOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ if (currentOperatorRef == null)
+ return null;
+ } else if (hiveOperator.getType() == OperatorType.MAPJOIN) {
+ MapJoinOperator fop = (MapJoinOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null) {
+ continueTraverse = true;
+ break;
+ } else
+ continueTraverse = false;
+ }
+ if (currentOperatorRef == null)
+ return null;
+ } else if (hiveOperator.getType() == OperatorType.SELECT) {
+ SelectOperator fop = (SelectOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.EXTRACT) {
+ ExtractOperator fop = (ExtractOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.GROUPBY) {
+ GroupByOperator fop = (GroupByOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.TABLESCAN) {
+ TableScanOperator fop = (TableScanOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.FILESINK) {
+ FileSinkOperator fop = (FileSinkOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.LIMIT) {
+ LimitOperator lop = (LimitOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.UDTF) {
+ UDTFOperator lop = (UDTFOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.UNION) {
+ UnionOperator lop = (UnionOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null) {
+ continueTraverse = true;
+ break;
+ } else
+ continueTraverse = false;
+ }
+ }
+ if (hiveOperator.getChildOperators() != null && hiveOperator.getChildOperators().size() > 0
+ && continueTraverse) {
+ @SuppressWarnings("unchecked")
+ List<Operator> children = hiveOperator.getChildOperators();
+ if (currentOperatorRef == null)
+ currentOperatorRef = AlgebricksParentOperator;
+ translate(children, currentOperatorRef);
+ }
+ if (hiveOperator.getChildOperators() == null || hiveOperator.getChildOperators().size() == 0)
+ logicalOp.add(currentOperatorRef);
+ }
+ return logicalOp;
+ }
+
+ /**
+ * used in select and group by to create an assign operator for non-column expressions
+ *
+ * @param parent
+ * @param cols
+ * @param variables
+ * @return the assign operator, or null if no expression needs to be appended
+ */
+ public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,
+ ArrayList<LogicalVariable> variables) {
+
+ ArrayList<Mutable<ILogicalExpression>> expressions = new ArrayList<Mutable<ILogicalExpression>>();
+
+ /**
+ * variables to be appended in the assign operator
+ */
+ ArrayList<LogicalVariable> appendedVariables = new ArrayList<LogicalVariable>();
+
+ // one variable can only be assigned once
+ for (ExprNodeDesc hiveExpr : cols) {
+ rewriteExpression(hiveExpr);
+
+ if (hiveExpr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc desc2 = (ExprNodeColumnDesc) hiveExpr;
+ String fieldName = desc2.getTabAlias() + "." + desc2.getColumn();
+
+ // System.out.println("project expr: " + fieldName);
+
+ if (fieldName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariable(fieldName, hiveExpr.getTypeInfo());
+ desc2.setColumn(var.toString());
+ desc2.setTabAlias("");
+ variables.add(var);
+ } else {
+ LogicalVariable var = nameToLogicalVariableMap.get(desc2.getColumn());
+ String name = this.logicalVariableToFieldMap.get(var);
+ var = this.getVariableOnly(name);
+ variables.add(var);
+ }
+ } else {
+ Mutable<ILogicalExpression> asterixExpr = translateScalarFucntion(hiveExpr);
+ expressions.add(asterixExpr);
+ LogicalVariable var = getVariable(hiveExpr.getExprString() + asterixExpr.hashCode(),
+ hiveExpr.getTypeInfo());
+ variables.add(var);
+ appendedVariables.add(var);
+ }
+ }
+
+ /**
+ * create an assign operator to deal with appending
+ */
+ ILogicalOperator assignOp = null;
+ if (appendedVariables.size() > 0) {
+ assignOp = new AssignOperator(appendedVariables, expressions);
+ assignOp.getInputs().add(parent);
+ }
+ return assignOp;
+ }
+
+ private ILogicalPlan plan;
+
+ public ILogicalPlan genLogicalPlan() {
+ plan = new ALogicalPlanImpl(rootOperators);
+ return plan;
+ }
+
+ public void printOperators() throws AlgebricksException {
+ LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
+ StringBuilder buffer = new StringBuilder();
+ PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
+ outputWriter.println(buffer);
+ outputWriter.println("rewritten variables: ");
+ outputWriter.flush();
+ printVariables();
+
+ }
+
+ public static void setOutputPrinter(PrintWriter writer) {
+ outputWriter = writer;
+ }
+
+ private void printVariables() {
+ Set<Entry<String, LogicalVariable>> entries = fieldToLogicalVariableMap.entrySet();
+
+ for (Entry<String, LogicalVariable> entry : entries) {
+ outputWriter.println(entry.getKey() + " -> " + entry.getValue());
+ }
+ outputWriter.flush();
+ }
+
+ /**
+ * generate the schema (column names and types) from an operator's output signature
+ *
+ * @param operator
+ * the Hive operator
+ * @return a Schema object containing the column names and type infos
+ */
+ public Schema generateInputSchema(Operator operator) {
+ List<String> variableNames = new ArrayList<String>();
+ List<TypeInfo> typeList = new ArrayList<TypeInfo>();
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+
+ for (ColumnInfo col : columns) {
+ // typeList.add();
+ TypeInfo type = col.getType();
+ typeList.add(type);
+
+ String fieldName = col.getInternalName();
+ variableNames.add(fieldName);
+ }
+
+ return new Schema(variableNames, typeList);
+ }
+
+ /**
+ * rewrite the names of output columns for feature expression evaluators to
+ * use
+ *
+ * @param operator
+ */
+ public void rewriteOperatorOutputSchema(Operator operator) {
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+ for (ColumnInfo column : columns) {
+ String columnName = column.getTabAlias() + "." + column.getInternalName();
+ if (columnName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariable(columnName, column.getType());
+ column.setInternalName(var.toString());
+ }
+ }
+ }
+
+ @Override
+ public void rewriteOperatorOutputSchema(List<LogicalVariable> variables, Operator operator) {
+ // printOperatorSchema(operator);
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+ // if (variables.size() != columns.size()) {
+ // throw new IllegalStateException("output cardinality error " +
+ // operator.getName() + " variable size: "
+ // + variables.size() + " expected " + columns.size());
+ // }
+ for (int i = 0; i < variables.size(); i++) {
+ LogicalVariable var = variables.get(i);
+ ColumnInfo column = columns.get(i);
+ String fieldName = column.getTabAlias() + "." + column.getInternalName();
+ if (fieldName.indexOf("$$") < 0) {
+ updateVariable(fieldName, var);
+ column.setInternalName(var.toString());
+ }
+ }
+
+ // printOperatorSchema(operator);
+ }
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpression(ExprNodeDesc expr) {
+ if (expr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
+ String fieldName = desc.getTabAlias() + "." + desc.getColumn();
+ if (fieldName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariableOnly(fieldName);
+ if (var == null) {
+ fieldName = "." + desc.getColumn();
+ var = getVariableOnly(fieldName);
+ if (var == null) {
+ fieldName = "null." + desc.getColumn();
+ var = getVariableOnly(fieldName);
+ if (var == null) {
+ throw new IllegalStateException(fieldName + " is wrong!!! ");
+ }
+ }
+ }
+ String name = this.logicalVariableToFieldMap.get(var);
+ var = getVariableOnly(name);
+ desc.setColumn(var.toString());
+ }
+ } else {
+ if (expr.getChildren() != null && expr.getChildren().size() > 0) {
+ List<ExprNodeDesc> children = expr.getChildren();
+ for (ExprNodeDesc desc : children)
+ rewriteExpression(desc);
+ }
+ }
+ }
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpressionPartial(ExprNodeDesc expr) {
+ if (expr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
+ String fieldName = desc.getTabAlias() + "." + desc.getColumn();
+ if (fieldName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariableOnly(fieldName);
+ desc.setColumn(var.toString());
+ }
+ } else {
+ if (expr.getChildren() != null && expr.getChildren().size() > 0) {
+ List<ExprNodeDesc> children = expr.getChildren();
+ for (ExprNodeDesc desc : children)
+ rewriteExpressionPartial(desc);
+ }
+ }
+ }
+
+ // private void printOperatorSchema(Operator operator) {
+ // // System.out.println(operator.getName());
+ // // List<ColumnInfo> columns = operator.getSchema().getSignature();
+ // // for (ColumnInfo column : columns) {
+ // // System.out.print(column.getTabAlias() + "." +
+ // // column.getInternalName() + " ");
+ // // }
+ // // System.out.println();
+ // }
+
+ /**
+ * translate scalar function expression
+ *
+ * @param hiveExpr
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc hiveExpr) {
+ ILogicalExpression AlgebricksExpr;
+
+ if (hiveExpr instanceof ExprNodeGenericFuncDesc) {
+ List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
+ List<ExprNodeDesc> children = hiveExpr.getChildren();
+
+ for (ExprNodeDesc child : children)
+ arguments.add(translateScalarFucntion(child));
+
+ ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) hiveExpr;
+ GenericUDF genericUdf = funcExpr.getGenericUDF();
+ UDF udf = null;
+ if (genericUdf instanceof GenericUDFBridge) {
+ GenericUDFBridge bridge = (GenericUDFBridge) genericUdf;
+ try {
+ udf = bridge.getUdfClass().newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * set up the hive function
+ */
+ Object hiveFunction = genericUdf;
+ if (udf != null)
+ hiveFunction = udf;
+
+ FunctionIdentifier funcId = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getAlgebricksFunctionId(hiveFunction
+ .getClass());
+ if (funcId == null) {
+ funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, hiveFunction.getClass().getName());
+ }
+
+ Object functionInfo = null;
+ if (genericUdf instanceof GenericUDFBridge) {
+ functionInfo = funcExpr;
+ }
+
+ /**
+ * generate the function call expression
+ */
+ ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
+ funcId, functionInfo), arguments);
+ AlgebricksExpr = AlgebricksFuncExpr;
+
+ } else if (hiveExpr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc column = (ExprNodeColumnDesc) hiveExpr;
+ LogicalVariable var = this.getVariable(column.getColumn());
+ AlgebricksExpr = new VariableReferenceExpression(var);
+
+ } else if (hiveExpr instanceof ExprNodeFieldDesc) {
+ FunctionIdentifier funcId;
+ funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.FIELDACCESS);
+
+ ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
+ funcId, hiveExpr));
+ AlgebricksExpr = AlgebricksFuncExpr;
+ } else if (hiveExpr instanceof ExprNodeConstantDesc) {
+ ExprNodeConstantDesc hiveConst = (ExprNodeConstantDesc) hiveExpr;
+ Object value = hiveConst.getValue();
+ AlgebricksExpr = new ConstantExpression(new HivesterixConstantValue(value));
+ } else if (hiveExpr instanceof ExprNodeNullDesc) {
+ FunctionIdentifier funcId;
+ funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.NULL);
+
+ ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
+ funcId, hiveExpr));
+
+ AlgebricksExpr = AlgebricksFuncExpr;
+ } else {
+ throw new IllegalStateException("unknown hive expression");
+ }
+ return new MutableObject<ILogicalExpression>(AlgebricksExpr);
+ }
+
+ /**
+ * translate aggregation function expression
+ *
+ * @param aggregateDesc
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc) {
+
+ String UDAFName = aggregateDesc.getGenericUDAFName();
+
+ List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
+ List<ExprNodeDesc> children = aggregateDesc.getParameters();
+
+ for (ExprNodeDesc child : children)
+ arguments.add(translateScalarFucntion(child));
+
+ FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDAFName + "("
+ + aggregateDesc.getMode() + ")");
+ HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, aggregateDesc);
+ AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(funcInfo, false,
+ arguments);
+ return new MutableObject<ILogicalExpression>(aggregationExpression);
+ }
+
+ /**
+ * translate an unnesting (UDTF) function expression
+ *
+ * @param udtfDesc
+ * @param argument
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument) {
+
+ String UDTFName = udtfDesc.getUDTFName();
+
+ FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDTFName);
+ UnnestingFunctionCallExpression unnestingExpression = new UnnestingFunctionCallExpression(new HiveFunctionInfo(
+ funcId, udtfDesc));
+ unnestingExpression.getArguments().add(argument);
+ return new MutableObject<ILogicalExpression>(unnestingExpression);
+ }
+
+ /**
+ * get typeinfo
+ */
+ @Override
+ public TypeInfo getType(LogicalVariable var) {
+ return variableToType.get(var);
+ }
+
+ /**
+ * get variable from variable name
+ */
+ @Override
+ public LogicalVariable getVariable(String name) {
+ return nameToLogicalVariableMap.get(name);
+ }
+
+ @Override
+ public LogicalVariable getVariableFromFieldName(String fieldName) {
+ return this.getVariableOnly(fieldName);
+ }
+
+ /**
+ * set the metadata provider
+ */
+ @Override
+ public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata) {
+ this.metaData = metadata;
+ }
+
+ /**
+ * insert ReplicateOperator when necessary
+ */
+ private void insertReplicateOperator(List<Mutable<ILogicalOperator>> roots) {
+ Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> childToParentsMap = new HashMap<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>>();
+ buildChildToParentsMapping(roots, childToParentsMap);
+ for (Entry<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> entry : childToParentsMap.entrySet()) {
+ List<Mutable<ILogicalOperator>> pList = entry.getValue();
+ if (pList.size() > 1) {
+ ILogicalOperator rop = new ReplicateOperator(pList.size());
+ Mutable<ILogicalOperator> ropRef = new MutableObject<ILogicalOperator>(rop);
+ Mutable<ILogicalOperator> childRef = entry.getKey();
+ rop.getInputs().add(childRef);
+ for (Mutable<ILogicalOperator> parentRef : pList) {
+ ILogicalOperator parentOp = parentRef.getValue();
+ int index = parentOp.getInputs().indexOf(childRef);
+ parentOp.getInputs().set(index, ropRef);
+ }
+ }
+ }
+ }
+
+ /**
+ * build the mapping from each child operator reference to its parent operator references
+ *
+ * @param roots
+ * @param map
+ */
+ private void buildChildToParentsMapping(List<Mutable<ILogicalOperator>> roots,
+ Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> map) {
+ for (Mutable<ILogicalOperator> opRef : roots) {
+ List<Mutable<ILogicalOperator>> childRefs = opRef.getValue().getInputs();
+ for (Mutable<ILogicalOperator> childRef : childRefs) {
+ List<Mutable<ILogicalOperator>> parentList = map.get(childRef);
+ if (parentList == null) {
+ parentList = new ArrayList<Mutable<ILogicalOperator>>();
+ map.put(childRef, parentList);
+ }
+ if (!parentList.contains(opRef))
+ parentList.add(opRef);
+ }
+ buildChildToParentsMapping(childRefs, map);
+ }
+ }
+}
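For orientation, here is a minimal, hypothetical driver sketch (not part of this patch) showing the intended call order of the public methods added above; the hiveRoots, metadataProvider and aliasToPartitionMap inputs are assumed to be supplied by the caller:

    // Hypothetical usage sketch only -- the method name and its three inputs are illustrative.
    public static ILogicalPlan translateToAlgebricks(List<Operator> hiveRoots,
            IMetadataProvider<PartitionDesc, Object> metadataProvider,
            HashMap<String, PartitionDesc> aliasToPartitionMap) throws AlgebricksException {
        HiveAlgebricksTranslator translator = new HiveAlgebricksTranslator();
        // the metadata provider is consumed later during job generation
        translator.setMetadataProvider(metadataProvider);
        // walk the Hive operator DAG with the registered visitors; passing a null parent
        // for top-level roots is an assumption of this sketch
        translator.translate(hiveRoots, null, aliasToPartitionMap);
        // wrap the collected Algebricks root operators into a logical plan
        ILogicalPlan plan = translator.genLogicalPlan();
        // optional: pretty-print the plan and the field-to-variable bindings
        translator.printOperators();
        return plan;
    }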
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
index f4161a4..aa1837c 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
@@ -12,113 +12,145 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-
-/**
- * The lateral view join operator is used for FROM src LATERAL VIEW udtf()...
- * This operator was implemented with the following operator DAG in mind.
- * For a query such as
- * SELECT pageid, adid.* FROM example_table LATERAL VIEW explode(adid_list) AS
- * adid
- * The top of the operator DAG will look similar to
- * [Table Scan] | [Lateral View Forward] / \ [Select](*) [Select](adid_list) | |
- * | [UDTF] (explode) \ / [Lateral View Join] | | [Select] (pageid, adid.*) |
- * ....
- * Rows from the table scan operator are first to a lateral view forward
- * operator that just forwards the row and marks the start of a LV. The select
- * operator on the left picks all the columns while the select operator on the
- * right picks only the columns needed by the UDTF.
- * The output of select in the left branch and output of the UDTF in the right
- * branch are then sent to the lateral view join (LVJ). In most cases, the UDTF
- * will generate > 1 row for every row received from the TS, while the left
- * select operator will generate only one. For each row output from the TS, the
- * LVJ outputs all possible rows that can be created by joining the row from the
- * left select and one of the rows output from the UDTF.
- * Additional lateral views can be supported by adding a similar DAG after the
- * previous LVJ operator.
- */
-
-@SuppressWarnings("rawtypes")
-public class LateralViewJoinVisitor extends DefaultVisitor {
-
- private UDTFDesc udtf;
-
- private List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
-
- @Override
- public Mutable<ILogicalOperator> visit(LateralViewJoinOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException {
-
- parents.add(AlgebricksParentOperatorRef);
- if (operator.getParentOperators().size() > parents.size()) {
- return null;
- }
-
- Operator parent0 = operator.getParentOperators().get(0);
- ILogicalOperator parentOperator;
- ILogicalExpression unnestArg;
- if (parent0 instanceof UDTFOperator) {
- List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parents.get(1).getValue(), unnestVars);
- unnestArg = new VariableReferenceExpression(unnestVars.get(0));
- parentOperator = parents.get(1).getValue();
- } else {
- List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parents.get(0).getValue(), unnestVars);
- unnestArg = new VariableReferenceExpression(unnestVars.get(0));
- parentOperator = parents.get(0).getValue();
- }
-
- LogicalVariable var = t.getVariable(udtf.toString(), TypeInfoFactory.unknownTypeInfo);
-
- Mutable<ILogicalExpression> unnestExpr = t.translateUnnestFunction(udtf, new MutableObject<ILogicalExpression>(
- unnestArg));
- ILogicalOperator currentOperator = new UnnestOperator(var, unnestExpr);
-
- List<LogicalVariable> outputVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parentOperator, outputVars);
- outputVars.add(var);
- currentOperator.getInputs().add(new MutableObject<ILogicalOperator>(parentOperator));
-
- parents.clear();
- udtf = null;
- t.rewriteOperatorOutputSchema(outputVars, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(UDTFOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
- udtf = (UDTFDesc) operator.getConf();
-
- // populate the schema from upstream operator
- operator.setSchema(operator.getParentOperators().get(0).getSchema());
- List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
- t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
- return null;
- }
-
-}
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+
+/**
+ * The lateral view join operator is used for FROM src LATERAL VIEW udtf()...
+ * This operator was implemented with the following operator DAG in mind.
+ * For a query such as
+ * SELECT pageid, adid.* FROM example_table LATERAL VIEW explode(adid_list) AS
+ * adid
+ * The top of the operator DAG will look similar to
+ * [Table Scan] | [Lateral View Forward] / \ [Select](*) [Select](adid_list) | |
+ * | [UDTF] (explode) \ / [Lateral View Join] | | [Select] (pageid, adid.*) |
+ * ....
+ * Rows from the table scan operator are first to a lateral view forward
+ * operator that just forwards the row and marks the start of a LV. The select
+ * operator on the left picks all the columns while the select operator on the
+ * right picks only the columns needed by the UDTF.
+ * The output of select in the left branch and output of the UDTF in the right
+ * branch are then sent to the lateral view join (LVJ). In most cases, the UDTF
+ * will generate > 1 row for every row received from the TS, while the left
+ * select operator will generate only one. For each row output from the TS, the
+ * LVJ outputs all possible rows that can be created by joining the row from the
+ * left select and one of the rows output from the UDTF.
+ * Additional lateral views can be supported by adding a similar DAG after the
+ * previous LVJ operator.
+ */
+
+@SuppressWarnings("rawtypes")
+public class LateralViewJoinVisitor extends DefaultVisitor {
+
+ private UDTFDesc udtf;
+
+ private List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LateralViewJoinOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException {
+
+ parents.add(AlgebricksParentOperatorRef);
+ if (operator.getParentOperators().size() > parents.size()) {
+ return null;
+ }
+
+ ILogicalOperator parentOperator = null;
+ ILogicalExpression unnestArg = null;
+ List<LogicalVariable> projectVariables = new ArrayList<LogicalVariable>();
+ for (Mutable<ILogicalOperator> parentLOpRef : parents) {
+ VariableUtilities.getLiveVariables(parentLOpRef.getValue(), projectVariables);
+ }
+ for (Operator parentOp : operator.getParentOperators()) {
+ if (parentOp instanceof UDTFOperator) {
+ int index = operator.getParentOperators().indexOf(parentOp);
+ List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(parents.get(index).getValue(), unnestVars);
+ unnestArg = new VariableReferenceExpression(unnestVars.get(0));
+ parentOperator = parents.get(index).getValue();
+ }
+ }
+
+ LogicalVariable var = t.getVariable(udtf.toString(), TypeInfoFactory.unknownTypeInfo);
+ Mutable<ILogicalExpression> unnestExpr = t.translateUnnestFunction(udtf, new MutableObject<ILogicalExpression>(
+ unnestArg));
+ ILogicalOperator currentOperator = new UnnestOperator(var, unnestExpr);
+
+ List<LogicalVariable> outputVars = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(parents.get(0).getValue(), outputVars);
+ outputVars.add(var);
+ ILogicalOperator inputProjectOperator = new ProjectOperator(projectVariables);
+ currentOperator.getInputs().add(new MutableObject<ILogicalOperator>(inputProjectOperator));
+ inputProjectOperator.getInputs().addAll(parentOperator.getInputs());
+
+ parents.clear();
+ udtf = null;
+ List<ColumnInfo> inputSchema = operator.getSchema().getSignature();
+ rewriteOperatorDesc(outputVars, operator.getConf(), inputSchema, t);
+ //t.rewriteOperatorOutputSchema(outputVars, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(UDTFOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
+ udtf = (UDTFDesc) operator.getConf();
+
+ // populate the schema from upstream operator
+ operator.setSchema(operator.getParentOperators().get(0).getSchema());
+ List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
+ t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
+ return null;
+ }
+
+ private void rewriteOperatorDesc(List<LogicalVariable> variables, LateralViewJoinDesc desc,
+ List<ColumnInfo> schema, Translator t) {
+ List<String> outputFieldNames = desc.getOutputInternalColNames();
+ for (int i = 0; i < variables.size(); i++) {
+ LogicalVariable var = variables.get(i);
+ String fieldName = outputFieldNames.get(i);
+ String tabAlias = findTabAlias(fieldName, schema);
+ fieldName = tabAlias + "." + fieldName;
+ if (fieldName.indexOf("$$") < 0) {
+ //outputFieldNames.set(i, var.toString());
+ t.updateVariable(fieldName, var);
+ }
+ }
+ }
+
+ private String findTabAlias(String fieldName, List<ColumnInfo> schema) {
+ for (int i = 0; i < schema.size(); i++) {
+ ColumnInfo column = schema.get(i);
+ if (column.getInternalName().equals(fieldName)) {
+ return column.getTabAlias();
+ }
+ }
+ return "null";
+ }
+
+}
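As a rough illustration of the sub-plan shape this visitor now builds for the LATERAL VIEW example in the class comment (a sketch under the assumption that unnestExpr and liveParentVariables have already been computed as in the visit method above):

    // Hypothetical, simplified reconstruction of the produced sub-plan: an UnnestOperator
    // (carrying the exploded column) fed by a ProjectOperator over the variables that are
    // live at the lateral view join's parents.
    LogicalVariable adidVar = new LogicalVariable(1); // illustrative variable id
    ILogicalOperator unnest = new UnnestOperator(adidVar, unnestExpr);
    ILogicalOperator project = new ProjectOperator(liveParentVariables);
    unnest.getInputs().add(new MutableObject<ILogicalOperator>(project));
    // the project's own inputs are then wired to the UDTF parent's inputs, as in the visitor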
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
index 186b291..3ed9786 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
@@ -14,36 +14,36 @@
*/
package edu.uci.ics.hivesterix.logical.plan.visitor;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
@SuppressWarnings("rawtypes")
public class MapJoinVisitor extends DefaultVisitor {
@@ -56,7 +56,7 @@
@Override
public Mutable<ILogicalOperator> visit(MapJoinOperator operator,
Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- List<Operator<? extends Serializable>> joinSrc = operator.getParentOperators();
+ List<Operator<? extends OperatorDesc>> joinSrc = operator.getParentOperators();
List<Mutable<ILogicalOperator>> parents = opMap.get(operator);
if (parents == null) {
parents = new ArrayList<Mutable<ILogicalOperator>>();
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
index 74cebaa..25abdec 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
@@ -69,9 +69,9 @@
@Override
public Mutable<ILogicalOperator> visit(TableScanOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
TableScanDesc desc = (TableScanDesc) operator.getConf();
- if (desc == null) {
+ if (desc == null || desc.getAlias()==null) {
List<LogicalVariable> schema = new ArrayList<LogicalVariable>();
VariableUtilities.getLiveVariables(AlgebricksParentOperator.getValue(), schema);
t.rewriteOperatorOutputSchema(schema, operator);
@@ -124,7 +124,6 @@
@Override
public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) {
-
if (hiveOperator.getChildOperators() != null && hiveOperator.getChildOperators().size() > 0)
return null;
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
index 1cb5121..c710f3f 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
@@ -14,21 +14,21 @@
*/
package edu.uci.ics.hivesterix.logical.plan.visitor;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
public class UnionVisitor extends DefaultVisitor {
@@ -46,8 +46,8 @@
List<LogicalVariable> leftVars = new ArrayList<LogicalVariable>();
List<LogicalVariable> rightVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getUsedVariables(parents.get(0).getValue(), leftVars);
- VariableUtilities.getUsedVariables(parents.get(1).getValue(), rightVars);
+ VariableUtilities.getLiveVariables(parents.get(0).getValue(), leftVars);
+ VariableUtilities.getLiveVariables(parents.get(1).getValue(), rightVars);
List<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> triples = new ArrayList<Triple<LogicalVariable, LogicalVariable, LogicalVariable>>();
List<LogicalVariable> unionVars = new ArrayList<LogicalVariable>();
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
index 32b0f66..5b6ac50 100644
--- a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
@@ -12,173 +12,181 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.logical.plan.visitor.base;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-
-@SuppressWarnings("rawtypes")
-public interface Translator {
-
- /**
- * generate input schema
- *
- * @param operator
- * @return
- */
- public Schema generateInputSchema(Operator operator);
-
- /**
- * rewrite the names of output columns for feture expression evaluators to
- * use
- *
- * @param operator
- */
- public void rewriteOperatorOutputSchema(List<LogicalVariable> vars, Operator operator);
-
- /**
- * rewrite the names of output columns for feture expression evaluators to
- * use
- *
- * @param operator
- */
- public void rewriteOperatorOutputSchema(Operator operator);
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpression(ExprNodeDesc expr);
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpressionPartial(ExprNodeDesc expr);
-
- /**
- * get an assign operator as a child of parent
- *
- * @param parent
- * @param cols
- * @param variables
- * @return
- */
- public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,
- ArrayList<LogicalVariable> variables);
-
- /**
- * get type for a logical variable
- *
- * @param var
- * @return type info
- */
- public TypeInfo getType(LogicalVariable var);
-
- /**
- * translate an expression from hive to Algebricks
- *
- * @param desc
- * @return
- */
- public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc desc);
-
- /**
- * translate an aggregation from hive to Algebricks
- *
- * @param aggregateDesc
- * @return
- */
- public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc);
-
- /**
- * translate unnesting (UDTF) function expression
- *
- * @param aggregator
- * @return
- */
- public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument);
-
- /**
- * get variable from a schema
- *
- * @param schema
- * @return
- */
- public List<LogicalVariable> getVariablesFromSchema(Schema schema);
-
- /**
- * get variable from name
- *
- * @param name
- * @return
- */
- public LogicalVariable getVariable(String name);
-
- /**
- * get variable from field name
- *
- * @param name
- * @return
- */
- public LogicalVariable getVariableFromFieldName(String name);
-
- /**
- * get variable from name, type
- *
- * @param fieldName
- * @param type
- * @return
- */
- public LogicalVariable getVariable(String fieldName, TypeInfo type);
-
- /**
- * get new variable from name, type
- *
- * @param fieldName
- * @param type
- * @return
- */
- public LogicalVariable getNewVariable(String fieldName, TypeInfo type);
-
- /**
- * set the metadata provider
- *
- * @param metadata
- */
- public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata);
-
- /**
- * get the metadata provider
- *
- * @param metadata
- */
- public IMetadataProvider<PartitionDesc, Object> getMetadataProvider();
-
- /**
- * replace the variable
- *
- * @param oldVar
- * @param newVar
- */
- public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar);
-
-}
+package edu.uci.ics.hivesterix.logical.plan.visitor.base;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+@SuppressWarnings("rawtypes")
+public interface Translator {
+
+ /**
+ * generate input schema
+ *
+ * @param operator
+ * @return
+ */
+ public Schema generateInputSchema(Operator operator);
+
+ /**
+ * rewrite the names of output columns for feature expression evaluators to
+ * use
+ *
+ * @param operator
+ */
+ public void rewriteOperatorOutputSchema(List<LogicalVariable> vars, Operator operator);
+
+ /**
+ * rewrite the names of output columns for feature expression evaluators to
+ * use
+ *
+ * @param operator
+ */
+ public void rewriteOperatorOutputSchema(Operator operator);
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpression(ExprNodeDesc expr);
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpressionPartial(ExprNodeDesc expr);
+
+ /**
+ * get an assign operator as a child of parent
+ *
+ * @param parent
+ * @param cols
+ * @param variables
+ * @return
+ */
+ public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,
+ ArrayList<LogicalVariable> variables);
+
+ /**
+ * get type for a logical variable
+ *
+ * @param var
+ * @return type info
+ */
+ public TypeInfo getType(LogicalVariable var);
+
+ /**
+ * translate an expression from hive to Algebricks
+ *
+ * @param desc
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc desc);
+
+ /**
+ * translate an aggregation from hive to Algebricks
+ *
+ * @param aggregateDesc
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc);
+
+ /**
+ * translate unnesting (UDTF) function expression
+ *
+ * @param udtfDesc
+ * @param argument
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument);
+
+ /**
+ * get variable from a schema
+ *
+ * @param schema
+ * @return
+ */
+ public List<LogicalVariable> getVariablesFromSchema(Schema schema);
+
+ /**
+ * get variable from name
+ *
+ * @param name
+ * @return
+ */
+ public LogicalVariable getVariable(String name);
+
+ /**
+ * get variable from field name
+ *
+ * @param fieldName
+ * @return
+ */
+ public LogicalVariable getVariableFromFieldName(String name);
+
+ /**
+ * get variable from name, type
+ *
+ * @param fieldName
+ * @param type
+ * @return
+ */
+ public LogicalVariable getVariable(String fieldName, TypeInfo type);
+
+ /**
+ * get new variable from name, type
+ *
+ * @param fieldName
+ * @param type
+ * @return
+ */
+ public LogicalVariable getNewVariable(String fieldName, TypeInfo type);
+
+ /**
+ * update a name-variable binding
+ *
+ * @param fieldName
+ * @param variable
+ */
+ public void updateVariable(String fieldName, LogicalVariable variable);
+
+ /**
+ * set the metadata provider
+ *
+ * @param metadata
+ */
+ public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata);
+
+ /**
+ * get the metadata provider
+ *
+ * @return the metadata provider
+ */
+ public IMetadataProvider<PartitionDesc, Object> getMetadataProvider();
+
+ /**
+ * replace the variable
+ *
+ * @param oldVar
+ * @param newVar
+ */
+ public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar);
+
+}
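A small, hypothetical sketch of how a visitor is expected to use the newly added updateVariable contract together with the existing lookup methods; t is the Translator instance passed to the visitor, and the field name and type below are illustrative only:

    // Hypothetical caller: bind (or rebind) an operator's output column to a logical variable.
    LogicalVariable var = t.getVariableFromFieldName("src.adid");
    if (var == null) {
        // no binding yet: allocate a fresh variable for this field
        var = t.getNewVariable("src.adid", TypeInfoFactory.unknownTypeInfo);
    }
    // make later lookups of "src.adid" resolve to var
    t.updateVariable("src.adid", var);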
diff --git a/hivesterix/pom.xml b/hivesterix/pom.xml
index 2bee50c..ba43c0d 100644
--- a/hivesterix/pom.xml
+++ b/hivesterix/pom.xml
@@ -17,7 +17,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix</artifactId>
- <version>0.2.7-SNAPSHOT</version>
+ <version>0.2.10-SNAPSHOT</version>
<packaging>pom</packaging>
<name>hivesterix</name>