migrate hivesterix to depend on hive-0.11.0

Move the hivesterix-dist Hive dependencies from the old
org.apache.hadoop.hive:*:0.7.0 coordinates to org.apache.hive:*:0.11.0 and
drop the explicitly pinned third-party jars. Port HyracksExecutionEngine to
the 0.11 APIs (OperatorDesc-typed operators, the new
Utilities.mvFileToFinalPath signature with a Reporter argument) and rebase
the forked Driver onto the Hive 0.11 implementation while preserving the
hivesterix-specific execution path.
diff --git a/hivesterix/hivesterix-dist/pom.xml b/hivesterix/hivesterix-dist/pom.xml
index 8ecdfe1..83de061 100644
--- a/hivesterix/hivesterix-dist/pom.xml
+++ b/hivesterix/hivesterix-dist/pom.xml
@@ -25,260 +25,12 @@
<dependencies>
<dependency>
- <groupId>javax.servlet</groupId>
- <artifactId>servlet-api</artifactId>
- <version>2.5</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.8.1</version>
<scope>compile</scope>
</dependency>
<dependency>
- <groupId>args4j</groupId>
- <artifactId>args4j</artifactId>
- <version>2.0.12</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.json</groupId>
- <artifactId>json</artifactId>
- <version>20090211</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-servlet</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>jline</groupId>
- <artifactId>jline</artifactId>
- <version>0.9.94</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-core</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-connectionpool</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-enhancer</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-rdbms</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-dbcp</groupId>
- <artifactId>commons-dbcp</artifactId>
- <version>1.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-pool</groupId>
- <artifactId>commons-pool</artifactId>
- <version>1.5.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-collections</groupId>
- <artifactId>commons-collections</artifactId>
- <version>3.2.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- <version>2.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>javax</groupId>
- <artifactId>jdo2-api</artifactId>
- <version>2.3-ec</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.facebook</groupId>
- <artifactId>libfb303</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>cli</artifactId>
- <version>1.2</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache</groupId>
- <artifactId>log4j</artifactId>
- <version>1.2.15</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>antlr-runtime</artifactId>
- <version>3.0.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-cli</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-common</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-hwi</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-jdbc</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-metastore</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-service</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-shims</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-serde</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-cli</groupId>
- <artifactId>commons-cli</artifactId>
- <version>1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- <version>1.1.1</version>
- <type>jar</type>
- <classifier>api</classifier>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- <version>r06</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>stringtemplate</artifactId>
- <version>3.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.derby</groupId>
- <artifactId>derby</artifactId>
- <version>10.8.1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>0.20.2</version>
@@ -286,6 +38,69 @@
<scope>compile</scope>
</dependency>
<dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-cli</artifactId>
+ <version>0.11.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-common</artifactId>
+ <version>0.11.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>0.11.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-hwi</artifactId>
+ <version>0.11.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-jdbc</artifactId>
+ <version>0.11.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-metastore</artifactId>
+ <version>0.11.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-service</artifactId>
+ <version>0.11.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-shims</artifactId>
+ <version>0.11.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hive</groupId>
+ <artifactId>hive-serde</artifactId>
+ <version>0.11.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
<groupId>edu.uci.ics.hyracks</groupId>
<artifactId>hivesterix-translator</artifactId>
<version>0.2.7-SNAPSHOT</version>
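The pom change above swaps every Hive artifact from the old org.apache.hadoop.hive groupId at 0.7.0 to org.apache.hive at 0.11.0, dropping the long list of hand-pinned third-party jars (servlet-api, jetty, datanucleus, thrift, antlr, derby, and friends) and relying on the 0.11 artifacts to bring in what they need transitively. A quick way to confirm that the intended jar actually resolved after such a coordinate change (a minimal standalone sketch, not part of this patch) is to print where a core Hive class was loaded from:

    // Sanity-check sketch (hypothetical, not in this patch): print the jar that
    // provides HiveConf; after the migration it should be hive-common-0.11.0.jar.
    import org.apache.hadoop.hive.conf.HiveConf;

    public class HiveClasspathCheck {
        public static void main(String[] args) {
            System.out.println(HiveConf.class.getProtectionDomain()
                    .getCodeSource().getLocation());
        }
    }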
diff --git a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
index d3bcaca..bfab157 100644
--- a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
+++ b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
@@ -36,16 +36,20 @@
import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.MoveTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.mapred.Reporter;
import edu.uci.ics.hivesterix.common.config.ConfUtil;
import edu.uci.ics.hivesterix.logical.expression.HiveExpressionTypeComputer;
@@ -214,7 +218,6 @@
// get all leave Ops
getLeaves(rootOps, leaveOps);
-
HiveAlgebricksTranslator translator = new HiveAlgebricksTranslator();
try {
translator.translate(rootOps, null, aliasToPath);
@@ -377,6 +380,12 @@
// remove map-reduce branches in condition task
ConditionalTask condition = (ConditionalTask) task;
List<Task<? extends Serializable>> branches = condition.getListTasks();
+ for (Task branch : branches) {
+ if (branch instanceof MoveTask) {
+ //return articulateMapReduceOperators(branch, rootOps, aliasToPath, rootTasks);
+ return null;
+ }
+ }
for (int i = branches.size() - 1; i >= 0; i--) {
Task branch = branches.get(i);
if (branch instanceof MapRedTask) {
@@ -396,7 +405,7 @@
MapRedTask mrtask = (MapRedTask) task;
MapredWork work = (MapredWork) mrtask.getWork();
- HashMap<String, Operator<? extends Serializable>> operators = work.getAliasToWork();
+ HashMap<String, Operator<? extends OperatorDesc>> operators = work.getAliasToWork();
Set entries = operators.entrySet();
Iterator<Entry<String, Operator>> iterator = entries.iterator();
@@ -414,7 +423,7 @@
// get map local work
MapredLocalWork localWork = work.getMapLocalWork();
if (localWork != null) {
- HashMap<String, Operator<? extends Serializable>> localOperators = localWork.getAliasToWork();
+ HashMap<String, Operator<? extends OperatorDesc>> localOperators = localWork.getAliasToWork();
Set localEntries = localOperators.entrySet();
Iterator<Entry<String, Operator>> localIterator = localEntries.iterator();
@@ -479,9 +488,9 @@
for (Operator childMap : childMapOps) {
if (childMap instanceof TableScanOperator) {
TableScanDesc topDesc = (TableScanDesc) childMap.getConf();
- if (topDesc == null)
+ if (topDesc == null || topDesc.getAlias() == null) {
mapChildren.add(childMap);
- else {
+ } else {
rootOps.add(childMap);
}
} else {
@@ -501,9 +510,14 @@
}
i = 0;
for (Operator child : mapChildren) {
- if (child.getParentOperators() == null || child.getParentOperators().size() == 0)
+ if (child.getParentOperators() == null || child.getParentOperators().size() == 0) {
child.setParentOperators(new ArrayList<Operator>());
- child.getParentOperators().add(leafs.get(i));
+ }
+ if (i < leafs.size()) {
+ if (child.getParentOperators().size() == 0) {
+ child.getParentOperators().add(leafs.get(i));
+ }
+ }
i++;
}
}
@@ -603,10 +617,10 @@
String specPath = desc.getDirName();
DynamicPartitionCtx dpCtx = desc.getDynPartCtx();
// for 0.7.0
- fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);
+ //fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);
// for 0.8.0
- // Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx,
- // desc);
+ //Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx, desc);
+ Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx, desc, Reporter.NULL);
}
}
}
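The last hunk above tracks Utilities.mvFileToFinalPath, whose signature has changed in each Hive release this code has followed; 0.11.0 adds a Reporter argument. A condensed view of the call-site migration, assembled from that hunk (the wrapper class and method names are hypothetical):

    // Sketch of the FileSink finalize call across Hive versions; only the
    // 0.11.0 form compiles against the new dependency set.
    import org.apache.commons.logging.Log;
    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.exec.Utilities;
    import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
    import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
    import org.apache.hadoop.mapred.Reporter;

    class FileSinkMigration {
        static void finalizeFileSink(FileSinkDesc desc, HiveConf conf, Log LOG) throws Exception {
            String specPath = desc.getDirName();
            DynamicPartitionCtx dpCtx = desc.getDynPartCtx();
            // Hive 0.7.0: fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);
            // Hive 0.8.0: Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx, desc);
            // Hive 0.11.0: same call plus a Reporter; Reporter.NULL is the no-op instance.
            Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx, desc, Reporter.NULL);
        }
    }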
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
index 4ef74e9..a7ccb5a 100644
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
@@ -1,17 +1,3 @@
-/*
- * Copyright 2009-2013 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -42,11 +28,13 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;
+import java.util.concurrent.ConcurrentLinkedQueue;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
@@ -62,6 +50,7 @@
import org.apache.hadoop.hive.ql.exec.ExecDriver;
import org.apache.hadoop.hive.ql.exec.FetchTask;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.MoveTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.StatsTask;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
@@ -86,23 +75,22 @@
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
+import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
import org.apache.hadoop.hive.ql.metadata.DummyPartition;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.HiveUtils;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.ErrorMsg;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
+import org.apache.hadoop.hive.ql.parse.ImportSemanticAnalyzer;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.ParseDriver;
-import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.parse.ParseUtils;
import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
@@ -112,6 +100,7 @@
import org.apache.hadoop.hive.ql.plan.ConditionalResolver;
import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.processors.CommandProcessor;
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
@@ -127,15 +116,18 @@
import edu.uci.ics.hivesterix.runtime.exec.HyracksExecutionEngine;
import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;
-@SuppressWarnings({ "deprecation", "unused" })
+@SuppressWarnings({ "deprecation", "unchecked" })
public class Driver implements CommandProcessor {
- static final private Log LOG = LogFactory.getLog(Driver.class.getName());
- static final private LogHelper console = new LogHelper(LOG);
-
// hive-sterix
private IExecutionEngine engine;
private boolean hivesterix = false;
+ private Set<Task> executedConditionalTsks = new HashSet<Task>();
+
+ static final private Log LOG = LogFactory.getLog(Driver.class.getName());
+ static final private LogHelper console = new LogHelper(LOG);
+
+ private static final Object compileMonitor = new Object();
private int maxRows = 100;
ByteStream.Output bos = new ByteStream.Output();
@@ -152,23 +144,57 @@
// A limit on the number of threads that can be launched
private int maxthreads;
- private final int sleeptime = 2000;
-
+ private static final int SLEEP_TIME = 2000;
protected int tryCount = Integer.MAX_VALUE;
- private int checkLockManager() {
+ /**
+ * for backwards compatibility with current tests
+ */
+ public Driver(HiveConf conf) {
+ this.conf = conf;
+
+ }
+
+ public Driver() {
+ if (SessionState.get() != null) {
+ conf = SessionState.get().getConf();
+ }
+
+ // hivesterix
+ engine = new HyracksExecutionEngine(conf);
+ }
+
+ // hivesterix: plan printer
+ public Driver(HiveConf conf, PrintWriter planPrinter) {
+ this.conf = conf;
+ engine = new HyracksExecutionEngine(conf, planPrinter);
+ }
+
+ public void clear() {
+ this.hivesterix = false;
+ this.executedConditionalTsks.clear();
+ }
+
+ private boolean checkLockManager() {
boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
- if (supportConcurrency && (hiveLockMgr == null)) {
+ if (!supportConcurrency) {
+ return false;
+ }
+ if (hiveLockMgr == null) {
try {
setLockManager();
} catch (SemanticException e) {
errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
SQLState = ErrorMsg.findSQLState(e.getMessage());
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (12);
+ return false;
}
}
- return (0);
+ // we set the lock manager on the ctx here because each query has its own
+ // ctx object. The hiveLockMgr is shared across the same instance of
+ // Driver, which can run multiple queries.
+ ctx.setHiveLockMgr(hiveLockMgr);
+ return hiveLockMgr != null;
}
private void setLockManager() throws SemanticException {
@@ -183,6 +209,16 @@
hiveLockMgr = (HiveLockManager) ReflectionUtils.newInstance(conf.getClassByName(lockMgr), conf);
hiveLockMgr.setContext(new HiveLockManagerCtx(conf));
} catch (Exception e) {
+ // set hiveLockMgr to null just in case this invalid manager gets set on
+ // the next query's ctx.
+ if (hiveLockMgr != null) {
+ try {
+ hiveLockMgr.close();
+ } catch (LockException e1) {
+ // nothing we can do here
+ }
+ hiveLockMgr = null;
+ }
throw new SemanticException(ErrorMsg.LOCKMGR_NOT_INITIALIZED.getMsg() + e.getMessage());
}
}
@@ -230,10 +266,8 @@
} else if (sem.getFetchTask() != null) {
FetchTask ft = sem.getFetchTask();
TableDesc td = ft.getTblDesc();
- // partitioned tables don't have tableDesc set on the FetchTask.
- // Instead
- // they have a list of PartitionDesc objects, each with a table
- // desc.
+ // partitioned tables don't have tableDesc set on the FetchTask. Instead
+ // they have a list of PartitionDesc objects, each with a table desc.
// Let's
// try to fetch the desc for the first partition and use it's
// deserializer.
@@ -320,59 +354,102 @@
}
/**
- * for backwards compatibility with current tests
- */
- public Driver(HiveConf conf) {
- this.conf = conf;
-
- // hivesterix
- engine = new HyracksExecutionEngine(conf);
- }
-
- public Driver() {
- if (SessionState.get() != null) {
- conf = SessionState.get().getConf();
- }
-
- // hivesterix
- engine = new HyracksExecutionEngine(conf);
- }
-
- // hivesterix: plan printer
- public Driver(HiveConf conf, PrintWriter planPrinter) {
- this.conf = conf;
- engine = new HyracksExecutionEngine(conf, planPrinter);
- }
-
- public void clear() {
- this.hivesterix = false;
- }
-
- /**
- * Compile a new query. Any currently-planned query associated with this
- * Driver is discarded.
+ * Compile a new query. Any currently-planned query associated with this Driver is discarded.
*
* @param command
* The SQL query to compile.
*/
public int compile(String command) {
+ return compile(command, true);
+ }
+
+ /**
+ * Hold state variables specific to each query being executed, that may not
+ * be consistent in the overall SessionState
+ */
+ private static class QueryState {
+ private HiveOperation op;
+ private String cmd;
+ private boolean init = false;
+
+ /**
+ * Initialize the queryState with the query state variables
+ */
+ public void init(HiveOperation op, String cmd) {
+ this.op = op;
+ this.cmd = cmd;
+ this.init = true;
+ }
+
+ public boolean isInitialized() {
+ return this.init;
+ }
+
+ public HiveOperation getOp() {
+ return this.op;
+ }
+
+ public String getCmd() {
+ return this.cmd;
+ }
+ }
+
+ public void saveSession(QueryState qs) {
+ SessionState oldss = SessionState.get();
+ if (oldss != null && oldss.getHiveOperation() != null) {
+ qs.init(oldss.getHiveOperation(), oldss.getCmd());
+ }
+ }
+
+ public void restoreSession(QueryState qs) {
+ SessionState ss = SessionState.get();
+ if (ss != null && qs != null && qs.isInitialized()) {
+ ss.setCmd(qs.getCmd());
+ ss.setCommandType(qs.getOp());
+ }
+ }
+
+ /**
+ * Compile a new query, optionally resetting the task ID counter. Not resetting the
+ * counter is useful for generating re-entrant QL queries.
+ *
+ * @param command
+ * The HiveQL query to compile
+ * @param resetTaskIds
+ * Resets taskID counter if true.
+ * @return 0 for ok
+ */
+ public int compile(String command, boolean resetTaskIds) {
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.COMPILE);
+
+ //holder for parent command type/string when executing reentrant queries
+ QueryState queryState = new QueryState();
+
if (plan != null) {
close();
plan = null;
}
- TaskFactory.resetId();
+ if (resetTaskIds) {
+ TaskFactory.resetId();
+ }
+ saveSession(queryState);
try {
command = new VariableSubstitution().substitute(conf, command);
ctx = new Context(conf);
+ ctx.setTryCount(getTryCount());
+ ctx.setCmd(command);
+ ctx.setHDFSCleanup(true);
ParseDriver pd = new ParseDriver();
ASTNode tree = pd.parse(command, ctx);
tree = ParseUtils.findRootNonNullToken(tree);
BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
- List<AbstractSemanticAnalyzerHook> saHooks = getSemanticAnalyzerHooks();
+ List<AbstractSemanticAnalyzerHook> saHooks = getHooks(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK,
+ AbstractSemanticAnalyzerHook.class);
// Do semantic analysis and plan generation
if (saHooks != null) {
@@ -382,6 +459,7 @@
tree = hook.preAnalyze(hookCtx, tree);
}
sem.analyze(tree, ctx);
+ hookCtx.update(sem);
for (AbstractSemanticAnalyzerHook hook : saHooks) {
hook.postAnalyze(hookCtx, sem.getRootTasks());
}
@@ -394,19 +472,10 @@
// validate the plan
sem.validate();
- plan = new QueryPlan(command, sem);
- // initialize FetchTask right here
- if (plan.getFetchTask() != null) {
- plan.getFetchTask().initialize(conf, plan, null);
- }
-
- // get the output schema
- schema = getSchema(sem, conf);
+ plan = new QueryPlan(command, sem, perfLogger.getStartTime(PerfLogger.DRIVER_RUN));
// test Only - serialize the query plan and deserialize it
- if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
-
- Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+ if ("true".equalsIgnoreCase(System.getProperty("test.serialize.qplan"))) {
String queryPlanFileName = ctx.getLocalScratchDir(true) + Path.SEPARATOR_CHAR + "queryplan.xml";
LOG.info("query plan = " + queryPlanFileName);
@@ -431,17 +500,24 @@
plan.getFetchTask().initialize(conf, plan, null);
}
- // do the authorization check
+ // get the output schema
+ schema = getSchema(sem, conf);
+
+ //do the authorization check
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
try {
- // doAuthorization(sem);
+ perfLogger.PerfLogBegin(LOG, PerfLogger.DO_AUTHORIZATION);
+ doAuthorization(sem);
} catch (AuthorizationException authExp) {
console.printError("Authorization failed:" + authExp.getMessage()
+ ". Use show grant to get more details.");
return 403;
+ } finally {
+ perfLogger.PerfLogEnd(LOG, PerfLogger.DO_AUTHORIZATION);
}
}
+ //restore state after we're done executing a specific query
// hyracks run
if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
int engineRet = engine.compileJob(sem.getRootTasks());
@@ -450,21 +526,19 @@
}
}
return 0;
- } catch (SemanticException e) {
- errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (10);
- } catch (ParseException e) {
- errorMessage = "FAILED: Parse Error: " + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (11);
} catch (Exception e) {
- errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (12);
+ ErrorMsg error = ErrorMsg.getErrorMsg(e.getMessage());
+ errorMessage = "FAILED: " + e.getClass().getSimpleName();
+ if (error != ErrorMsg.GENERIC_ERROR) {
+ errorMessage += " [Error " + error.getErrorCode() + "]:";
+ }
+ errorMessage += " " + e.getMessage();
+ SQLState = error.getSQLState();
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return error.getErrorCode();
+ } finally {
+ perfLogger.PerfLogEnd(LOG, PerfLogger.COMPILE);
+ restoreSession(queryState);
}
}
@@ -479,13 +553,13 @@
ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
} else {
- // if (op.equals(HiveOperation.IMPORT)) {
- // ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
- // if (!isa.existsTable()) {
- ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
- HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
- // }
- // }
+ if (op.equals(HiveOperation.IMPORT)) {
+ ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
+ if (!isa.existsTable()) {
+ ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
+ HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
+ }
+ }
}
if (outputs != null && outputs.size() > 0) {
for (WriteEntity write : outputs) {
@@ -513,8 +587,8 @@
Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
for (ReadEntity read : inputs) {
- if (read.getPartition() != null) {
- Table tbl = read.getTable();
+ Table tbl = read.getTable();
+ if ((read.getPartition() != null) || (tbl.isPartitioned())) {
String tblName = tbl.getTableName();
if (tableUsePartLevelAuth.get(tblName) == null) {
boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE"
@@ -533,9 +607,9 @@
ParseContext parseCtx = querySem.getParseContext();
Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();
- for (Map.Entry<String, Operator<? extends Serializable>> topOpMap : querySem.getParseContext()
+ for (Map.Entry<String, Operator<? extends OperatorDesc>> topOpMap : querySem.getParseContext()
.getTopOps().entrySet()) {
- Operator<? extends Serializable> topOp = topOpMap.getValue();
+ Operator<? extends OperatorDesc> topOp = topOpMap.getValue();
if (topOp instanceof TableScanOperator && tsoTopMap.containsKey(topOp)) {
TableScanOperator tableScanOp = (TableScanOperator) topOp;
Table tbl = tsoTopMap.get(tableScanOp);
@@ -551,7 +625,10 @@
cols.add(columns.get(i).getName());
}
}
- if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName())) {
+ // the map may not contain all sources, since the input list may have been optimized out
+ // or be non-existent, though such sources may still be referenced by the TableScanOperator;
+ // if the entry is null the partition probably doesn't exist, so fall back to table-level permission
+ if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
String alias_id = topOpMap.getKey();
PrunedPartitionList partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
parseCtx.getOpToPartPruner().get(topOp), parseCtx.getConf(), alias_id,
@@ -582,30 +659,28 @@
// cache the results for table authorization
Set<String> tableAuthChecked = new HashSet<String>();
for (ReadEntity read : inputs) {
- Table tbl = null;
+ Table tbl = read.getTable();
if (read.getPartition() != null) {
- tbl = read.getPartition().getTable();
+ Partition partition = read.getPartition();
+ tbl = partition.getTable();
// use partition level authorization
- if (tableUsePartLevelAuth.get(tbl.getTableName())) {
- List<String> cols = part2Cols.get(read.getPartition());
+ if (tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
+ List<String> cols = part2Cols.get(partition);
if (cols != null && cols.size() > 0) {
- ss.getAuthorizer().authorize(read.getPartition().getTable(), read.getPartition(), cols,
+ ss.getAuthorizer().authorize(partition.getTable(), partition, cols,
op.getInputRequiredPrivileges(), null);
} else {
- ss.getAuthorizer().authorize(read.getPartition(), op.getInputRequiredPrivileges(), null);
+ ss.getAuthorizer().authorize(partition, op.getInputRequiredPrivileges(), null);
}
continue;
}
- } else if (read.getTable() != null) {
- tbl = read.getTable();
}
- // if we reach here, it means it needs to do a table
- // authorization
- // check, and the table authorization may already happened
- // because of other
+ // if we reach here, a table authorization check is needed, and the table
+ // authorization may have already happened because of other
// partitions
- if (tbl != null && !tableAuthChecked.contains(tbl.getTableName())) {
+ if (tbl != null && !tableAuthChecked.contains(tbl.getTableName())
+ && !(tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE)) {
List<String> cols = tab2Cols.get(tbl);
if (cols != null && cols.size() > 0) {
ss.getAuthorizer().authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null);
@@ -632,16 +707,15 @@
* @param p
* The partition to be locked
* @param mode
- * The mode of the lock (SHARED/EXCLUSIVE) Get the list of
- * objects to be locked. If a partition needs to be locked (in
- * any mode), all its parents should also be locked in SHARED
- * mode.
+ * The mode of the lock (SHARED/EXCLUSIVE). Get the list of objects to be locked. If a
+ * partition needs to be locked (in any mode), all its parents should also be locked in
+ * SHARED mode.
**/
private List<HiveLockObj> getLockObjects(Table t, Partition p, HiveLockMode mode) throws SemanticException {
List<HiveLockObj> locks = new LinkedList<HiveLockObj>();
HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
- .currentTimeMillis()), "IMPLICIT");
+ .currentTimeMillis()), "IMPLICIT", plan.getQueryStr());
if (t != null) {
locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
@@ -665,16 +739,20 @@
name = p.getName().split("@")[2];
}
- String partName = name;
String partialName = "";
String[] partns = name.split("/");
int len = p instanceof DummyPartition ? partns.length : partns.length - 1;
+ Map<String, String> partialSpec = new LinkedHashMap<String, String>();
for (int idx = 0; idx < len; idx++) {
String partn = partns[idx];
partialName += partn;
+ String[] nameValue = partn.split("=");
+ assert (nameValue.length == 2);
+ partialSpec.put(nameValue[0], nameValue[1]);
try {
locks.add(new HiveLockObj(new HiveLockObject(new DummyPartition(p.getTable(), p.getTable()
- .getDbName() + "/" + p.getTable().getTableName() + "/" + partialName), lockData), mode));
+ .getDbName() + "/" + p.getTable().getTableName() + "/" + partialName, partialSpec),
+ lockData), mode));
partialName += "/";
} catch (HiveException e) {
throw new SemanticException(e.getMessage());
@@ -688,17 +766,16 @@
}
/**
- * Acquire read and write locks needed by the statement. The list of objects
- * to be locked are obtained from he inputs and outputs populated by the
- * compiler. The lock acuisition scheme is pretty simple. If all the locks
- * cannot be obtained, error out. Deadlock is avoided by making sure that
- * the locks are lexicographically sorted.
+ * Acquire read and write locks needed by the statement. The list of objects to be locked is
+ * obtained from the inputs and outputs populated by the compiler. The lock acquisition scheme is
+ * pretty simple: if all the locks cannot be obtained, error out. Deadlock is avoided by making
+ * sure that the locks are lexicographically sorted.
**/
public int acquireReadWriteLocks() {
- try {
- int sleepTime = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES) * 1000;
- int numRetries = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES);
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
+ try {
boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
if (!supportConcurrency) {
return 0;
@@ -707,8 +784,7 @@
List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>();
// Sort all the inputs, outputs.
- // If a lock needs to be acquired on any partition, a read lock
- // needs to be acquired on all
+ // If a lock needs to be acquired on any partition, a read lock needs to be acquired on all
// its parents also
for (ReadEntity input : plan.getInputs()) {
if (input.getType() == ReadEntity.Type.TABLE) {
@@ -719,16 +795,21 @@
}
for (WriteEntity output : plan.getOutputs()) {
+ List<HiveLockObj> lockObj = null;
if (output.getTyp() == WriteEntity.Type.TABLE) {
- lockObjects.addAll(getLockObjects(output.getTable(), null,
- output.isComplete() ? HiveLockMode.EXCLUSIVE : HiveLockMode.SHARED));
+ lockObj = getLockObjects(output.getTable(), null, output.isComplete() ? HiveLockMode.EXCLUSIVE
+ : HiveLockMode.SHARED);
} else if (output.getTyp() == WriteEntity.Type.PARTITION) {
- lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.EXCLUSIVE));
+ lockObj = getLockObjects(null, output.getPartition(), HiveLockMode.EXCLUSIVE);
}
- // In case of dynamic queries, it is possible to have incomplete
- // dummy partitions
+ // In case of dynamic queries, it is possible to have incomplete dummy partitions
else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) {
- lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.SHARED));
+ lockObj = getLockObjects(null, output.getPartition(), HiveLockMode.SHARED);
+ }
+
+ if (lockObj != null) {
+ lockObjects.addAll(lockObj);
+ ctx.getOutputLockObjects().put(output, lockObj);
}
}
@@ -736,13 +817,8 @@
return 0;
}
- int ret = checkLockManager();
- if (ret != 0) {
- return ret;
- }
-
HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
- .currentTimeMillis()), "IMPLICIT");
+ .currentTimeMillis()), "IMPLICIT", plan.getQueryStr());
// Lock the database also
try {
@@ -753,25 +829,7 @@
throw new SemanticException(e.getMessage());
}
- ctx.setHiveLockMgr(hiveLockMgr);
- List<HiveLock> hiveLocks = null;
-
- int tryNum = 1;
- do {
-
- // ctx.getHiveLockMgr();
- // hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false);
-
- if (hiveLocks != null) {
- break;
- }
-
- tryNum++;
- try {
- Thread.sleep(sleepTime);
- } catch (InterruptedException e) {
- }
- } while (tryNum < numRetries);
+ List<HiveLock> hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false);
if (hiveLocks == null) {
throw new SemanticException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
@@ -785,138 +843,207 @@
SQLState = ErrorMsg.findSQLState(e.getMessage());
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
return (10);
- } catch (Exception e) {
+ } catch (LockException e) {
errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
SQLState = ErrorMsg.findSQLState(e.getMessage());
console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
return (10);
- }
- }
-
- /**
- * Release all the locks acquired implicitly by the statement. Note that the
- * locks acquired with 'keepAlive' set to True are not released.
- **/
- private void releaseLocks() {
- if (ctx != null && ctx.getHiveLockMgr() != null) {
- try {
- ctx.getHiveLockMgr().close();
- ctx.setHiveLocks(null);
- } catch (LockException e) {
- }
+ } finally {
+ perfLogger.PerfLogEnd(LOG, PerfLogger.ACQUIRE_READ_WRITE_LOCKS);
}
}
/**
* @param hiveLocks
- * list of hive locks to be released Release all the locks
- * specified. If some of the locks have already been released,
- * ignore them
+ * list of hive locks to be released. Release all the locks specified; if some of the
+ * locks have already been released, ignore them.
**/
private void releaseLocks(List<HiveLock> hiveLocks) {
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.RELEASE_LOCKS);
+
if (hiveLocks != null) {
ctx.getHiveLockMgr().releaseLocks(hiveLocks);
}
ctx.setHiveLocks(null);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.RELEASE_LOCKS);
}
- public CommandProcessorResponse run(String command) {
+ public CommandProcessorResponse run(String command) throws CommandNeedRetryException {
errorMessage = null;
SQLState = null;
- int ret = compile(command);
+ if (!validateConfVariables()) {
+ return new CommandProcessorResponse(12, errorMessage, SQLState);
+ }
+
+ HiveDriverRunHookContext hookContext = new HiveDriverRunHookContextImpl(conf, command);
+ // Get all the driver run hooks and pre-execute them.
+ List<HiveDriverRunHook> driverRunHooks;
+ try {
+ driverRunHooks = getHooks(HiveConf.ConfVars.HIVE_DRIVER_RUN_HOOKS, HiveDriverRunHook.class);
+ for (HiveDriverRunHook driverRunHook : driverRunHooks) {
+ driverRunHook.preDriverRun(hookContext);
+ }
+ } catch (Exception e) {
+ errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return new CommandProcessorResponse(12, errorMessage, SQLState);
+ }
+
+ // Reset the perf logger
+ PerfLogger perfLogger = PerfLogger.getPerfLogger(true);
+ perfLogger.PerfLogBegin(LOG, PerfLogger.DRIVER_RUN);
+ perfLogger.PerfLogBegin(LOG, PerfLogger.TIME_TO_SUBMIT);
+
+ int ret;
+ synchronized (compileMonitor) {
+ ret = compile(command);
+ }
if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
+ releaseLocks(ctx.getHiveLocks());
return new CommandProcessorResponse(ret, errorMessage, SQLState);
}
- // ret = acquireReadWriteLocks();
- if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
- return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ boolean requireLock = false;
+ boolean ckLock = checkLockManager();
+
+ if (ckLock) {
+ boolean lockOnlyMapred = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_LOCK_MAPRED_ONLY);
+ if (lockOnlyMapred) {
+ Queue<Task<? extends Serializable>> taskQueue = new LinkedList<Task<? extends Serializable>>();
+ taskQueue.addAll(plan.getRootTasks());
+ while (taskQueue.peek() != null) {
+ Task<? extends Serializable> tsk = taskQueue.remove();
+ requireLock = requireLock || tsk.requireLock();
+ if (requireLock) {
+ break;
+ }
+ if (tsk instanceof ConditionalTask) {
+ taskQueue.addAll(((ConditionalTask) tsk).getListTasks());
+ }
+ if (tsk.getChildTasks() != null) {
+ taskQueue.addAll(tsk.getChildTasks());
+ }
+ // do not add the backup task here, because the backup task should be the same
+ // type as the original task.
+ }
+ } else {
+ requireLock = true;
+ }
+ }
+
+ if (requireLock) {
+ ret = acquireReadWriteLocks();
+ if (ret != 0) {
+ releaseLocks(ctx.getHiveLocks());
+ return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ }
}
ret = execute();
if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
+ // if requireLock is false, the release here will do nothing because there is no lock
+ releaseLocks(ctx.getHiveLocks());
return new CommandProcessorResponse(ret, errorMessage, SQLState);
}
- // releaseLocks(ctx.getHiveLocks());
+ // if requireLock is false, the release here will do nothing because there is no lock
+ releaseLocks(ctx.getHiveLocks());
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.DRIVER_RUN);
+ perfLogger.close(LOG, plan);
+
+ // Take all the driver run hooks and post-execute them.
+ try {
+ for (HiveDriverRunHook driverRunHook : driverRunHooks) {
+ driverRunHook.postDriverRun(hookContext);
+ }
+ } catch (Exception e) {
+ errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return new CommandProcessorResponse(12, errorMessage, SQLState);
+ }
+
return new CommandProcessorResponse(ret);
}
- private List<AbstractSemanticAnalyzerHook> getSemanticAnalyzerHooks() throws Exception {
- ArrayList<AbstractSemanticAnalyzerHook> saHooks = new ArrayList<AbstractSemanticAnalyzerHook>();
- String pestr = conf.getVar(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK);
- if (pestr == null) {
- return saHooks;
+ /**
+ * Validate configuration variables.
+ *
+ * @return true if the configuration variables are valid, false otherwise
+ */
+ private boolean validateConfVariables() {
+ boolean valid = true;
+ if ((!conf.getBoolVar(HiveConf.ConfVars.HIVE_HADOOP_SUPPORTS_SUBDIRECTORIES))
+ && ((conf.getBoolVar(HiveConf.ConfVars.HADOOPMAPREDINPUTDIRRECURSIVE))
+ || (conf.getBoolVar(HiveConf.ConfVars.HIVEOPTLISTBUCKETING)) || ((conf
+ .getBoolVar(HiveConf.ConfVars.HIVE_OPTIMIZE_UNION_REMOVE))))) {
+ errorMessage = "FAILED: Hive Internal Error: " + ErrorMsg.SUPPORT_DIR_MUST_TRUE_FOR_LIST_BUCKETING.getMsg();
+ SQLState = ErrorMsg.findSQLState(errorMessage);
+ console.printError(errorMessage + "\n");
+ valid = false;
}
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return saHooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- AbstractSemanticAnalyzerHook hook = HiveUtils.getSemanticAnalyzerHook(conf, peClass);
- saHooks.add(hook);
- } catch (HiveException e) {
- console.printError("Pre Exec Hook Class not found:" + e.getMessage());
- throw e;
- }
- }
-
- return saHooks;
+ return valid;
}
- private List<Hook> getPreExecHooks() throws Exception {
- ArrayList<Hook> pehooks = new ArrayList<Hook>();
- String pestr = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS);
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return pehooks;
+ /**
+ * Returns a set of hooks specified in a configuration variable.
+ * See getHooks(HiveConf.ConfVars hookConfVar, Class<T> clazz)
+ *
+ * @param hookConfVar
+ * @return
+ * @throws Exception
+ */
+ private List<Hook> getHooks(HiveConf.ConfVars hookConfVar) throws Exception {
+ return getHooks(hookConfVar, Hook.class);
+ }
+
+ /**
+ * Returns the hooks specified in a configuration variable. The hooks are returned in a list in
+ * the order they were specified in the configuration variable.
+ *
+ * @param hookConfVar
+ * The configuration variable specifying a comma separated list of the hook
+ * class names.
+ * @param clazz
+ * The super type of the hooks.
+ * @return A list of the hooks cast as the type specified in clazz, in the order
+ * they are listed in the value of hookConfVar
+ * @throws Exception
+ */
+ private <T extends Hook> List<T> getHooks(HiveConf.ConfVars hookConfVar, Class<T> clazz) throws Exception {
+
+ List<T> hooks = new ArrayList<T>();
+ String csHooks = conf.getVar(hookConfVar);
+ if (csHooks == null) {
+ return hooks;
}
- String[] peClasses = pestr.split(",");
+ csHooks = csHooks.trim();
+ if (csHooks.equals("")) {
+ return hooks;
+ }
- for (String peClass : peClasses) {
+ String[] hookClasses = csHooks.split(",");
+
+ for (String hookClass : hookClasses) {
try {
- pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
+ T hook = (T) Class.forName(hookClass.trim(), true, JavaUtils.getClassLoader()).newInstance();
+ hooks.add(hook);
} catch (ClassNotFoundException e) {
- console.printError("Pre Exec Hook Class not found:" + e.getMessage());
+ console.printError(hookConfVar.varname + " Class not found:" + e.getMessage());
throw e;
}
}
- return pehooks;
+ return hooks;
}
- private List<Hook> getPostExecHooks() throws Exception {
- ArrayList<Hook> pehooks = new ArrayList<Hook>();
- String pestr = conf.getVar(HiveConf.ConfVars.POSTEXECHOOKS);
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return pehooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
- } catch (ClassNotFoundException e) {
- console.printError("Post Exec Hook Class not found:" + e.getMessage());
- throw e;
- }
- }
-
- return pehooks;
- }
-
- public int execute() {
+ public int execute() throws CommandNeedRetryException {
// execute hivesterix plan
if (hivesterix) {
hivesterix = false;
@@ -925,6 +1052,9 @@
return ret;
}
+ PerfLogger perfLogger = PerfLogger.getPerfLogger();
+ perfLogger.PerfLogBegin(LOG, PerfLogger.DRIVER_EXECUTE);
+
boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME));
int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
@@ -933,6 +1063,10 @@
conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId);
conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr);
+
+ conf.set("mapreduce.workflow.id", "hive_" + queryId);
+ conf.set("mapreduce.workflow.name", queryStr);
+
maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
try {
@@ -946,14 +1080,23 @@
}
resStream = null;
- HookContext hookContext = new HookContext(plan, conf);
+ HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS());
+ hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
- for (Hook peh : getPreExecHooks()) {
+ for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) {
if (peh instanceof ExecuteWithHookContext) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
+
((ExecuteWithHookContext) peh).run(hookContext);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
} else if (peh instanceof PreExecute) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
+
((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), ShimLoader
.getHadoopShims().getUGIForConf(conf));
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.PRE_HOOK + peh.getClass().getName());
}
}
@@ -968,32 +1111,36 @@
}
String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
- // A runtime that launches runnable tasks as separate Threads
- // through
+ // A runtime that launches runnable tasks as separate Threads through
// TaskRunners
// As soon as a task isRunnable, it is put in a queue
// At any time, at most maxthreads tasks can be running
- // The main thread polls the TaskRunners to check if they have
- // finished.
+ // The main thread polls the TaskRunners to check if they have finished.
- Queue<Task<? extends Serializable>> runnable = new LinkedList<Task<? extends Serializable>>();
+ Queue<Task<? extends Serializable>> runnable = new ConcurrentLinkedQueue<Task<? extends Serializable>>();
Map<TaskResult, TaskRunner> running = new HashMap<TaskResult, TaskRunner>();
DriverContext driverCxt = new DriverContext(runnable, ctx);
+ ctx.setHDFSCleanup(true);
+
+ SessionState.get().setLastMapRedStatsList(new ArrayList<MapRedStats>());
+ SessionState.get().setStackTraces(new HashMap<String, List<List<String>>>());
+ SessionState.get().setLocalMapRedErrors(new HashMap<String, List<String>>());
// Add root Tasks to runnable
-
for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
+ // This should never happen; if it does, it's a bug with the potential to produce
+ // incorrect results.
+ assert tsk.getParentTasks() == null || tsk.getParentTasks().isEmpty();
driverCxt.addToRunnable(tsk);
}
+ perfLogger.PerfLogEnd(LOG, PerfLogger.TIME_TO_SUBMIT);
// Loop while you either have tasks running, or tasks queued up
-
while (running.size() != 0 || runnable.peek() != null) {
// Launch upto maxthreads tasks
while (runnable.peek() != null && running.size() < maxthreads) {
Task<? extends Serializable> tsk = runnable.remove();
- console.printInfo("executing task " + tsk.getName());
launchTask(tsk, queryId, noName, running, jobname, jobs, driverCxt);
}
@@ -1005,12 +1152,24 @@
int exitVal = tskRes.getExitVal();
if (exitVal != 0) {
+ if (tsk.ifRetryCmdWhenFail()) {
+ if (!running.isEmpty()) {
+ taskCleanup(running);
+ }
+ // in case we decided to run everything in local mode, restore the
+ // the jobtracker setting to its initial value
+ ctx.restoreOriginalTracker();
+ throw new CommandNeedRetryException();
+ }
Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
if (backupTask != null) {
errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+ tsk.getClass().getName();
+ ErrorMsg em = ErrorMsg.getErrorMsg(exitVal);
+ if (em != null) {
+ errorMessage += ". " + em.getMsg();
+ }
console.printError(errorMessage);
-
errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
console.printError(errorMessage);
@@ -1021,20 +1180,31 @@
continue;
} else {
- // TODO: This error messaging is not very informative.
- // Fix that.
+ hookContext.setHookType(HookContext.HookType.ON_FAILURE_HOOK);
+ // Get all the failure execution hooks and execute them.
+ for (Hook ofh : getHooks(HiveConf.ConfVars.ONFAILUREHOOKS)) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());
+
+ ((ExecuteWithHookContext) ofh).run(hookContext);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.FAILURE_HOOK + ofh.getClass().getName());
+ }
+
errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+ tsk.getClass().getName();
+ ErrorMsg em = ErrorMsg.getErrorMsg(exitVal);
+ if (em != null) {
+ errorMessage += ". " + em.getMsg();
+ }
SQLState = "08S01";
console.printError(errorMessage);
- if (running.size() != 0) {
- taskCleanup();
+ if (!running.isEmpty()) {
+ taskCleanup(running);
}
- // in case we decided to run everything in local mode,
- // restore the
+ // in case we decided to run everything in local mode, restore
// the jobtracker setting to its initial value
ctx.restoreOriginalTracker();
- return 9;
+ return exitVal;
}
}
@@ -1047,9 +1217,9 @@
if (tsk.getChildTasks() != null) {
for (Task<? extends Serializable> child : tsk.getChildTasks()) {
// hivesterix: don't check launchable condition
- // if (DriverContext.isLaunchable(child)) {
+ //if (DriverContext.isLaunchable(child)) {
driverCxt.addToRunnable(child);
- // }
+ //}
}
}
}
@@ -1059,8 +1229,7 @@
ctx.restoreOriginalTracker();
// remove incomplete outputs.
- // Some incomplete outputs may be added at the beginning, for eg:
- // for dynamic partitions.
+ // Some incomplete outputs may be added at the beginning, e.g. for dynamic partitions.
// remove them
HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
for (WriteEntity output : plan.getOutputs()) {
@@ -1073,15 +1242,24 @@
plan.getOutputs().remove(output);
}
+ hookContext.setHookType(HookContext.HookType.POST_EXEC_HOOK);
// Get all the post execution hooks and execute them.
- for (Hook peh : getPostExecHooks()) {
+ for (Hook peh : getHooks(HiveConf.ConfVars.POSTEXECHOOKS)) {
if (peh instanceof ExecuteWithHookContext) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
+
((ExecuteWithHookContext) peh).run(hookContext);
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
} else if (peh instanceof PostExecute) {
+ perfLogger.PerfLogBegin(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
+
((PostExecute) peh)
.run(SessionState.get(), plan.getInputs(), plan.getOutputs(),
(SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo()
: null), ShimLoader.getHadoopShims().getUGIForConf(conf));
+
+ perfLogger.PerfLogEnd(LOG, PerfLogger.POST_HOOK + peh.getClass().getName());
}
}
@@ -1089,7 +1267,10 @@
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0));
SessionState.get().getHiveHistory().printRowCount(queryId);
}
+ } catch (CommandNeedRetryException e) {
+ throw e;
} catch (Exception e) {
+ ctx.restoreOriginalTracker();
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12));
}
@@ -1105,6 +1286,18 @@
if (noName) {
conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
}
+ perfLogger.PerfLogEnd(LOG, PerfLogger.DRIVER_EXECUTE);
+
+ if (SessionState.get().getLastMapRedStatsList() != null
+ && SessionState.get().getLastMapRedStatsList().size() > 0) {
+ long totalCpu = 0;
+ console.printInfo("MapReduce Jobs Launched: ");
+ for (int i = 0; i < SessionState.get().getLastMapRedStatsList().size(); i++) {
+ console.printInfo("Job " + i + ": " + SessionState.get().getLastMapRedStatsList().get(i));
+ totalCpu += SessionState.get().getLastMapRedStatsList().get(i).getCpuMSec();
+ }
+ console.printInfo("Total MapReduce CPU Time Spent: " + Utilities.formatMsecToStr(totalCpu));
+ }
}
plan.setDone();
@@ -1134,14 +1327,12 @@
* name of the task, if it is a map-reduce job
* @param jobs
* number of map-reduce jobs
- * @param curJobNo
- * the sequential number of the next map-reduce job
- * @return the updated number of last the map-reduce job launched
+ * @param cxt
+ * the driver context
*/
public void launchTask(Task<? extends Serializable> tsk, String queryId, boolean noName,
Map<TaskResult, TaskRunner> running, String jobname, int jobs, DriverContext cxt) {
-
if (SessionState.get() != null) {
SessionState.get().getHiveHistory().startTask(queryId, tsk, tsk.getClass().getName());
}
@@ -1149,6 +1340,8 @@
if (noName) {
conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "(" + tsk.getId() + ")");
}
+ conf.set("mapreduce.workflow.node.name", tsk.getId());
+ Utilities.setWorkflowAdjacencies(conf, plan);
cxt.incCurJobNo(1);
console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of " + jobs);
}
@@ -1156,7 +1349,13 @@
TaskResult tskRes = new TaskResult();
TaskRunner tskRun = new TaskRunner(tsk, tskRes);
- // HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) &&
+ // Launch Task
+ //if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) && tsk.isMapRedTask()) {
+ // Launch it in the parallel mode, as a separate thread only for MR tasks
+ // tskRun.start();
+ //} else {
+ // tskRun.runSequential();
+ //}
// Launch Task: hivesterix tweak
if (tsk instanceof MapRedTask || tsk instanceof StatsTask) {
// Launch it in the parallel mode, as a separate thread only for MR
@@ -1169,11 +1368,27 @@
if (crs instanceof ConditionalResolverMergeFiles) {
tskRes.setRunning(false);
tskRes.setExitVal(0);
-
- List<Task<? extends Serializable>> children = condTask.getListTasks();
- for (Task<? extends Serializable> child : children)
- if (child instanceof MapRedTask)
- cxt.addToRunnable(child);
+ if (!executedConditionalTsks.contains(tsk)) {
+ List<Task<? extends Serializable>> children = condTask.getListTasks();
+ Task<? extends Serializable> selectedBranch = null;
+ for (Task<? extends Serializable> branch : children) {
+ if (branch instanceof MoveTask) {
+ selectedBranch = branch;
+ break;
+ }
+ }
+ if (selectedBranch == null) {
+ for (int i = children.size() - 1; i >= 0; i--) {
+ Task<? extends Serializable> child = children.get(i);
+ if (child instanceof MapRedTask) {
+ selectedBranch = child;
+ break;
+ }
+ }
+ }
+ executedConditionalTsks.add(tsk);
+ cxt.addToRunnable(selectedBranch);
+ }
}
} else {
tskRun.runSequential();
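
The branch selection above reads more plainly as a helper; selectBranch is hypothetical, and the types (MoveTask, MapRedTask) are the ones the hunk already uses:

    // prefer a MoveTask branch (no merge job needed); otherwise take the
    // last MapRedTask branch, matching the loop order in the tweak above
    private Task<? extends Serializable> selectBranch(List<Task<? extends Serializable>> children) {
        for (Task<? extends Serializable> branch : children) {
            if (branch instanceof MoveTask) {
                return branch;
            }
        }
        for (int i = children.size() - 1; i >= 0; i--) {
            if (children.get(i) instanceof MapRedTask) {
                return children.get(i);
            }
        }
        return null;
    }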
@@ -1185,12 +1400,18 @@
/**
* Cleans up remaining tasks in case of failure
*/
-
- public void taskCleanup() {
- // The currently existing Shutdown hooks will be automatically called,
- // killing the map-reduce processes.
- // The non MR processes will be killed as well.
- System.exit(9);
+ public void taskCleanup(Map<TaskResult, TaskRunner> running) {
+ for (Map.Entry<TaskResult, TaskRunner> entry : running.entrySet()) {
+ if (entry.getKey().isRunning()) {
+ Task<?> task = entry.getValue().getTask();
+ try {
+ task.shutdown();
+ } catch (Exception e) {
+ console.printError("Exception on shutting down task " + task.getId() + ": " + e);
+ }
+ }
+ }
+ running.clear();
}
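
The new signature lets the driver recover instead of killing the JVM with System.exit(9). A hedged call-site sketch; pollResult(...) is a hypothetical helper that blocks until the next TaskResult finishes:

    TaskResult result = pollResult(running);
    if (result != null && result.getExitVal() != 0) {
        // shut down the still-running tasks but keep the process alive
        taskCleanup(running);
    }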
/**
@@ -1214,7 +1435,7 @@
// In this loop, nothing was found
// Sleep 10 seconds and restart
try {
- Thread.sleep(sleeptime);
+ Thread.sleep(SLEEP_TIME);
} catch (InterruptedException ie) {
// Do Nothing
;
@@ -1223,7 +1444,7 @@
}
}
- public boolean getResults(ArrayList<String> res) throws IOException {
+ public boolean getResults(ArrayList<String> res) throws IOException, CommandNeedRetryException {
if (plan != null && plan.getFetchTask() != null) {
FetchTask ft = plan.getFetchTask();
ft.setMaxRows(maxRows);
@@ -1276,6 +1497,14 @@
return true;
}
+ public int getTryCount() {
+ return tryCount;
+ }
+
+ public void setTryCount(int tryCount) {
+ this.tryCount = tryCount;
+ }
+
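
Together with the new throws clause on getResults, the relocated tryCount accessors support a retry loop in the caller. A minimal sketch, assuming a Driver instance "driver" and the original command string "cmd":

    int tries = 0;
    ArrayList<String> res = new ArrayList<String>();
    while (true) {
        try {
            driver.run(cmd);
            while (driver.getResults(res)) {
                res.clear();  // consume each fetched batch before the next call
            }
            break;
        } catch (CommandNeedRetryException e) {
            driver.setTryCount(++tries);  // re-submit the same command
        }
    }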
public int close() {
try {
if (plan != null) {
@@ -1308,18 +1537,21 @@
}
public void destroy() {
- releaseLocks();
+ if (ctx != null) {
+ releaseLocks(ctx.getHiveLocks());
+ }
+
+ if (hiveLockMgr != null) {
+ try {
+ hiveLockMgr.close();
+ } catch (LockException e) {
+ LOG.warn("Exception in closing hive lock manager. "
+ + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ }
+ }
}
public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException {
return plan.getQueryPlan();
}
-
- public int getTryCount() {
- return tryCount;
- }
-
- public void setTryCount(int tryCount) {
- this.tryCount = tryCount;
- }
}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
index 2d5191d..1b96259 100644
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
@@ -68,6 +68,7 @@
@Override
public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo) throws SemanticException {
+ @SuppressWarnings("deprecation")
TypeInfo[] parameters = paramInfo.getParameters();
if (parameters.length == 0) {
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
index 0fea4b9..e26f477 100644
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
@@ -36,7 +36,7 @@
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
-import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
/**
@@ -65,15 +65,18 @@
return result;
}
- public static final TypeInfo voidTypeInfo = getPrimitiveTypeInfo(Constants.VOID_TYPE_NAME);
- public static final TypeInfo booleanTypeInfo = getPrimitiveTypeInfo(Constants.BOOLEAN_TYPE_NAME);
- public static final TypeInfo intTypeInfo = getPrimitiveTypeInfo(Constants.INT_TYPE_NAME);
- public static final TypeInfo longTypeInfo = getPrimitiveTypeInfo(Constants.BIGINT_TYPE_NAME);
- public static final TypeInfo stringTypeInfo = getPrimitiveTypeInfo(Constants.STRING_TYPE_NAME);
- public static final TypeInfo floatTypeInfo = getPrimitiveTypeInfo(Constants.FLOAT_TYPE_NAME);
- public static final TypeInfo doubleTypeInfo = getPrimitiveTypeInfo(Constants.DOUBLE_TYPE_NAME);
- public static final TypeInfo byteTypeInfo = getPrimitiveTypeInfo(Constants.TINYINT_TYPE_NAME);
- public static final TypeInfo shortTypeInfo = getPrimitiveTypeInfo(Constants.SMALLINT_TYPE_NAME);
+ public static final TypeInfo voidTypeInfo = getPrimitiveTypeInfo(serdeConstants.VOID_TYPE_NAME);
+ public static final TypeInfo booleanTypeInfo = getPrimitiveTypeInfo(serdeConstants.BOOLEAN_TYPE_NAME);
+ public static final TypeInfo intTypeInfo = getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME);
+ public static final TypeInfo longTypeInfo = getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME);
+ public static final TypeInfo stringTypeInfo = getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME);
+ public static final TypeInfo floatTypeInfo = getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME);
+ public static final TypeInfo doubleTypeInfo = getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME);
+ public static final TypeInfo byteTypeInfo = getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME);
+ public static final TypeInfo shortTypeInfo = getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME);
+ public static final TypeInfo timestampTypeInfo = getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME);
+ public static final TypeInfo binaryTypeInfo = getPrimitiveTypeInfo(serdeConstants.BINARY_TYPE_NAME);
+ public static final TypeInfo decimalTypeInfo = getPrimitiveTypeInfo(serdeConstants.DECIMAL_TYPE_NAME);
public static final TypeInfo unknownTypeInfo = getPrimitiveTypeInfo("unknown");
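
This hunk tracks the hive-0.11.0 rename of the serde constants class from org.apache.hadoop.hive.serde.Constants to the lower-case org.apache.hadoop.hive.serde.serdeConstants, and picks up the new timestamp/binary/decimal singletons. A minimal usage sketch:

    // import org.apache.hadoop.hive.serde.serdeConstants;  (replaces ...serde.Constants)
    TypeInfo ti = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.DECIMAL_TYPE_NAME);
    boolean isDecimal = serdeConstants.DECIMAL_TYPE_NAME.equals(ti.getTypeName());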
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
index b024269..5fdfb3a 100644
--- a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
@@ -12,77 +12,77 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package edu.uci.ics.hivesterix.test.runtimefunction;
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
-
-public class RuntimeFunctionTestSuite extends AbstractTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
-
- public static Test suite() throws Exception {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- RuntimeFunctionTestSuite testSuite = new RuntimeFunctionTestSuite();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile()) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new RuntimeFunctionTestCase(qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
+package edu.uci.ics.hivesterix.test.runtimefunction;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
+
+public class RuntimeFunctionTestSuite extends AbstractTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+ public static Test suite() throws Exception {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ RuntimeFunctionTestSuite testSuite = new RuntimeFunctionTestSuite();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile()) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new RuntimeFunctionTestCase(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
index 3f1214a..bb07665 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
@@ -13,9 +13,6 @@
-- create the result table
create table q10_returned_item (c_custkey int, c_name string, revenue double, c_acctbal string, n_name string, c_address string, c_phone string, c_comment string);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
insert overwrite table q10_returned_item
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
index 062f7b9..ae2fa3a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
@@ -9,8 +9,6 @@
-- create the result table
create table q12_shipping(l_shipmode string, high_line_count double, low_line_count double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-- the query
insert overwrite table q12_shipping
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
index 988f400..4644d23 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q14_promotion_effect(promo_revenue double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-
-- the query
insert overwrite table q14_promotion_effect
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
index 04064ed..8fa333e 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
@@ -14,8 +14,6 @@
create table q15_top_supplier(s_suppkey int, s_name string, s_address string, s_phone string, total_revenue double);
-set mapred.min.split.size=536870912;
-
-- the query
insert overwrite table revenue
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
index 76d0475..c617c26 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
@@ -13,8 +13,6 @@
create table q18_tmp(l_orderkey int, t_sum_quantity double);
create table q18_large_volume_customer(c_name string, c_custkey int, o_orderkey int, o_orderdate string, o_totalprice double, sum_quantity double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1164000000;
-- the query
insert overwrite table q18_tmp
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
index fd330cd..a7a0a0a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
@@ -9,8 +9,6 @@
-- create the result table
create table q19_discounted_revenue(revenue double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-- the query
insert overwrite table q19_discounted_revenue
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
index a002068..af64a4f 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
@@ -7,8 +7,6 @@
-- create the target table
CREATE TABLE q1_pricing_summary_report ( L_RETURNFLAG STRING, L_LINESTATUS STRING, SUM_QTY DOUBLE, SUM_BASE_PRICE DOUBLE, SUM_DISC_PRICE DOUBLE, SUM_CHARGE DOUBLE, AVE_QTY DOUBLE, AVE_PRICE DOUBLE, AVE_DISC DOUBLE, COUNT_ORDER INT);
-set mapred.min.split.size=536870912;
-
-- the query
INSERT OVERWRITE TABLE q1_pricing_summary_report
SELECT
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
index 63297e6..3149962 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
@@ -22,7 +22,6 @@
create table q20_tmp4(ps_suppkey int);
create table q20_potential_part_promotion(s_name string, s_address string);
-set mapred.min.split.size=536870912;
-- the query
insert overwrite table q20_tmp1
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
index 0049eb3..67f6dc4 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
@@ -11,9 +11,6 @@
-- create the target table
create table q3_shipping_priority (l_orderkey int, revenue double, o_orderdate string, o_shippriority int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
Insert overwrite table q3_shipping_priority
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
index aa828e9..efbcff2 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
@@ -11,7 +11,6 @@
CREATE TABLE q4_order_priority_tmp (O_ORDERKEY INT);
CREATE TABLE q4_order_priority (O_ORDERPRIORITY STRING, ORDER_COUNT INT);
-set mapred.min.split.size=536870912;
-- the query
INSERT OVERWRITE TABLE q4_order_priority_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
index a975ce1..091f000 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
@@ -17,7 +17,6 @@
-- create the target table
create table q5_local_supplier_volume (N_NAME STRING, REVENUE DOUBLE);
-set mapred.min.split.size=536870912;
-- the query
insert overwrite table q5_local_supplier_volume
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
index 3dfb22a..444644f 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
@@ -17,8 +17,6 @@
create table q7_volume_shipping (supp_nation string, cust_nation string, l_year int, revenue double);
create table q7_volume_shipping_tmp(supp_nation string, cust_nation string, s_nationkey int, c_nationkey int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-- the query
insert overwrite table q7_volume_shipping_tmp
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
index 586779c..a9bb58b 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
@@ -17,8 +17,6 @@
-- create the result table
create table q9_product_type_profit (nation string, o_year string, sum_profit double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-- the query
insert overwrite table q9_product_type_profit
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
index 2891c56..70b3538 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
@@ -6,4 +6,7 @@
create external table orders (O_ORDERKEY INT, O_CUSTKEY INT, O_ORDERSTATUS STRING, O_TOTALPRICE DOUBLE, O_ORDERDATE STRING, O_ORDERPRIORITY STRING, O_CLERK STRING, O_SHIPPRIORITY INT, O_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/orders';
create external table customer (C_CUSTKEY INT, C_NAME STRING, C_ADDRESS STRING, C_NATIONKEY INT, C_PHONE STRING, C_ACCTBAL DOUBLE, C_MKTSEGMENT STRING, C_COMMENT STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' STORED AS TEXTFILE LOCATION '/tpch/customer';
-select l_linenumber, o_orderkey, o_totalprice, o_orderdate, o_shippriority from customer c join orders o on c.c_custkey = o.o_custkey join lineitem l on o.o_orderkey = l.l_orderkey where c.c_custkey<5 and o.o_totalprice<30000;
+select l_linenumber, o_orderkey, o_totalprice, o_orderdate, o_shippriority from
+ customer c join orders o on c.c_custkey = o.o_custkey
+ join lineitem l on o.o_orderkey = l.l_orderkey
+ where c.c_custkey<5 and o.o_totalprice<30000;
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
index 05b3718..2629f1a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
@@ -82,7 +82,7 @@
-- HASH_PARTITION_EXCHANGE [$$30] |PARTITIONED|
project ([$$30, $$29])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$33, 1994-01-01], function-call: algebricks:ge, Args:[%0->$$33, 1993-10-01], function-call: algebricks:lt, Args:[%0->$$33, 1994-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$33, 1993-10-01], function-call: algebricks:lt, Args:[%0->$$33, 1994-01-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
index 5c240e2..623f894 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
@@ -38,7 +38,7 @@
-- HASH_PARTITION_EXCHANGE [$$10] |PARTITIONED|
project ([$$10, $$24])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$22, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$22, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$20, %0->$$21], function-call: algebricks:lt, Args:[%0->$$21, %0->$$22], function-call: algebricks:lt, Args:[%0->$$20, %0->$$21], function-call: algebricks:ge, Args:[%0->$$22, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$22, 1995-01-01], function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$24, MAIL], function-call: algebricks:eq, Args:[%0->$$24, SHIP]], function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$24, MAIL], function-call: algebricks:eq, Args:[%0->$$24, SHIP]]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$21, %0->$$22], function-call: algebricks:lt, Args:[%0->$$20, %0->$$21], function-call: algebricks:ge, Args:[%0->$$22, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$22, 1995-01-01], function-call: algebricks:or, Args:[function-call: algebricks:eq, Args:[%0->$$24, MAIL], function-call: algebricks:eq, Args:[%0->$$24, SHIP]]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
index 19bcd24..55bd51b 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
@@ -10,17 +10,17 @@
-- STABLE_SORT [$$21(DESC), $$20(DESC)] |LOCAL|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- group by ([$$20 := %0->$$28]) decor ([]) {
- aggregate [$$21] <- [function-call: hive:count(FINAL), Args:[%0->$$27]]
+ group by ([$$20 := %0->$$26]) decor ([]) {
+ aggregate [$$21] <- [function-call: hive:count(FINAL), Args:[%0->$$25]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$28] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$26] |PARTITIONED|
exchange
- -- HASH_PARTITION_EXCHANGE [$$28] |PARTITIONED|
- group by ([$$28 := %0->$$19]) decor ([]) {
- aggregate [$$27] <- [function-call: hive:count(PARTIAL1), Args:[1]]
+ -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
+ group by ([$$26 := %0->$$19]) decor ([]) {
+ aggregate [$$25] <- [function-call: hive:count(PARTIAL1), Args:[1]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
@@ -32,49 +32,40 @@
-- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- group by ([$$18 := %0->$$26]) decor ([]) {
- aggregate [$$19] <- [function-call: hive:count(FINAL), Args:[%0->$$25]]
+ group by ([$$18 := %0->$$10]) decor ([]) {
+ aggregate [$$19] <- [function-call: hive:count(COMPLETE), Args:[%0->$$1]]
-- AGGREGATE |LOCAL|
nested tuple source
-- NESTED_TUPLE_SOURCE |LOCAL|
}
- -- EXTERNAL_GROUP_BY[$$26] |PARTITIONED|
+ -- EXTERNAL_GROUP_BY[$$10] |PARTITIONED|
exchange
- -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
- group by ([$$26 := %0->$$10]) decor ([]) {
- aggregate [$$25] <- [function-call: hive:count(PARTIAL1), Args:[%0->$$1]]
- -- AGGREGATE |LOCAL|
- nested tuple source
- -- NESTED_TUPLE_SOURCE |LOCAL|
- }
- -- EXTERNAL_GROUP_BY[$$10] |LOCAL|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$10, $$1])
+ -- STREAM_PROJECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- project ([$$10, $$1])
- -- STREAM_PROJECT |PARTITIONED|
+ left outer join (function-call: algebricks:eq, Args:[%0->$$2, %0->$$10])
+ -- HYBRID_HASH_JOIN [$$10][$$2] |PARTITIONED|
exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- left outer join (function-call: algebricks:eq, Args:[%0->$$2, %0->$$10])
- -- HYBRID_HASH_JOIN [$$10][$$2] |PARTITIONED|
+ -- HASH_PARTITION_EXCHANGE [$$10] |PARTITIONED|
+ data-scan [$$10]<-[$$10, $$11, $$12, $$13, $$14, $$15, $$16, $$17] <- default.customer
+ -- DATASOURCE_SCAN |PARTITIONED|
exchange
- -- HASH_PARTITION_EXCHANGE [$$10] |PARTITIONED|
- data-scan [$$10]<-[$$10, $$11, $$12, $$13, $$14, $$15, $$16, $$17] <- default.customer
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
- exchange
- -- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
- project ([$$2, $$1])
- -- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$9, %special%requests%]])
- -- STREAM_SELECT |PARTITIONED|
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
+ project ([$$2, $$1])
+ -- STREAM_PROJECT |PARTITIONED|
+ select (function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$9, %special%requests%]])
+ -- STREAM_SELECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan [$$1, $$2, $$9]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.orders
+ -- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$1, $$2, $$9]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7, $$8, $$9] <- default.orders
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
index 21b90bd..35055a4 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
@@ -34,7 +34,7 @@
-- HASH_PARTITION_EXCHANGE [$$11] |PARTITIONED|
project ([$$11, $$15, $$16])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$20, 1995-10-01], function-call: algebricks:ge, Args:[%0->$$20, 1995-09-01], function-call: algebricks:lt, Args:[%0->$$20, 1995-10-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$20, 1995-09-01], function-call: algebricks:lt, Args:[%0->$$20, 1995-10-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
index a5bd27a..a4db8a7 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
@@ -24,7 +24,7 @@
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$3, $$6, $$7])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1996-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1996-04-01], function-call: algebricks:ge, Args:[%0->$$11, 1996-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1996-04-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1996-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1996-04-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
index 9835346..d1c0578 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
@@ -38,7 +38,7 @@
-- HYBRID_HASH_JOIN [$$2][$$11] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$6, MEDIUM POLISHED%]], function-call: algebricks:neq, Args:[%0->$$5, Brand#45], function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$6, MEDIUM POLISHED%]]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:neq, Args:[%0->$$5, Brand#45], function-call: algebricks:not, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$6, MEDIUM POLISHED%]]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
index a827007..714a56e 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
@@ -84,7 +84,7 @@
-- HASH_PARTITION_EXCHANGE [$$3] |PARTITIONED|
project ([$$3])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$9, MED BOX], function-call: algebricks:eq, Args:[%0->$$6, Brand#23], function-call: algebricks:eq, Args:[%0->$$9, MED BOX]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$6, Brand#23], function-call: algebricks:eq, Args:[%0->$$9, MED BOX]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
index ea47ea0..d0e1630 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
@@ -90,7 +90,7 @@
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:gt, Args:[%0->$$2, 300])
+ select (function-call: algebricks:gt, Args:[%0->$$2, 300.0])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
index 1827729..25d1696 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
@@ -22,7 +22,7 @@
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$15, $$16])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:or, Args:[function-call: algebricks:or, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#12], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, SM CASE||SM BOX||SM PACK||SM PKG]], function-call: algebricks:ge, Args:[%0->$$14, 1]], function-call: algebricks:le, Args:[%0->$$14, 11]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 5]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#23], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, MED BAG||MED BOX||MED PKG||MED PACK]], function-call: algebricks:ge, Args:[%0->$$14, 10]], function-call: algebricks:le, Args:[%0->$$14, 20]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 10]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#34], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, LG CASE||LG BOX||LG PACK||LG PKG]], function-call: algebricks:ge, Args:[%0->$$14, 20]], function-call: algebricks:le, Args:[%0->$$14, 30]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 15]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]])
+ select (function-call: algebricks:or, Args:[function-call: algebricks:or, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#12], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, SM CASE||SM BOX||SM PACK||SM PKG]], function-call: algebricks:ge, Args:[%0->$$14, 1.0]], function-call: algebricks:le, Args:[%0->$$14, 11.0]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 5]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#23], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, MED BAG||MED BOX||MED PKG||MED PACK]], function-call: algebricks:ge, Args:[%0->$$14, 10.0]], function-call: algebricks:le, Args:[%0->$$14, 20.0]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 10]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]], function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$4, Brand#34], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$7, LG CASE||LG BOX||LG PACK||LG PKG]], function-call: algebricks:ge, Args:[%0->$$14, 20.0]], function-call: algebricks:le, Args:[%0->$$14, 30.0]], function-call: algebricks:ge, Args:[%0->$$6, 1]], function-call: algebricks:le, Args:[%0->$$6, 15]], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFRegExp, Args:[%0->$$24, AIR||AIR REG]], function-call: algebricks:eq, Args:[%0->$$23, DELIVER IN PERSON]]])
-- STREAM_SELECT |UNPARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
index eddfca5..cebc0eb 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
@@ -50,7 +50,7 @@
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$2, $$3, $$5])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
index cc47cf3..fd20869 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
@@ -150,9 +150,9 @@
-- HYBRID_HASH_JOIN [$$18][$$36] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$18] |PARTITIONED|
- project ([$$16, $$18])
+ project ([$$18, $$16])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:gt, Args:[%0->$$28, %0->$$27], function-call: algebricks:gt, Args:[%0->$$28, %0->$$27]])
+ select (function-call: algebricks:gt, Args:[%0->$$28, %0->$$27])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
index 151f34d..1698365 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
@@ -10,7 +10,7 @@
-- HASH_PARTITION_EXCHANGE [$$1] |PARTITIONED|
project ([$$1, $$3])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$5, %BRASS], function-call: algebricks:eq, Args:[%0->$$6, 15], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$5, %BRASS]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:eq, Args:[%0->$$6, 15], function-call: hive:org.apache.hadoop.hive.ql.udf.UDFLike, Args:[%0->$$5, %BRASS]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
index 435fd7c..6929f74 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
@@ -70,7 +70,7 @@
-- HASH_PARTITION_EXCHANGE [$$2] |PARTITIONED|
project ([$$2, $$7])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$6, 1993-10-01], function-call: algebricks:ge, Args:[%0->$$6, 1993-07-01], function-call: algebricks:lt, Args:[%0->$$6, 1993-10-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$6, 1993-07-01], function-call: algebricks:lt, Args:[%0->$$6, 1993-10-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
index 177d24c..3161a61 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
@@ -106,7 +106,7 @@
-- HASH_PARTITION_EXCHANGE [$$9] |PARTITIONED|
project ([$$9, $$10])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$13, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$13, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$13, 1995-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$13, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$13, 1995-01-01]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
index cd9ffcd..92deaea 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
@@ -22,7 +22,7 @@
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
project ([$$6, $$7])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$7, 0.05], function-call: algebricks:le, Args:[%0->$$7, 0.07], function-call: algebricks:lt, Args:[%0->$$5, 24], function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$7, 0.05], function-call: algebricks:le, Args:[%0->$$7, 0.07], function-call: algebricks:lt, Args:[%0->$$5, 24]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$11, 1994-01-01], function-call: algebricks:lt, Args:[%0->$$11, 1995-01-01], function-call: algebricks:ge, Args:[%0->$$7, 0.05], function-call: algebricks:le, Args:[%0->$$7, 0.07], function-call: algebricks:lt, Args:[%0->$$5, 24.0]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
index 39f8301..c3081a5 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
@@ -148,7 +148,7 @@
-- HYBRID_HASH_JOIN [$$20][$$36] |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$20] |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:le, Args:[%0->$$30, 1996-12-31], function-call: algebricks:ge, Args:[%0->$$30, 1995-01-01], function-call: algebricks:le, Args:[%0->$$30, 1996-12-31]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$30, 1995-01-01], function-call: algebricks:le, Args:[%0->$$30, 1996-12-31]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
index b807a24..6b1ea41 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
@@ -138,7 +138,7 @@
-- HASH_PARTITION_EXCHANGE [$$38] |PARTITIONED|
project ([$$38, $$37, $$41])
-- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$41, 1996-12-31], function-call: algebricks:ge, Args:[%0->$$41, 1995-01-01]])
+ select (function-call: algebricks:and, Args:[function-call: algebricks:ge, Args:[%0->$$41, 1995-01-01], function-call: algebricks:lt, Args:[%0->$$41, 1996-12-31]])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
index 48e624e..2cbea4a 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
@@ -6,7 +6,7 @@
-- UNNEST |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$1, $$2, $$3]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7] <- default.supplier
+ data-scan [$$2, $$3, $$1]<-[$$1, $$2, $$3, $$4, $$5, $$6, $$7] <- default.supplier
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
index b5ed12f..1fb6117 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
@@ -14,7 +14,7 @@
-- STABLE_SORT [$$4(ASC)] |LOCAL|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- select (function-call: algebricks:lt, Args:[%0->$$4, 10000])
+ select (function-call: algebricks:lt, Args:[%0->$$4, 10000.0])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
diff --git a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
index ab55181..b5f1dc2 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
+++ b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
@@ -16,37 +16,35 @@
-- EMPTY_TUPLE_SOURCE |PARTITIONED|
exchange
-- HASH_PARTITION_EXCHANGE [$$25] |PARTITIONED|
- project ([$$32, $$25, $$29, $$28])
+ project ([$$25, $$28, $$29, $$32])
-- STREAM_PROJECT |PARTITIONED|
- project ([$$25, $$17, $$28, $$29, $$32])
- -- STREAM_PROJECT |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- join (function-call: algebricks:eq, Args:[%0->$$26, %0->$$17])
- -- HYBRID_HASH_JOIN [$$26][$$17] |PARTITIONED|
- exchange
- -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$28, 30000], function-call: algebricks:lt, Args:[%0->$$28, 30000]])
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ join (function-call: algebricks:eq, Args:[%0->$$26, %0->$$17])
+ -- HYBRID_HASH_JOIN [$$26][$$17] |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$26] |PARTITIONED|
+ select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$28, 30000.0], function-call: algebricks:lt, Args:[%0->$$26, 5]])
+ -- STREAM_SELECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan [$$32, $$25, $$26, $$29, $$28]<-[$$25, $$26, $$27, $$28, $$29, $$30, $$31, $$32, $$33] <- default.orders
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- HASH_PARTITION_EXCHANGE [$$17] |PARTITIONED|
+ project ([$$17])
+ -- STREAM_PROJECT |PARTITIONED|
+ select (function-call: algebricks:lt, Args:[%0->$$17, 5])
-- STREAM_SELECT |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$32, $$25, $$26, $$29, $$28]<-[$$25, $$26, $$27, $$28, $$29, $$30, $$31, $$32, $$33] <- default.orders
+ data-scan [$$17]<-[$$17, $$18, $$19, $$20, $$21, $$22, $$23, $$24] <- default.customer
-- DATASOURCE_SCAN |PARTITIONED|
exchange
-- ONE_TO_ONE_EXCHANGE |PARTITIONED|
empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
- exchange
- -- HASH_PARTITION_EXCHANGE [$$17] |PARTITIONED|
- project ([$$17])
- -- STREAM_PROJECT |PARTITIONED|
- select (function-call: algebricks:and, Args:[function-call: algebricks:lt, Args:[%0->$$17, 5], function-call: algebricks:lt, Args:[%0->$$17, 5]])
- -- STREAM_SELECT |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- data-scan [$$17]<-[$$17, $$18, $$19, $$20, $$21, $$22, $$23, $$24] <- default.customer
- -- DATASOURCE_SCAN |PARTITIONED|
- exchange
- -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
- empty-tuple-source
- -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
index a4ee677..c726bfa 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
@@ -42,6 +42,20 @@
</property>
<property>
+ <name>hive.auto.convert.join.noconditionaltask</name>
+ <value>false</value>
+ <description>Whether Hive enables the optimization that converts a common join into a mapjoin based on input file
+ size. If this parameter is on, and the sum of sizes for n-1 of the tables/partitions of an n-way join is smaller than the
+ specified size, the join is directly converted to a mapjoin (there is no conditional task).
+ </description>
+ </property>
+
+ <property>
+ <name>hive.auto.convert.join</name>
+ <value>false</value>
+ </property>
+
+ <property>
<name>hive.hyracks.connectorpolicy</name>
<value>SEND_SIDE_MAT_PIPELINING</value>
</property>
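
Both flags are pinned to false so the tests keep the conditional-task plan shape that the Driver tweak above relies on. A hedged sketch of how they are read; the ConfVars enum names are believed to match hive-0.11.0 but are worth verifying against the release:

    HiveConf conf = new HiveConf();
    boolean autoConvert = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECONVERTJOIN);
    boolean noCondTask = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVECONVERTJOINNOCONDITIONALTASK);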
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
index a7d8d9c..f886a44 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
@@ -43,7 +43,7 @@
# Note that the ConsoleHandler also has a separate level
# setting to limit messages printed to the console.
-.level= WARNING
+.level= INFO
# .level= INFO
# .level= FINE
# .level = FINEST
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
index 3f1214a..bb07665 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
@@ -13,9 +13,6 @@
-- create the result table
create table q10_returned_item (c_custkey int, c_name string, revenue double, c_acctbal string, n_name string, c_address string, c_phone string, c_comment string);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
insert overwrite table q10_returned_item
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
index 062f7b9..8546365 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q12_shipping(l_shipmode string, high_line_count double, low_line_count double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-
-- the query
insert overwrite table q12_shipping
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
index 988f400..4644d23 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q14_promotion_effect(promo_revenue double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-
-- the query
insert overwrite table q14_promotion_effect
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
index 04064ed..8fa333e 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
@@ -14,8 +14,6 @@
create table q15_top_supplier(s_suppkey int, s_name string, s_address string, s_phone string, total_revenue double);
-set mapred.min.split.size=536870912;
-
-- the query
insert overwrite table revenue
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
index ac2902c..d1eaacc 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
@@ -13,9 +13,6 @@
create table q18_tmp(l_orderkey int, t_sum_quantity double);
create table q18_large_volume_customer(c_name string, c_custkey int, o_orderkey int, o_orderdate string, o_totalprice double, sum_quantity double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1164000000;
-
-- the query
insert overwrite table q18_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
index 2002e1e..6badfcf 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
@@ -9,9 +9,6 @@
-- create the result table
create table q19_discounted_revenue(revenue double);
-set mapred.min.split.size=268435456;
-set hive.exec.reducers.bytes.per.reducer=1040000000;
-
-- the query
insert overwrite table q19_discounted_revenue
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
index a002068..af64a4f 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
@@ -7,8 +7,6 @@
-- create the target table
CREATE TABLE q1_pricing_summary_report ( L_RETURNFLAG STRING, L_LINESTATUS STRING, SUM_QTY DOUBLE, SUM_BASE_PRICE DOUBLE, SUM_DISC_PRICE DOUBLE, SUM_CHARGE DOUBLE, AVE_QTY DOUBLE, AVE_PRICE DOUBLE, AVE_DISC DOUBLE, COUNT_ORDER INT);
-set mapred.min.split.size=536870912;
-
-- the query
INSERT OVERWRITE TABLE q1_pricing_summary_report
SELECT
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
index 2bb90ea..32181bf 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
@@ -23,8 +23,6 @@
create table q20_tmp4(ps_suppkey int);
create table q20_potential_part_promotion(s_name string, s_address string);
-set mapred.min.split.size=536870912;
-
-- the query
insert overwrite table q20_tmp1
select distinct p_partkey
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
index 0049eb3..67f6dc4 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
@@ -11,9 +11,6 @@
-- create the target table
create table q3_shipping_priority (l_orderkey int, revenue double, o_orderdate string, o_shippriority int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
Insert overwrite table q3_shipping_priority
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
index aa828e9..efbcff2 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
@@ -11,7 +11,6 @@
CREATE TABLE q4_order_priority_tmp (O_ORDERKEY INT);
CREATE TABLE q4_order_priority (O_ORDERPRIORITY STRING, ORDER_COUNT INT);
-set mapred.min.split.size=536870912;
-- the query
INSERT OVERWRITE TABLE q4_order_priority_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
index 9af2dd2..838a1e8 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
@@ -17,7 +17,6 @@
-- create the target table
create table q5_local_supplier_volume (N_NAME STRING, REVENUE DOUBLE);
-set mapred.min.split.size=536870912;
-- the query
insert overwrite table q5_local_supplier_volume
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
index 2678f80..12ae8ae 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
@@ -17,9 +17,6 @@
create table q7_volume_shipping (supp_nation string, cust_nation string, l_year int, revenue double);
create table q7_volume_shipping_tmp(supp_nation string, cust_nation string, s_nationkey int, c_nationkey int);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1225000000;
-
-- the query
insert overwrite table q7_volume_shipping_tmp
select
diff --git a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
index 2e5b4a1..c491997 100644
--- a/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
+++ b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
@@ -17,9 +17,6 @@
-- create the result table
create table q9_product_type_profit (nation string, o_year string, sum_profit double);
-set mapred.min.split.size=536870912;
-set hive.exec.reducers.bytes.per.reducer=1024000000;
-
-- the query
insert overwrite table q9_product_type_profit
select