merge back fullstack_release_cleanup
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_asterix_stabilization@3095 123451ca-8445-de46-9d55-352943316053
diff --git a/hivesterix/conf/cluster b/hivesterix/conf/cluster
deleted file mode 100644
index 6cc8cca..0000000
--- a/hivesterix/conf/cluster
+++ /dev/null
@@ -1,11 +0,0 @@
-4
-10.0.0.1 asterix-001
-10.0.0.2 asterix-002
-10.0.0.3 asterix-003
-10.0.0.4 asterix-004
-10.0.0.5 asterix-005
-10.0.0.6 asterix-006
-10.0.0.7 asterix-007
-10.0.0.8 asterix-008
-10.0.0.9 asterix-009
-10.0.0.10 asterix-010
diff --git a/hivesterix/hivesterix-common/pom.xml b/hivesterix/hivesterix-common/pom.xml
new file mode 100644
index 0000000..33d8fb3
--- /dev/null
+++ b/hivesterix/hivesterix-common/pom.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hivesterix-common</artifactId>
+ <name>hivesterix-common</name>
+
+ <parent>
+ <artifactId>hivesterix</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-api</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/common/config/ConfUtil.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/common/config/ConfUtil.java
new file mode 100644
index 0000000..666d361
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/common/config/ConfUtil.java
@@ -0,0 +1,174 @@
+package edu.uci.ics.hivesterix.common.config;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hyracks.api.client.HyracksConnection;
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.topology.ClusterTopology;
+
+@SuppressWarnings({ "rawtypes", "deprecation" })
+public class ConfUtil {
+ private static final String clusterPropertiesPath = "conf/cluster.properties";
+
+ private static JobConf job;
+ private static HiveConf hconf;
+ private static String[] NCs;
+ private static Map<String, List<String>> ncMapping;
+ private static IHyracksClientConnection hcc = null;
+ private static ClusterTopology topology = null;
+ private static Properties clusterProps;
+ private static Map<String, NodeControllerInfo> ncNameToNcInfos;
+
+ public static JobConf getJobConf(Class<? extends InputFormat> format, Path path) {
+ JobConf conf = new JobConf();
+ if (job != null)
+ conf = job;
+
+ String hadoopPath = System.getProperty("HADOOP_HOME", "/hadoop");
+ Path pathCore = new Path(hadoopPath + "/conf/core-site.xml");
+ conf.addResource(pathCore);
+ Path pathMapRed = new Path(hadoopPath + "/conf/mapred-site.xml");
+ conf.addResource(pathMapRed);
+ Path pathHDFS = new Path(hadoopPath + "/conf/hdfs-site.xml");
+ conf.addResource(pathHDFS);
+
+ conf.setInputFormat(format);
+ FileInputFormat.setInputPaths(conf, path);
+ return conf;
+ }
+
+ public static JobConf getJobConf() {
+ JobConf conf = new JobConf();
+ if (job != null)
+ conf = job;
+
+ String hadoopPath = System.getProperty("HADOOP_HOME", "/hadoop");
+ Path pathCore = new Path(hadoopPath + "/conf/core-site.xml");
+ conf.addResource(pathCore);
+ Path pathMapRed = new Path(hadoopPath + "/conf/mapred-site.xml");
+ conf.addResource(pathMapRed);
+ Path pathHDFS = new Path(hadoopPath + "/conf/hdfs-site.xml");
+ conf.addResource(pathHDFS);
+
+ return conf;
+ }
+
+ public static void setJobConf(JobConf conf) {
+ job = conf;
+ }
+
+ public static void setHiveConf(HiveConf hiveConf) {
+ hconf = hiveConf;
+ }
+
+ public static HiveConf getHiveConf() {
+ if (hconf == null) {
+ hconf = new HiveConf(SessionState.class);
+ hconf.addResource(new Path("conf/hive-default.xml"));
+ }
+ return hconf;
+ }
+
+ public static String[] getNCs() throws HyracksException {
+ if (NCs == null) {
+ try {
+ loadClusterConfig();
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+ return NCs;
+ }
+
+ public static Map<String, List<String>> getNCMapping() throws HyracksException {
+ if (ncMapping == null) {
+ try {
+ loadClusterConfig();
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+ return ncMapping;
+ }
+
+ public static Map<String, NodeControllerInfo> getNodeControllerInfo() throws HyracksException {
+ if (ncNameToNcInfos == null) {
+ try {
+ loadClusterConfig();
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+ return ncNameToNcInfos;
+ }
+
+ private static void loadClusterConfig() {
+ try {
+ getHiveConf();
+
+ /**
+ * load the properties file if it is not loaded
+ */
+ if (clusterProps == null) {
+ clusterProps = new Properties();
+ InputStream confIn = new FileInputStream(clusterPropertiesPath);
+ clusterProps.load(confIn);
+ confIn.close();
+ }
+ Process process = Runtime.getRuntime().exec("src/main/resources/scripts/getip.sh");
+ BufferedReader ipReader = new BufferedReader(new InputStreamReader(new DataInputStream(
+ process.getInputStream())));
+ String ipAddress = ipReader.readLine();
+ ipReader.close();
+ int port = Integer.parseInt(clusterProps.getProperty("CC_CLIENTPORT"));
+ int mpl = Integer.parseInt(hconf.get("hive.hyracks.parrallelism"));
+
+ hcc = new HyracksConnection(ipAddress, port);
+ topology = hcc.getClusterTopology();
+ ncNameToNcInfos = hcc.getNodeControllerInfos();
+ NCs = new String[ncNameToNcInfos.size() * mpl];
+ ncMapping = new HashMap<String, List<String>>();
+ int i = 0;
+ for (Map.Entry<String, NodeControllerInfo> entry : ncNameToNcInfos.entrySet()) {
+ String ipAddr = InetAddress.getByAddress(entry.getValue().getNetworkAddress().getIpAddress())
+ .getHostAddress();
+ List<String> matchedNCs = ncMapping.get(ipAddr);
+ if (matchedNCs == null) {
+ matchedNCs = new ArrayList<String>();
+ ncMapping.put(ipAddr, matchedNCs);
+ }
+ matchedNCs.add(entry.getKey());
+ for (int j = i * mpl; j < i * mpl + mpl; j++)
+ NCs[j] = entry.getKey();
+ i++;
+ }
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ public static ClusterTopology getClusterTopology() {
+ if (topology == null)
+ loadClusterConfig();
+ return topology;
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java
new file mode 100644
index 0000000..8fb715b
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java
@@ -0,0 +1,24 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+/**
+ * some constants for expression
+ *
+ * @author yingyib
+ */
+public class ExpressionConstant {
+
+ /**
+ * name space for function identifier
+ */
+ public static String NAMESPACE = "hive";
+
+ /**
+ * field expression: modeled as function in Algebricks
+ */
+ public static String FIELDACCESS = "fieldaccess";
+
+ /**
+ * null string: modeled as null in Algebricks
+ */
+ public static String NULL = "null";
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
new file mode 100644
index 0000000..662ed83
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
@@ -0,0 +1,209 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+
+public class ExpressionTranslator {
+
+ public static Object getHiveExpression(ILogicalExpression expr, IVariableTypeEnvironment env) throws Exception {
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ /**
+ * function expression
+ */
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+ IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
+ FunctionIdentifier fid = funcInfo.getFunctionIdentifier();
+
+ if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
+ Object info = ((HiveFunctionInfo) funcInfo).getInfo();
+ ExprNodeFieldDesc desc = (ExprNodeFieldDesc) info;
+ return new ExprNodeFieldDesc(desc.getTypeInfo(), desc.getDesc(), desc.getFieldName(), desc.getIsList());
+ }
+
+ if (fid.getName().equals(ExpressionConstant.NULL)) {
+ return new ExprNodeNullDesc();
+ }
+
+ /**
+ * argument expressions: translate argument expressions recursively
+ * first, this logic is shared in scalar, aggregation and unnesting
+ * function
+ */
+ List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
+ List<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
+ for (Mutable<ILogicalExpression> argument : arguments) {
+ /**
+ * parameters could not be aggregate function desc
+ */
+ ExprNodeDesc parameter = (ExprNodeDesc) getHiveExpression(argument.getValue(), env);
+ parameters.add(parameter);
+ }
+
+ /**
+ * get expression
+ */
+ if (funcExpr instanceof ScalarFunctionCallExpression) {
+ String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getHiveFunctionName(fid);
+ GenericUDF udf;
+ if (udfName != null) {
+ /**
+ * get corresponding function info for built-in functions
+ */
+ FunctionInfo fInfo = FunctionRegistry.getFunctionInfo(udfName);
+ udf = fInfo.getGenericUDF();
+
+ int inputSize = parameters.size();
+ List<ExprNodeDesc> currentDescs = new ArrayList<ExprNodeDesc>();
+
+ // generate expression tree if necessary
+ while (inputSize > 2) {
+ int pairs = inputSize / 2;
+ for (int i = 0; i < pairs; i++) {
+ List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>();
+ descs.add(parameters.get(2 * i));
+ descs.add(parameters.get(2 * i + 1));
+ ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, descs);
+ currentDescs.add(desc);
+ }
+
+ if (inputSize % 2 != 0) {
+ // List<ExprNodeDesc> descs = new
+ // ArrayList<ExprNodeDesc>();
+ // ExprNodeDesc lastExpr =
+ // currentDescs.remove(currentDescs.size() - 1);
+ // descs.add(lastExpr);
+ currentDescs.add(parameters.get(inputSize - 1));
+ // ExprNodeDesc desc =
+ // ExprNodeGenericFuncDesc.newInstance(udf, descs);
+ // currentDescs.add(desc);
+ }
+ inputSize = currentDescs.size();
+ parameters.clear();
+ parameters.addAll(currentDescs);
+ currentDescs.clear();
+ }
+
+ } else {
+ Object secondInfo = ((HiveFunctionInfo) funcInfo).getInfo();
+ if (secondInfo != null) {
+
+ /**
+ * for GenericUDFBridge: we should not call get type of
+ * this hive expression, because parameters may have
+ * been changed!
+ */
+ ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) ((HiveFunctionInfo) funcInfo)
+ .getInfo();
+ udf = hiveExpr.getGenericUDF();
+ } else {
+ /**
+ * for other generic UDF
+ */
+ Class<?> udfClass;
+ try {
+ udfClass = Class.forName(fid.getName());
+ udf = (GenericUDF) udfClass.newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ }
+ }
+ /**
+ * get hive generic function expression
+ */
+ ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, parameters);
+ return desc;
+ } else if (funcExpr instanceof AggregateFunctionCallExpression) {
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
+ .getInfo();
+ /**
+ * set parameters
+ */
+ aggregateDesc.setParameters((ArrayList<ExprNodeDesc>) parameters);
+
+ List<TypeInfo> originalParameterTypeInfos = new ArrayList<TypeInfo>();
+ for (ExprNodeDesc parameter : parameters) {
+ if (parameter.getTypeInfo() instanceof StructTypeInfo) {
+ originalParameterTypeInfos.add(TypeInfoFactory.doubleTypeInfo);
+ } else
+ originalParameterTypeInfos.add(parameter.getTypeInfo());
+ }
+
+ GenericUDAFEvaluator eval = FunctionRegistry.getGenericUDAFEvaluator(
+ aggregateDesc.getGenericUDAFName(), originalParameterTypeInfos, aggregateDesc.getDistinct(),
+ false);
+
+ AggregationDesc newAggregateDesc = new AggregationDesc(aggregateDesc.getGenericUDAFName(), eval,
+ aggregateDesc.getParameters(), aggregateDesc.getDistinct(), aggregateDesc.getMode());
+ return newAggregateDesc;
+ } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
+ /**
+ * type inference for UDTF function
+ */
+ UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
+ String funcName = hiveDesc.getUDTFName();
+ FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
+ GenericUDTF udtf = fi.getGenericUDTF();
+ UDTFDesc desc = new UDTFDesc(udtf);
+ return desc;
+ } else {
+ throw new IllegalStateException("unrecognized function expression " + expr.getClass().getName());
+ }
+ } else if ((expr.getExpressionTag() == LogicalExpressionTag.VARIABLE)) {
+ /**
+ * get type for variable in the environment
+ */
+ VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
+ LogicalVariable var = varExpr.getVariableReference();
+ TypeInfo typeInfo = (TypeInfo) env.getVarType(var);
+ ExprNodeDesc desc = new ExprNodeColumnDesc(typeInfo, var.toString(), "", false);
+ return desc;
+ } else if ((expr.getExpressionTag() == LogicalExpressionTag.CONSTANT)) {
+ /**
+ * get expression for constant in the environment
+ */
+ ConstantExpression varExpr = (ConstantExpression) expr;
+ Object value = ((HivesterixConstantValue) varExpr.getValue()).getObject();
+ ExprNodeDesc desc = new ExprNodeConstantDesc(value);
+ return desc;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java
new file mode 100644
index 0000000..56890eb
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java
@@ -0,0 +1,82 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.HashMap;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+
+public class HiveAlgebricksBuiltInFunctionMap {
+
+ /**
+ * hive auqa builtin function map instance
+ */
+ public static HiveAlgebricksBuiltInFunctionMap INSTANCE = new HiveAlgebricksBuiltInFunctionMap();
+
+ /**
+ * hive to Algebricks function name mapping
+ */
+ private HashMap<String, FunctionIdentifier> hiveToAlgebricksMap = new HashMap<String, FunctionIdentifier>();
+
+ /**
+ * Algebricks to hive function name mapping
+ */
+ private HashMap<FunctionIdentifier, String> AlgebricksToHiveMap = new HashMap<FunctionIdentifier, String>();
+
+ /**
+ * the bi-directional mapping between hive functions and Algebricks
+ * functions
+ */
+ private HiveAlgebricksBuiltInFunctionMap() {
+ hiveToAlgebricksMap.put("and", AlgebricksBuiltinFunctions.AND);
+ hiveToAlgebricksMap.put("or", AlgebricksBuiltinFunctions.OR);
+ hiveToAlgebricksMap.put("!", AlgebricksBuiltinFunctions.NOT);
+ hiveToAlgebricksMap.put("not", AlgebricksBuiltinFunctions.NOT);
+ hiveToAlgebricksMap.put("=", AlgebricksBuiltinFunctions.EQ);
+ hiveToAlgebricksMap.put("<>", AlgebricksBuiltinFunctions.NEQ);
+ hiveToAlgebricksMap.put(">", AlgebricksBuiltinFunctions.GT);
+ hiveToAlgebricksMap.put("<", AlgebricksBuiltinFunctions.LT);
+ hiveToAlgebricksMap.put(">=", AlgebricksBuiltinFunctions.GE);
+ hiveToAlgebricksMap.put("<=", AlgebricksBuiltinFunctions.LE);
+
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.AND, "and");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.OR, "or");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NOT, "!");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NOT, "not");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.EQ, "=");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NEQ, "<>");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.GT, ">");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.LT, "<");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.GE, ">=");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.LE, "<=");
+ }
+
+ /**
+ * get hive function name from Algebricks function identifier
+ *
+ * @param AlgebricksId
+ * @return hive
+ */
+ public String getHiveFunctionName(FunctionIdentifier AlgebricksId) {
+ return AlgebricksToHiveMap.get(AlgebricksId);
+ }
+
+ /**
+ * get hive UDF or Generic class's corresponding built-in functions
+ *
+ * @param funcClass
+ * @return function identifier
+ */
+ public FunctionIdentifier getAlgebricksFunctionId(Class<?> funcClass) {
+ Description annotation = (Description) funcClass.getAnnotation(Description.class);
+ String hiveUDFName = "";
+ if (annotation == null) {
+ hiveUDFName = null;
+ return null;
+ } else {
+ hiveUDFName = annotation.name();
+ return hiveToAlgebricksMap.get(hiveUDFName);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java
new file mode 100644
index 0000000..e10e8c1
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java
@@ -0,0 +1,179 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionTypeComputer;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+public class HiveExpressionTypeComputer implements IExpressionTypeComputer {
+
+ public static IExpressionTypeComputer INSTANCE = new HiveExpressionTypeComputer();
+
+ @Override
+ public Object getType(ILogicalExpression expr, IMetadataProvider<?, ?> metadataProvider,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ /**
+ * function expression
+ */
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+ IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
+
+ /**
+ * argument expressions, types, object inspectors
+ */
+ List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
+ List<TypeInfo> argumentTypes = new ArrayList<TypeInfo>();
+
+ /**
+ * get types of argument
+ */
+ for (Mutable<ILogicalExpression> argument : arguments) {
+ TypeInfo type = (TypeInfo) getType(argument.getValue(), metadataProvider, env);
+ argumentTypes.add(type);
+ }
+
+ ObjectInspector[] childrenOIs = new ObjectInspector[argumentTypes.size()];
+
+ /**
+ * get object inspector
+ */
+ for (int i = 0; i < argumentTypes.size(); i++) {
+ childrenOIs[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(argumentTypes.get(i));
+ }
+
+ /**
+ * type inference for scalar function
+ */
+ if (funcExpr instanceof ScalarFunctionCallExpression) {
+
+ FunctionIdentifier AlgebricksId = funcInfo.getFunctionIdentifier();
+ Object functionInfo = ((HiveFunctionInfo) funcInfo).getInfo();
+ String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getHiveFunctionName(AlgebricksId);
+ GenericUDF udf;
+ if (udfName != null) {
+ /**
+ * get corresponding function info for built-in functions
+ */
+ FunctionInfo fInfo = FunctionRegistry.getFunctionInfo(udfName);
+ udf = fInfo.getGenericUDF();
+ } else if (functionInfo != null) {
+ /**
+ * for GenericUDFBridge: we should not call get type of this
+ * hive expression, because parameters may have been
+ * changed!
+ */
+ ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) functionInfo;
+ udf = hiveExpr.getGenericUDF();
+ } else {
+ /**
+ * for other generic UDF
+ */
+ Class<?> udfClass;
+ try {
+ udfClass = Class.forName(AlgebricksId.getName());
+ udf = (GenericUDF) udfClass.newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ }
+ /**
+ * doing the actual type inference
+ */
+ ObjectInspector oi = null;
+ try {
+ oi = udf.initialize(childrenOIs);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(oi);
+ return exprType;
+
+ } else if (funcExpr instanceof AggregateFunctionCallExpression) {
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
+ .getInfo();
+ /**
+ * type inference for aggregation function
+ */
+ GenericUDAFEvaluator result = aggregateDesc.getGenericUDAFEvaluator();
+
+ ObjectInspector returnOI = null;
+ try {
+ returnOI = result.init(aggregateDesc.getMode(), childrenOIs);
+ } catch (HiveException e) {
+ e.printStackTrace();
+ }
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
+ return exprType;
+ } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
+ /**
+ * type inference for UDTF function
+ */
+ UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
+ GenericUDTF udtf = hiveDesc.getGenericUDTF();
+ ObjectInspector returnOI = null;
+ try {
+ returnOI = udtf.initialize(childrenOIs);
+ } catch (HiveException e) {
+ e.printStackTrace();
+ }
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
+ return exprType;
+ } else {
+ throw new IllegalStateException("unrecognized function expression " + expr.getClass().getName());
+ }
+ } else if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
+ /**
+ * get type for variable in the environment
+ */
+ VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
+ LogicalVariable var = varExpr.getVariableReference();
+ TypeInfo type = (TypeInfo) env.getVarType(var);
+ return type;
+ } else if (expr.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
+ /**
+ * get type for constant, from its java class
+ */
+ ConstantExpression constExpr = (ConstantExpression) expr;
+ HivesterixConstantValue value = (HivesterixConstantValue) constExpr.getValue();
+ TypeInfo type = TypeInfoFactory.getPrimitiveTypeInfoFromJavaPrimitive(value.getObject().getClass());
+ return type;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java
new file mode 100644
index 0000000..ced8d02
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java
@@ -0,0 +1,36 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.io.Serializable;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+
+public class HiveFunctionInfo implements IFunctionInfo, Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * primary function identifier
+ */
+ private transient FunctionIdentifier fid;
+
+ /**
+ * secondary function identifier: function name
+ */
+ private transient Object secondaryFid;
+
+ public HiveFunctionInfo(FunctionIdentifier fid, Object secondFid) {
+ this.fid = fid;
+ this.secondaryFid = secondFid;
+ }
+
+ @Override
+ public FunctionIdentifier getFunctionIdentifier() {
+ return fid;
+ }
+
+ public Object getInfo() {
+ return secondaryFid;
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java
new file mode 100644
index 0000000..b77fe49
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IMergeAggregationExpressionFactory;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+
+/**
+ * generate merge aggregation expression from an aggregation expression
+ *
+ * @author yingyib
+ */
+public class HiveMergeAggregationExpressionFactory implements IMergeAggregationExpressionFactory {
+
+ public static IMergeAggregationExpressionFactory INSTANCE = new HiveMergeAggregationExpressionFactory();
+
+ @Override
+ public ILogicalExpression createMergeAggregation(ILogicalExpression expr, IOptimizationContext context)
+ throws AlgebricksException {
+ /**
+ * type inference for scalar function
+ */
+ if (expr instanceof AggregateFunctionCallExpression) {
+ AggregateFunctionCallExpression funcExpr = (AggregateFunctionCallExpression) expr;
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregator = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
+ LogicalVariable inputVar = context.newVar();
+ ExprNodeDesc col = new ExprNodeColumnDesc(TypeInfoFactory.voidTypeInfo, inputVar.toString(), null, false);
+ ArrayList<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
+ parameters.add(col);
+
+ GenericUDAFEvaluator.Mode mergeMode;
+ if (aggregator.getMode() == GenericUDAFEvaluator.Mode.PARTIAL1)
+ mergeMode = GenericUDAFEvaluator.Mode.PARTIAL2;
+ else if (aggregator.getMode() == GenericUDAFEvaluator.Mode.COMPLETE)
+ mergeMode = GenericUDAFEvaluator.Mode.FINAL;
+ else
+ mergeMode = aggregator.getMode();
+ AggregationDesc mergeDesc = new AggregationDesc(aggregator.getGenericUDAFName(),
+ aggregator.getGenericUDAFEvaluator(), parameters, aggregator.getDistinct(), mergeMode);
+
+ String UDAFName = mergeDesc.getGenericUDAFName();
+ List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
+ arguments.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(inputVar)));
+
+ FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDAFName + "("
+ + mergeDesc.getMode() + ")");
+ HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, mergeDesc);
+ AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(funcInfo,
+ false, arguments);
+ return aggregationExpression;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java
new file mode 100644
index 0000000..906e3ce
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java
@@ -0,0 +1,15 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.INullableTypeComputer;
+
+public class HiveNullableTypeComputer implements INullableTypeComputer {
+
+ public static INullableTypeComputer INSTANCE = new HiveNullableTypeComputer();
+
+ @Override
+ public Object makeNullableType(Object type) throws AlgebricksException {
+ return type;
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java
new file mode 100644
index 0000000..c74966c
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java
@@ -0,0 +1,102 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionTypeComputer;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IPartialAggregationTypeComputer;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+public class HivePartialAggregationTypeComputer implements IPartialAggregationTypeComputer {
+
+ public static IPartialAggregationTypeComputer INSTANCE = new HivePartialAggregationTypeComputer();
+
+ @Override
+ public Object getType(ILogicalExpression expr, IVariableTypeEnvironment env,
+ IMetadataProvider<?, ?> metadataProvider) throws AlgebricksException {
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ IExpressionTypeComputer tc = HiveExpressionTypeComputer.INSTANCE;
+ /**
+ * function expression
+ */
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+
+ /**
+ * argument expressions, types, object inspectors
+ */
+ List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
+ List<TypeInfo> argumentTypes = new ArrayList<TypeInfo>();
+
+ /**
+ * get types of argument
+ */
+ for (Mutable<ILogicalExpression> argument : arguments) {
+ TypeInfo type = (TypeInfo) tc.getType(argument.getValue(), metadataProvider, env);
+ argumentTypes.add(type);
+ }
+
+ ObjectInspector[] childrenOIs = new ObjectInspector[argumentTypes.size()];
+
+ /**
+ * get object inspector
+ */
+ for (int i = 0; i < argumentTypes.size(); i++) {
+ childrenOIs[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(argumentTypes.get(i));
+ }
+
+ /**
+ * type inference for scalar function
+ */
+ if (funcExpr instanceof AggregateFunctionCallExpression) {
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
+ .getInfo();
+ /**
+ * type inference for aggregation function
+ */
+ GenericUDAFEvaluator result = aggregateDesc.getGenericUDAFEvaluator();
+
+ ObjectInspector returnOI = null;
+ try {
+ returnOI = result.init(getPartialMode(aggregateDesc.getMode()), childrenOIs);
+ } catch (HiveException e) {
+ e.printStackTrace();
+ }
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
+ return exprType;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+
+ private Mode getPartialMode(Mode mode) {
+ Mode partialMode;
+ if (mode == Mode.FINAL)
+ partialMode = Mode.PARTIAL2;
+ else if (mode == Mode.COMPLETE)
+ partialMode = Mode.PARTIAL1;
+ else
+ partialMode = mode;
+ return partialMode;
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java
new file mode 100644
index 0000000..3d35e1f
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java
@@ -0,0 +1,55 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IAlgebricksConstantValue;
+
+public class HivesterixConstantValue implements IAlgebricksConstantValue {
+
+ private Object object;
+
+ public HivesterixConstantValue(Object object) {
+ this.setObject(object);
+ }
+
+ @Override
+ public boolean isFalse() {
+ return object == Boolean.FALSE;
+ }
+
+ @Override
+ public boolean isNull() {
+ return object == null;
+ }
+
+ @Override
+ public boolean isTrue() {
+ return object == Boolean.TRUE;
+ }
+
+ public void setObject(Object object) {
+ this.object = object;
+ }
+
+ public Object getObject() {
+ return object;
+ }
+
+ @Override
+ public String toString() {
+ return object.toString();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof HivesterixConstantValue)) {
+ return false;
+ }
+ HivesterixConstantValue v2 = (HivesterixConstantValue) o;
+ return object.equals(v2.getObject());
+ }
+
+ @Override
+ public int hashCode() {
+ return object.hashCode();
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/test/java/edu/uci/ics/hyracks/AppTest.java b/hivesterix/hivesterix-common/src/test/java/edu/uci/ics/hyracks/AppTest.java
new file mode 100644
index 0000000..0c701c8
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/test/java/edu/uci/ics/hyracks/AppTest.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hyracks;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Unit test for simple App.
+ */
+public class AppTest
+ extends TestCase
+{
+ /**
+ * Create the test case
+ *
+ * @param testName name of the test case
+ */
+ public AppTest( String testName )
+ {
+ super( testName );
+ }
+
+ /**
+ * @return the suite of tests being tested
+ */
+ public static Test suite()
+ {
+ return new TestSuite( AppTest.class );
+ }
+
+ /**
+ * Rigourous Test :-)
+ */
+ public void testApp()
+ {
+ assertTrue( true );
+ }
+}
diff --git a/hivesterix/hivesterix-dist/conf/cluster.properties b/hivesterix/hivesterix-dist/conf/cluster.properties
new file mode 100644
index 0000000..2d2401a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/conf/cluster.properties
@@ -0,0 +1,37 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME=../../../../hyracks
+
+#The tmp directory for cc to install jars
+CCTMP_DIR=/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="/tmp/t3,/tmp/t4"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/hivesterix/conf/configuration.xsl b/hivesterix/hivesterix-dist/conf/configuration.xsl
similarity index 100%
rename from hivesterix/conf/configuration.xsl
rename to hivesterix/hivesterix-dist/conf/configuration.xsl
diff --git a/hivesterix/hivesterix-dist/conf/debugnc.properties b/hivesterix/hivesterix-dist/conf/debugnc.properties
new file mode 100755
index 0000000..27afa26
--- /dev/null
+++ b/hivesterix/hivesterix-dist/conf/debugnc.properties
@@ -0,0 +1,12 @@
+#The tmp directory for nc to install jars
+NCTMP_DIR2=/tmp/t-1
+
+#The directory to put nc logs
+NCLOGS_DIR2=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS2="/tmp/t-2,/tmp/t-3"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS2="-Xdebug -Xrunjdwp:transport=dt_socket,address=7003,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/hivesterix/conf/hive-default.xml b/hivesterix/hivesterix-dist/conf/hive-default.xml
similarity index 98%
rename from hivesterix/conf/hive-default.xml
rename to hivesterix/hivesterix-dist/conf/hive-default.xml
index 034ea61..587eede 100644
--- a/hivesterix/conf/hive-default.xml
+++ b/hivesterix/hivesterix-dist/conf/hive-default.xml
@@ -23,22 +23,11 @@
By setting this property to -1, Hive will automatically figure out what
should be the number of reducers.
</description>
- </property>
- <property>
- <name>hive.hyracks.host</name>
- <value>128.195.14.4</value>
- </property>
-
- <property>
- <name>hive.hyracks.port</name>
- <value>3099</value>
- </property>
-
- <property>
- <name>hive.hyracks.app</name>
- <value>hivesterix</value>
- </property>
+ <property>
+ <name>hive.hyracks.connectorpolicy</name>
+ <value>PIPELINING</value>
+ </property>
<property>
<name>hive.hyracks.parrallelism</name>
@@ -52,12 +41,12 @@
<property>
<name>hive.algebricks.groupby.external.memory</name>
- <value>536870912</value>
+ <value>33554432</value>
</property>
<property>
<name>hive.algebricks.sort.memory</name>
- <value>536870912</value>
+ <value>33554432</value>
</property>
<property>
diff --git a/hivesterix/conf/hive-log4j.properties b/hivesterix/hivesterix-dist/conf/hive-log4j.properties
similarity index 100%
rename from hivesterix/conf/hive-log4j.properties
rename to hivesterix/hivesterix-dist/conf/hive-log4j.properties
diff --git a/hivesterix/hivesterix-dist/conf/master b/hivesterix/hivesterix-dist/conf/master
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/hivesterix/hivesterix-dist/conf/master
@@ -0,0 +1 @@
+localhost
diff --git a/hivesterix/hivesterix-dist/conf/slaves b/hivesterix/hivesterix-dist/conf/slaves
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/hivesterix/hivesterix-dist/conf/slaves
@@ -0,0 +1 @@
+localhost
diff --git a/hivesterix/hivesterix-dist/pom.xml b/hivesterix/hivesterix-dist/pom.xml
new file mode 100644
index 0000000..2a7b798
--- /dev/null
+++ b/hivesterix/hivesterix-dist/pom.xml
@@ -0,0 +1,498 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-dist</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <name>hivesterix-dist</name>
+ <dependencies>
+ <dependency>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ <version>2.5</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.1</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>args4j</groupId>
+ <artifactId>args4j</artifactId>
+ <version>2.0.12</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.json</groupId>
+ <artifactId>json</artifactId>
+ <version>20090211</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-server</artifactId>
+ <version>8.0.0.M1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-servlet</artifactId>
+ <version>8.0.0.M1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ </dependency>
+ <dependency>
+ <groupId>jline</groupId>
+ <artifactId>jline</artifactId>
+ <version>0.9.94</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-core</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-connectionpool</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-enhancer</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-rdbms</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-dbcp</groupId>
+ <artifactId>commons-dbcp</artifactId>
+ <version>1.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-pool</groupId>
+ <artifactId>commons-pool</artifactId>
+ <version>1.5.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ <version>3.2.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ <version>2.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax</groupId>
+ <artifactId>jdo2-api</artifactId>
+ <version>2.3-ec</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.facebook</groupId>
+ <artifactId>libfb303</artifactId>
+ <version>0.5.0</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>libthrift</artifactId>
+ <version>0.5.0</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>cli</artifactId>
+ <version>1.2</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache</groupId>
+ <artifactId>log4j</artifactId>
+ <version>1.2.15</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr-runtime</artifactId>
+ <version>3.0.1</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-cli</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-common</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-hwi</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-jdbc</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-metastore</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-service</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-shims</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-serde</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>1.6.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>1.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <version>1.6.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>0.20.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ <version>1.1.1</version>
+ <type>jar</type>
+ <classifier>api</classifier>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>r06</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>stringtemplate</artifactId>
+ <version>3.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.derby</groupId>
+ <artifactId>derby</artifactId>
+ <version>10.8.1.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <version>0.90.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-cc</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-nc</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-translator</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-optimizer</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>patch</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ <phase>package</phase>
+ <configuration>
+ <classifier>patch</classifier>
+ <finalName>a-hive</finalName>
+ <includes>
+ <include>**/org/apache/**</include>
+ </includes>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>appassembler-maven-plugin</artifactId>
+ <version>1.3</version>
+ <executions>
+ <execution>
+ <configuration>
+ <programs>
+ <program>
+ <mainClass>edu.uci.ics.asterix.hive.cli.CliDriver</mainClass>
+ <name>algebricks-hivesterix-cmd</name>
+ </program>
+ </programs>
+ <repositoryLayout>flat</repositoryLayout>
+ <repositoryName>lib</repositoryName>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>assemble</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-assembly-plugin</artifactId>
+ <version>2.2-beta-5</version>
+ <executions>
+ <execution>
+ <configuration>
+ <descriptors>
+ <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
+ </descriptors>
+ </configuration>
+ <phase>package</phase>
+ <goals>
+ <goal>attached</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.13</version>
+ <configuration>
+ <forkMode>pertest</forkMode>
+ <argLine>-enableassertions -Xmx2047m -Dfile.encoding=UTF-8
+ -Djava.util.logging.config.file=src/test/resources/logging.properties</argLine>
+ <includes>
+ <include>**/test/optimizer/*TestSuite.java</include>
+ <include>**/test/optimizer/*Test.java</include>
+ <include>**/test/runtimefunction/*TestSuite.java</include>
+ <include>**/test/runtimefunction/*Test.java</include>
+ </includes>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>2.5</version>
+ <executions>
+ <execution>
+ <id>copy-scripts</id>
+ <!-- here the phase you need -->
+ <phase>package</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/appassembler/bin</outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/resources/scripts</directory>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ <execution>
+ <id>copy-conf</id>
+ <!-- here the phase you need -->
+ <phase>package</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>target/appassembler/conf</outputDirectory>
+ <resources>
+ <resource>
+ <directory>src/main/resources/conf</directory>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-clean-plugin</artifactId>
+ <version>2.5</version>
+ <configuration>
+ <filesets>
+ <fileset>
+ <directory>.</directory>
+ <includes>
+ <include>metastore*</include>
+ <include>hadoop*</include>
+ <include>edu*</include>
+ <include>tmp*</include>
+ <include>build*</include>
+ <include>target*</include>
+ <include>log*</include>
+ <include>derby.log</include>
+ <include>ClusterController*</include>
+ </includes>
+ </fileset>
+ </filesets>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <repositories>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>third-party</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/third-party</url>
+ </repository>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>hyracks-public-release</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-public-releases</url>
+ </repository>
+ </repositories>
+</project>
diff --git a/hivesterix/resource/asterix/destroy.sh b/hivesterix/hivesterix-dist/resource/asterix/destroy.sh
similarity index 100%
rename from hivesterix/resource/asterix/destroy.sh
rename to hivesterix/hivesterix-dist/resource/asterix/destroy.sh
diff --git a/hivesterix/resource/asterix/hivedeploy.hcli b/hivesterix/hivesterix-dist/resource/asterix/hivedeploy.hcli
similarity index 100%
rename from hivesterix/resource/asterix/hivedeploy.hcli
rename to hivesterix/hivesterix-dist/resource/asterix/hivedeploy.hcli
diff --git a/hivesterix/resource/asterix/hivedestroy.hcli b/hivesterix/hivesterix-dist/resource/asterix/hivedestroy.hcli
similarity index 100%
rename from hivesterix/resource/asterix/hivedestroy.hcli
rename to hivesterix/hivesterix-dist/resource/asterix/hivedestroy.hcli
diff --git a/hivesterix/resource/asterix/startall.sh b/hivesterix/hivesterix-dist/resource/asterix/startall.sh
similarity index 100%
rename from hivesterix/resource/asterix/startall.sh
rename to hivesterix/hivesterix-dist/resource/asterix/startall.sh
diff --git a/hivesterix/resource/asterix/startcc.sh b/hivesterix/hivesterix-dist/resource/asterix/startcc.sh
similarity index 100%
rename from hivesterix/resource/asterix/startcc.sh
rename to hivesterix/hivesterix-dist/resource/asterix/startcc.sh
diff --git a/hivesterix/resource/asterix/startnc.sh b/hivesterix/hivesterix-dist/resource/asterix/startnc.sh
similarity index 100%
rename from hivesterix/resource/asterix/startnc.sh
rename to hivesterix/hivesterix-dist/resource/asterix/startnc.sh
diff --git a/hivesterix/resource/asterix/stopall.sh b/hivesterix/hivesterix-dist/resource/asterix/stopall.sh
similarity index 100%
rename from hivesterix/resource/asterix/stopall.sh
rename to hivesterix/hivesterix-dist/resource/asterix/stopall.sh
diff --git a/hivesterix/resource/asterix/stopcc.sh b/hivesterix/hivesterix-dist/resource/asterix/stopcc.sh
similarity index 100%
rename from hivesterix/resource/asterix/stopcc.sh
rename to hivesterix/hivesterix-dist/resource/asterix/stopcc.sh
diff --git a/hivesterix/resource/asterix/stopnc.sh b/hivesterix/hivesterix-dist/resource/asterix/stopnc.sh
similarity index 100%
rename from hivesterix/resource/asterix/stopnc.sh
rename to hivesterix/hivesterix-dist/resource/asterix/stopnc.sh
diff --git a/hivesterix/resource/asterix_dbg/destroy.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/destroy.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/destroy.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/destroy.sh
diff --git a/hivesterix/resource/asterix_dbg/hivedeploy.hcli b/hivesterix/hivesterix-dist/resource/asterix_dbg/hivedeploy.hcli
similarity index 100%
rename from hivesterix/resource/asterix_dbg/hivedeploy.hcli
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/hivedeploy.hcli
diff --git a/hivesterix/resource/asterix_dbg/hivedestroy.hcli b/hivesterix/hivesterix-dist/resource/asterix_dbg/hivedestroy.hcli
similarity index 100%
rename from hivesterix/resource/asterix_dbg/hivedestroy.hcli
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/hivedestroy.hcli
diff --git a/hivesterix/resource/asterix_dbg/startall.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/startall.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/startall.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/startall.sh
diff --git a/hivesterix/resource/asterix_dbg/startcc.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/startcc.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/startcc.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/startcc.sh
diff --git a/hivesterix/resource/asterix_dbg/startnc.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/startnc.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/startnc.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/startnc.sh
diff --git a/hivesterix/resource/asterix_dbg/stopall.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/stopall.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/stopall.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/stopall.sh
diff --git a/hivesterix/resource/asterix_dbg/stopcc.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/stopcc.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/stopcc.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/stopcc.sh
diff --git a/hivesterix/resource/asterix_dbg/stopnc.sh b/hivesterix/hivesterix-dist/resource/asterix_dbg/stopnc.sh
similarity index 100%
rename from hivesterix/resource/asterix_dbg/stopnc.sh
rename to hivesterix/hivesterix-dist/resource/asterix_dbg/stopnc.sh
diff --git a/hivesterix/resource/bin/ext/cli.sh b/hivesterix/hivesterix-dist/resource/bin/ext/cli.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/cli.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/cli.sh
diff --git a/hivesterix/resource/bin/ext/help.sh b/hivesterix/hivesterix-dist/resource/bin/ext/help.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/help.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/help.sh
diff --git a/hivesterix/resource/bin/ext/hiveserver.sh b/hivesterix/hivesterix-dist/resource/bin/ext/hiveserver.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/hiveserver.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/hiveserver.sh
diff --git a/hivesterix/resource/bin/ext/hwi.sh b/hivesterix/hivesterix-dist/resource/bin/ext/hwi.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/hwi.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/hwi.sh
diff --git a/hivesterix/resource/bin/ext/jar.sh b/hivesterix/hivesterix-dist/resource/bin/ext/jar.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/jar.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/jar.sh
diff --git a/hivesterix/resource/bin/ext/lineage.sh b/hivesterix/hivesterix-dist/resource/bin/ext/lineage.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/lineage.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/lineage.sh
diff --git a/hivesterix/resource/bin/ext/metastore.sh b/hivesterix/hivesterix-dist/resource/bin/ext/metastore.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/metastore.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/metastore.sh
diff --git a/hivesterix/resource/bin/ext/rcfilecat.sh b/hivesterix/hivesterix-dist/resource/bin/ext/rcfilecat.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/rcfilecat.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/rcfilecat.sh
diff --git a/hivesterix/resource/bin/ext/util/execHiveCmd.sh b/hivesterix/hivesterix-dist/resource/bin/ext/util/execHiveCmd.sh
similarity index 100%
rename from hivesterix/resource/bin/ext/util/execHiveCmd.sh
rename to hivesterix/hivesterix-dist/resource/bin/ext/util/execHiveCmd.sh
diff --git a/hivesterix/resource/bin/hive b/hivesterix/hivesterix-dist/resource/bin/hive
similarity index 100%
rename from hivesterix/resource/bin/hive
rename to hivesterix/hivesterix-dist/resource/bin/hive
diff --git a/hivesterix/resource/bin/hive-config.sh b/hivesterix/hivesterix-dist/resource/bin/hive-config.sh
similarity index 100%
rename from hivesterix/resource/bin/hive-config.sh
rename to hivesterix/hivesterix-dist/resource/bin/hive-config.sh
diff --git a/hivesterix/resource/bin/init-hive-dfs.sh b/hivesterix/hivesterix-dist/resource/bin/init-hive-dfs.sh
similarity index 100%
rename from hivesterix/resource/bin/init-hive-dfs.sh
rename to hivesterix/hivesterix-dist/resource/bin/init-hive-dfs.sh
diff --git a/hivesterix/resource/deploy/balance.jar b/hivesterix/hivesterix-dist/resource/deploy/balance.jar
similarity index 100%
rename from hivesterix/resource/deploy/balance.jar
rename to hivesterix/hivesterix-dist/resource/deploy/balance.jar
Binary files differ
diff --git a/hivesterix/resource/deploy/deploy.sh b/hivesterix/hivesterix-dist/resource/deploy/deploy.sh
similarity index 100%
rename from hivesterix/resource/deploy/deploy.sh
rename to hivesterix/hivesterix-dist/resource/deploy/deploy.sh
diff --git a/hivesterix/resource/deploy/jar.sh b/hivesterix/hivesterix-dist/resource/deploy/jar.sh
similarity index 100%
rename from hivesterix/resource/deploy/jar.sh
rename to hivesterix/hivesterix-dist/resource/deploy/jar.sh
diff --git a/hivesterix/resource/hivesterix/execute.sh b/hivesterix/hivesterix-dist/resource/hivesterix/execute.sh
similarity index 100%
rename from hivesterix/resource/hivesterix/execute.sh
rename to hivesterix/hivesterix-dist/resource/hivesterix/execute.sh
diff --git a/hivesterix/resource/hivesterix/loop.sh b/hivesterix/hivesterix-dist/resource/hivesterix/loop.sh
similarity index 100%
rename from hivesterix/resource/hivesterix/loop.sh
rename to hivesterix/hivesterix-dist/resource/hivesterix/loop.sh
diff --git a/hivesterix/resource/hivesterix/perf.sh b/hivesterix/hivesterix-dist/resource/hivesterix/perf.sh
similarity index 100%
rename from hivesterix/resource/hivesterix/perf.sh
rename to hivesterix/hivesterix-dist/resource/hivesterix/perf.sh
diff --git a/hivesterix/resource/hivesterix/perflog/tpch100/result.log b/hivesterix/hivesterix-dist/resource/hivesterix/perflog/tpch100/result.log
similarity index 100%
rename from hivesterix/resource/hivesterix/perflog/tpch100/result.log
rename to hivesterix/hivesterix-dist/resource/hivesterix/perflog/tpch100/result.log
diff --git a/hivesterix/resource/hivesterix/perflog/tpch_sample/result.log b/hivesterix/hivesterix-dist/resource/hivesterix/perflog/tpch_sample/result.log
similarity index 100%
rename from hivesterix/resource/hivesterix/perflog/tpch_sample/result.log
rename to hivesterix/hivesterix-dist/resource/hivesterix/perflog/tpch_sample/result.log
diff --git a/hivesterix/resource/hivesterix/startcluster.sh b/hivesterix/hivesterix-dist/resource/hivesterix/startcluster.sh
similarity index 100%
rename from hivesterix/resource/hivesterix/startcluster.sh
rename to hivesterix/hivesterix-dist/resource/hivesterix/startcluster.sh
diff --git a/hivesterix/resource/hivesterix/stress.sh b/hivesterix/hivesterix-dist/resource/hivesterix/stress.sh
similarity index 100%
rename from hivesterix/resource/hivesterix/stress.sh
rename to hivesterix/hivesterix-dist/resource/hivesterix/stress.sh
diff --git a/hivesterix/resource/hivesterix/tpch100/q10_returned_item.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q10_returned_item.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q10_returned_item.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q10_returned_item.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q11_important_stock.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q11_important_stock.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q11_important_stock.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q11_important_stock.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q12_shipping.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q12_shipping.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q12_shipping.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q12_shipping.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q13_customer_distribution.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q13_customer_distribution.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q13_customer_distribution.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q13_customer_distribution.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q14_promotion_effect.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q14_promotion_effect.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q14_promotion_effect.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q15_top_supplier.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q15_top_supplier.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q15_top_supplier.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q15_top_supplier.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q16_parts_supplier_relationship.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q16_parts_supplier_relationship.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q16_parts_supplier_relationship.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q16_parts_supplier_relationship.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q17_small_quantity_order_revenue.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q17_small_quantity_order_revenue.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q17_small_quantity_order_revenue.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q17_small_quantity_order_revenue.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q18_large_volume_customer.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q18_large_volume_customer.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q18_large_volume_customer.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q19_discounted_revenue.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q19_discounted_revenue.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q19_discounted_revenue.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q1_pricing_summary_report.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q20_potential_part_promotion.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q20_potential_part_promotion.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q20_potential_part_promotion.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q21_suppliers_who_kept_orders_waiting.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q21_suppliers_who_kept_orders_waiting.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q21_suppliers_who_kept_orders_waiting.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q21_suppliers_who_kept_orders_waiting.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q22_global_sales_opportunity.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q22_global_sales_opportunity.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q22_global_sales_opportunity.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q22_global_sales_opportunity.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q2_minimum_cost_supplier.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q2_minimum_cost_supplier.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q2_minimum_cost_supplier.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q2_minimum_cost_supplier.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q3_shipping_priority.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q4_order_priority.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q4_order_priority.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q4_order_priority.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q4_order_priority.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q5_local_supplier_volume.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q6_forecast_revenue_change.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q6_forecast_revenue_change.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q6_forecast_revenue_change.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q6_forecast_revenue_change.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q7_volume_shipping.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q7_volume_shipping.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q7_volume_shipping.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q8_national_market_share.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q8_national_market_share.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q8_national_market_share.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q8_national_market_share.hive
diff --git a/hivesterix/resource/hivesterix/tpch100/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch100/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch100/q9_product_type_profit.hive
diff --git a/hivesterix/resource/hivesterix/tpch_sample/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch_sample/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q1_pricing_summary_report.hive
diff --git a/hivesterix/resource/hivesterix/tpch_sample/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch_sample/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q3_shipping_priority.hive
diff --git a/hivesterix/resource/hivesterix/tpch_sample/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch_sample/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q5_local_supplier_volume.hive
diff --git a/hivesterix/resource/hivesterix/tpch_sample/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/resource/hivesterix/tpch_sample/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/resource/hivesterix/tpch_sample/q9_product_type_profit.hive
diff --git a/hivesterix/resource/tpch/q10_returned_item.hive b/hivesterix/hivesterix-dist/resource/tpch/q10_returned_item.hive
similarity index 100%
rename from hivesterix/resource/tpch/q10_returned_item.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q10_returned_item.hive
diff --git a/hivesterix/resource/tpch/q11_important_stock.hive b/hivesterix/hivesterix-dist/resource/tpch/q11_important_stock.hive
similarity index 100%
rename from hivesterix/resource/tpch/q11_important_stock.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q11_important_stock.hive
diff --git a/hivesterix/resource/tpch/q12_shipping.hive b/hivesterix/hivesterix-dist/resource/tpch/q12_shipping.hive
similarity index 100%
rename from hivesterix/resource/tpch/q12_shipping.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q12_shipping.hive
diff --git a/hivesterix/resource/tpch/q13_customer_distribution.hive b/hivesterix/hivesterix-dist/resource/tpch/q13_customer_distribution.hive
similarity index 100%
rename from hivesterix/resource/tpch/q13_customer_distribution.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q13_customer_distribution.hive
diff --git a/hivesterix/resource/tpch/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/resource/tpch/q14_promotion_effect.hive
similarity index 100%
rename from hivesterix/resource/tpch/q14_promotion_effect.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q14_promotion_effect.hive
diff --git a/hivesterix/resource/tpch/q15_top_supplier.hive b/hivesterix/hivesterix-dist/resource/tpch/q15_top_supplier.hive
similarity index 100%
rename from hivesterix/resource/tpch/q15_top_supplier.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q15_top_supplier.hive
diff --git a/hivesterix/resource/tpch/q16_parts_supplier_relationship.hive b/hivesterix/hivesterix-dist/resource/tpch/q16_parts_supplier_relationship.hive
similarity index 100%
rename from hivesterix/resource/tpch/q16_parts_supplier_relationship.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q16_parts_supplier_relationship.hive
diff --git a/hivesterix/resource/tpch/q17_small_quantity_order_revenue.hive b/hivesterix/hivesterix-dist/resource/tpch/q17_small_quantity_order_revenue.hive
similarity index 100%
rename from hivesterix/resource/tpch/q17_small_quantity_order_revenue.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q17_small_quantity_order_revenue.hive
diff --git a/hivesterix/resource/tpch/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/resource/tpch/q18_large_volume_customer.hive
similarity index 100%
rename from hivesterix/resource/tpch/q18_large_volume_customer.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q18_large_volume_customer.hive
diff --git a/hivesterix/resource/tpch/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/resource/tpch/q19_discounted_revenue.hive
similarity index 100%
rename from hivesterix/resource/tpch/q19_discounted_revenue.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q19_discounted_revenue.hive
diff --git a/hivesterix/resource/tpch/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/resource/tpch/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/resource/tpch/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q1_pricing_summary_report.hive
diff --git a/hivesterix/resource/tpch/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/resource/tpch/q20_potential_part_promotion.hive
similarity index 100%
rename from hivesterix/resource/tpch/q20_potential_part_promotion.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q20_potential_part_promotion.hive
diff --git a/hivesterix/resource/tpch/q21_suppliers_who_kept_orders_waiting.hive b/hivesterix/hivesterix-dist/resource/tpch/q21_suppliers_who_kept_orders_waiting.hive
similarity index 100%
rename from hivesterix/resource/tpch/q21_suppliers_who_kept_orders_waiting.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q21_suppliers_who_kept_orders_waiting.hive
diff --git a/hivesterix/resource/tpch/q22_global_sales_opportunity.hive b/hivesterix/hivesterix-dist/resource/tpch/q22_global_sales_opportunity.hive
similarity index 100%
rename from hivesterix/resource/tpch/q22_global_sales_opportunity.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q22_global_sales_opportunity.hive
diff --git a/hivesterix/resource/tpch/q2_copy.hive b/hivesterix/hivesterix-dist/resource/tpch/q2_copy.hive
similarity index 100%
rename from hivesterix/resource/tpch/q2_copy.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q2_copy.hive
diff --git a/hivesterix/resource/tpch/q2_minimum_cost_supplier.hive b/hivesterix/hivesterix-dist/resource/tpch/q2_minimum_cost_supplier.hive
similarity index 100%
rename from hivesterix/resource/tpch/q2_minimum_cost_supplier.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q2_minimum_cost_supplier.hive
diff --git a/hivesterix/resource/tpch/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/resource/tpch/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/resource/tpch/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q3_shipping_priority.hive
diff --git a/hivesterix/resource/tpch/q4_order_priority.hive b/hivesterix/hivesterix-dist/resource/tpch/q4_order_priority.hive
similarity index 100%
rename from hivesterix/resource/tpch/q4_order_priority.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q4_order_priority.hive
diff --git a/hivesterix/resource/tpch/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/resource/tpch/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/resource/tpch/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q5_local_supplier_volume.hive
diff --git a/hivesterix/resource/tpch/q6_forecast_revenue_change.hive b/hivesterix/hivesterix-dist/resource/tpch/q6_forecast_revenue_change.hive
similarity index 100%
rename from hivesterix/resource/tpch/q6_forecast_revenue_change.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q6_forecast_revenue_change.hive
diff --git a/hivesterix/resource/tpch/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/resource/tpch/q7_volume_shipping.hive
similarity index 100%
rename from hivesterix/resource/tpch/q7_volume_shipping.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q7_volume_shipping.hive
diff --git a/hivesterix/resource/tpch/q8_national_market_share.hive b/hivesterix/hivesterix-dist/resource/tpch/q8_national_market_share.hive
similarity index 100%
rename from hivesterix/resource/tpch/q8_national_market_share.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q8_national_market_share.hive
diff --git a/hivesterix/resource/tpch/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/resource/tpch/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/resource/tpch/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/resource/tpch/q9_product_type_profit.hive
diff --git a/hivesterix/resource/tpch100/q10_returned_item.hive b/hivesterix/hivesterix-dist/resource/tpch100/q10_returned_item.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q10_returned_item.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q10_returned_item.hive
diff --git a/hivesterix/resource/tpch100/q11_important_stock.hive b/hivesterix/hivesterix-dist/resource/tpch100/q11_important_stock.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q11_important_stock.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q11_important_stock.hive
diff --git a/hivesterix/resource/tpch100/q12_shipping.hive b/hivesterix/hivesterix-dist/resource/tpch100/q12_shipping.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q12_shipping.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q12_shipping.hive
diff --git a/hivesterix/resource/tpch100/q13_customer_distribution.hive b/hivesterix/hivesterix-dist/resource/tpch100/q13_customer_distribution.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q13_customer_distribution.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q13_customer_distribution.hive
diff --git a/hivesterix/resource/tpch100/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/resource/tpch100/q14_promotion_effect.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q14_promotion_effect.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q14_promotion_effect.hive
diff --git a/hivesterix/resource/tpch100/q15_top_supplier.hive b/hivesterix/hivesterix-dist/resource/tpch100/q15_top_supplier.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q15_top_supplier.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q15_top_supplier.hive
diff --git a/hivesterix/resource/tpch100/q16_parts_supplier_relationship.hive b/hivesterix/hivesterix-dist/resource/tpch100/q16_parts_supplier_relationship.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q16_parts_supplier_relationship.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q16_parts_supplier_relationship.hive
diff --git a/hivesterix/resource/tpch100/q17_small_quantity_order_revenue.hive b/hivesterix/hivesterix-dist/resource/tpch100/q17_small_quantity_order_revenue.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q17_small_quantity_order_revenue.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q17_small_quantity_order_revenue.hive
diff --git a/hivesterix/resource/tpch100/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/resource/tpch100/q18_large_volume_customer.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q18_large_volume_customer.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q18_large_volume_customer.hive
diff --git a/hivesterix/resource/tpch100/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/resource/tpch100/q19_discounted_revenue.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q19_discounted_revenue.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q19_discounted_revenue.hive
diff --git a/hivesterix/resource/tpch100/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/resource/tpch100/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q1_pricing_summary_report.hive
diff --git a/hivesterix/resource/tpch100/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/resource/tpch100/q20_potential_part_promotion.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q20_potential_part_promotion.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q20_potential_part_promotion.hive
diff --git a/hivesterix/resource/tpch100/q21_suppliers_who_kept_orders_waiting.hive b/hivesterix/hivesterix-dist/resource/tpch100/q21_suppliers_who_kept_orders_waiting.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q21_suppliers_who_kept_orders_waiting.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q21_suppliers_who_kept_orders_waiting.hive
diff --git a/hivesterix/resource/tpch100/q22_global_sales_opportunity.hive b/hivesterix/hivesterix-dist/resource/tpch100/q22_global_sales_opportunity.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q22_global_sales_opportunity.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q22_global_sales_opportunity.hive
diff --git a/hivesterix/resource/tpch100/q2_minimum_cost_supplier.hive b/hivesterix/hivesterix-dist/resource/tpch100/q2_minimum_cost_supplier.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q2_minimum_cost_supplier.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q2_minimum_cost_supplier.hive
diff --git a/hivesterix/resource/tpch100/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/resource/tpch100/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q3_shipping_priority.hive
diff --git a/hivesterix/resource/tpch100/q4_order_priority.hive b/hivesterix/hivesterix-dist/resource/tpch100/q4_order_priority.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q4_order_priority.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q4_order_priority.hive
diff --git a/hivesterix/resource/tpch100/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/resource/tpch100/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q5_local_supplier_volume.hive
diff --git a/hivesterix/resource/tpch100/q6_forecast_revenue_change.hive b/hivesterix/hivesterix-dist/resource/tpch100/q6_forecast_revenue_change.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q6_forecast_revenue_change.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q6_forecast_revenue_change.hive
diff --git a/hivesterix/resource/tpch100/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/resource/tpch100/q7_volume_shipping.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q7_volume_shipping.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q7_volume_shipping.hive
diff --git a/hivesterix/resource/tpch100/q8_national_market_share.hive b/hivesterix/hivesterix-dist/resource/tpch100/q8_national_market_share.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q8_national_market_share.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q8_national_market_share.hive
diff --git a/hivesterix/resource/tpch100/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/resource/tpch100/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/resource/tpch100/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/resource/tpch100/q9_product_type_profit.hive
diff --git a/hivesterix/src/main/assembly/binary-assembly.xml b/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
similarity index 100%
rename from hivesterix/src/main/assembly/binary-assembly.xml
rename to hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
similarity index 93%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
rename to hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
index ed5ab70..e6f47cf 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
+++ b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
@@ -1,5 +1,10 @@
package edu.uci.ics.hivesterix.runtime.exec;
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Serializable;
import java.util.ArrayList;
@@ -8,6 +13,7 @@
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Properties;
import java.util.Set;
import org.apache.commons.logging.Log;
@@ -27,6 +33,7 @@
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
import edu.uci.ics.hivesterix.logical.expression.HiveExpressionTypeComputer;
import edu.uci.ics.hivesterix.logical.expression.HiveMergeAggregationExpressionFactory;
import edu.uci.ics.hivesterix.logical.expression.HiveNullableTypeComputer;
@@ -34,7 +41,6 @@
import edu.uci.ics.hivesterix.logical.plan.HiveAlgebricksTranslator;
import edu.uci.ics.hivesterix.logical.plan.HiveLogicalPlanAndMetaData;
import edu.uci.ics.hivesterix.optimizer.rulecollections.HiveRuleCollections;
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
import edu.uci.ics.hivesterix.runtime.factory.evaluator.HiveExpressionRuntimeProvider;
import edu.uci.ics.hivesterix.runtime.factory.nullwriter.HiveNullWriterFactory;
import edu.uci.ics.hivesterix.runtime.inspector.HiveBinaryBooleanInspectorFactory;
@@ -73,6 +79,7 @@
public class HyracksExecutionEngine implements IExecutionEngine {
private static final Log LOG = LogFactory.getLog(HyracksExecutionEngine.class.getName());
+ private static final String clusterPropertiesPath = "conf/cluster.properties";
private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_LOGICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();
private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_PHYSICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();
@@ -139,6 +146,11 @@
*/
private PrintWriter planPrinter;
+ /**
+ * properties
+ */
+ private Properties clusterProps;
+
public HyracksExecutionEngine(HiveConf conf) {
this.conf = conf;
init(conf);
@@ -220,8 +232,12 @@
}
private void codeGen() throws AlgebricksException {
- // number of cpu cores in the cluster
- builder.setClusterLocations(new AlgebricksAbsolutePartitionConstraint(ConfUtil.getNCs()));
+ try {
+ // number of cpu cores in the cluster
+ builder.setClusterLocations(new AlgebricksAbsolutePartitionConstraint(ConfUtil.getNCs()));
+ } catch (Exception e) {
+ throw new AlgebricksException(e);
+ }
// builder.setClusterTopology(ConfUtil.getClusterTopology());
builder.setBinaryBooleanInspectorFactory(HiveBinaryBooleanInspectorFactory.INSTANCE);
builder.setBinaryIntegerInspectorFactory(HiveBinaryIntegerInspectorFactory.INSTANCE);
@@ -251,7 +267,7 @@
public int executeJob() {
try {
codeGen();
- executeHyraxJob(jobSpec);
+ executeHyracksJob(jobSpec);
} catch (Exception e) {
e.printStackTrace();
return 1;
@@ -505,20 +521,32 @@
}
}
- private void executeHyraxJob(JobSpecification job) throws Exception {
- String ipAddress = conf.get("hive.hyracks.host");
- int port = Integer.parseInt(conf.get("hive.hyracks.port"));
- String applicationName = conf.get("hive.hyracks.app");
- //System.out.println("connect to " + ipAddress + " " + port);
+ private void executeHyracksJob(JobSpecification job) throws Exception {
+
+ /**
+ * load the properties file if it is not loaded
+ */
+ if (clusterProps == null) {
+ clusterProps = new Properties();
+ InputStream confIn = new FileInputStream(clusterPropertiesPath);
+ clusterProps.load(confIn);
+ confIn.close();
+ }
+
+ Process process = Runtime.getRuntime().exec("src/main/resources/scripts/getip.sh");
+ BufferedReader ipReader = new BufferedReader(new InputStreamReader(
+ new DataInputStream(process.getInputStream())));
+ String ipAddress = ipReader.readLine();
+ ipReader.close();
+ int port = Integer.parseInt(clusterProps.getProperty("CC_CLIENTPORT"));
+ String applicationName = "hivesterix";
IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);
-
- //System.out.println("get connected");
long start = System.currentTimeMillis();
JobId jobId = hcc.startJob(applicationName, job);
hcc.waitForCompletion(jobId);
- //System.out.println("job finished: " + jobId.toString());
+ // System.out.println("job finished: " + jobId.toString());
// call all leave nodes to end
for (Operator leaf : leaveOps) {
jobClose(leaf);
diff --git a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java
new file mode 100644
index 0000000..c64a39b
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java
@@ -0,0 +1,25 @@
+package edu.uci.ics.hivesterix.runtime.exec;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Task;
+
+public interface IExecutionEngine {
+
+ /**
+ * compile the job
+ *
+ * @param rootTasks
+ * : Hive MapReduce plan
+ * @return 0 pass, 1 fail
+ */
+ public int compileJob(List<Task<? extends Serializable>> rootTasks);
+
+ /**
+ * execute the job with latest compiled plan
+ *
+ * @return
+ */
+ public int executeJob();
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
new file mode 100644
index 0000000..a385742
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
@@ -0,0 +1,1310 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql;
+
+import java.io.DataInput;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Schema;
+import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.ExecDriver;
+import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.StatsTask;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.TaskResult;
+import org.apache.hadoop.hive.ql.exec.TaskRunner;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
+import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
+import org.apache.hadoop.hive.ql.hooks.Hook;
+import org.apache.hadoop.hive.ql.hooks.HookContext;
+import org.apache.hadoop.hive.ql.hooks.PostExecute;
+import org.apache.hadoop.hive.ql.hooks.PreExecute;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockManagerCtx;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
+import org.apache.hadoop.hive.ql.lockmgr.LockException;
+import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
+import org.apache.hadoop.hive.ql.metadata.DummyPartition;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ErrorMsg;
+import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
+import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.parse.ParseException;
+import org.apache.hadoop.hive.ql.parse.ParseUtils;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
+import org.apache.hadoop.hive.ql.plan.ConditionalResolver;
+import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.processors.CommandProcessor;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.mapred.ClusterStatus;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import edu.uci.ics.hivesterix.runtime.exec.HyracksExecutionEngine;
+import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;
+
+@SuppressWarnings({ "deprecation", "unused" })
+public class Driver implements CommandProcessor {
+
+ static final private Log LOG = LogFactory.getLog(Driver.class.getName());
+ static final private LogHelper console = new LogHelper(LOG);
+
+ // hive-sterix
+ private IExecutionEngine engine;
+ private boolean hivesterix = false;
+
+ private int maxRows = 100;
+ ByteStream.Output bos = new ByteStream.Output();
+
+ private HiveConf conf;
+ private DataInput resStream;
+ private Context ctx;
+ private QueryPlan plan;
+ private Schema schema;
+ private HiveLockManager hiveLockMgr;
+
+ private String errorMessage;
+ private String SQLState;
+
+ // A limit on the number of threads that can be launched
+ private int maxthreads;
+ private final int sleeptime = 2000;
+
+ protected int tryCount = Integer.MAX_VALUE;
+
+ private int checkLockManager() {
+ boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
+ if (supportConcurrency && (hiveLockMgr == null)) {
+ try {
+ setLockManager();
+ } catch (SemanticException e) {
+ errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (12);
+ }
+ }
+ return (0);
+ }
+
+ private void setLockManager() throws SemanticException {
+ boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
+ if (supportConcurrency) {
+ String lockMgr = conf.getVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER);
+ if ((lockMgr == null) || (lockMgr.isEmpty())) {
+ throw new SemanticException(ErrorMsg.LOCKMGR_NOT_SPECIFIED.getMsg());
+ }
+
+ try {
+ hiveLockMgr = (HiveLockManager) ReflectionUtils.newInstance(conf.getClassByName(lockMgr), conf);
+ hiveLockMgr.setContext(new HiveLockManagerCtx(conf));
+ } catch (Exception e) {
+ throw new SemanticException(ErrorMsg.LOCKMGR_NOT_INITIALIZED.getMsg() + e.getMessage());
+ }
+ }
+ }
+
+ public void init() {
+ Operator.resetId();
+ }
+
+ /**
+ * Return the status information about the Map-Reduce cluster
+ */
+ public ClusterStatus getClusterStatus() throws Exception {
+ ClusterStatus cs;
+ try {
+ JobConf job = new JobConf(conf, ExecDriver.class);
+ JobClient jc = new JobClient(job);
+ cs = jc.getClusterStatus();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw e;
+ }
+ LOG.info("Returning cluster status: " + cs.toString());
+ return cs;
+ }
+
+ public Schema getSchema() {
+ return schema;
+ }
+
+ /**
+ * Get a Schema with fields represented with native Hive types
+ */
+ public static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) {
+ Schema schema = null;
+
+ // If we have a plan, prefer its logical result schema if it's
+ // available; otherwise, try digging out a fetch task; failing that,
+ // give up.
+ if (sem == null) {
+ // can't get any info without a plan
+ } else if (sem.getResultSchema() != null) {
+ List<FieldSchema> lst = sem.getResultSchema();
+ schema = new Schema(lst, null);
+ } else if (sem.getFetchTask() != null) {
+ FetchTask ft = sem.getFetchTask();
+ TableDesc td = ft.getTblDesc();
+ // partitioned tables don't have tableDesc set on the FetchTask.
+ // Instead
+ // they have a list of PartitionDesc objects, each with a table
+ // desc.
+ // Let's
+ // try to fetch the desc for the first partition and use it's
+ // deserializer.
+ if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
+ if (ft.getWork().getPartDesc().size() > 0) {
+ td = ft.getWork().getPartDesc().get(0).getTableDesc();
+ }
+ }
+
+ if (td == null) {
+ LOG.info("No returning schema.");
+ } else {
+ String tableName = "result";
+ List<FieldSchema> lst = null;
+ try {
+ lst = MetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer());
+ } catch (Exception e) {
+ LOG.warn("Error getting schema: " + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ }
+ if (lst != null) {
+ schema = new Schema(lst, null);
+ }
+ }
+ }
+ if (schema == null) {
+ schema = new Schema();
+ }
+ LOG.info("Returning Hive schema: " + schema);
+ return schema;
+ }
+
+ /**
+ * Get a Schema with fields represented with Thrift DDL types
+ */
+ public Schema getThriftSchema() throws Exception {
+ Schema schema;
+ try {
+ schema = getSchema();
+ if (schema != null) {
+ List<FieldSchema> lst = schema.getFieldSchemas();
+ // Go over the schema and convert type to thrift type
+ if (lst != null) {
+ for (FieldSchema f : lst) {
+ f.setType(MetaStoreUtils.typeToThriftType(f.getType()));
+ }
+ }
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw e;
+ }
+ LOG.info("Returning Thrift schema: " + schema);
+ return schema;
+ }
+
+ /**
+ * Return the maximum number of rows returned by getResults
+ */
+ public int getMaxRows() {
+ return maxRows;
+ }
+
+ /**
+ * Set the maximum number of rows returned by getResults
+ */
+ public void setMaxRows(int maxRows) {
+ this.maxRows = maxRows;
+ }
+
+ public boolean hasReduceTasks(List<Task<? extends Serializable>> tasks) {
+ if (tasks == null) {
+ return false;
+ }
+
+ boolean hasReduce = false;
+ for (Task<? extends Serializable> task : tasks) {
+ if (task.hasReduce()) {
+ return true;
+ }
+
+ hasReduce = (hasReduce || hasReduceTasks(task.getChildTasks()));
+ }
+ return hasReduce;
+ }
+
+ /**
+ * for backwards compatibility with current tests
+ */
+ public Driver(HiveConf conf) {
+ this.conf = conf;
+
+ // hivesterix
+ engine = new HyracksExecutionEngine(conf);
+ }
+
+ public Driver() {
+ if (SessionState.get() != null) {
+ conf = SessionState.get().getConf();
+ }
+
+ // hivesterix
+ engine = new HyracksExecutionEngine(conf);
+ }
+
+ // hivesterix: plan printer
+ public Driver(HiveConf conf, PrintWriter planPrinter) {
+ this.conf = conf;
+ engine = new HyracksExecutionEngine(conf, planPrinter);
+ }
+
+ public void clear() {
+ this.hivesterix = false;
+ }
+
+ /**
+ * Compile a new query. Any currently-planned query associated with this
+ * Driver is discarded.
+ *
+ * @param command
+ * The SQL query to compile.
+ */
+ public int compile(String command) {
+ if (plan != null) {
+ close();
+ plan = null;
+ }
+
+ TaskFactory.resetId();
+
+ try {
+ command = new VariableSubstitution().substitute(conf, command);
+ ctx = new Context(conf);
+
+ ParseDriver pd = new ParseDriver();
+ ASTNode tree = pd.parse(command, ctx);
+ tree = ParseUtils.findRootNonNullToken(tree);
+
+ BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
+ List<AbstractSemanticAnalyzerHook> saHooks = getSemanticAnalyzerHooks();
+
+ // Do semantic analysis and plan generation
+ if (saHooks != null) {
+ HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
+ hookCtx.setConf(conf);
+ for (AbstractSemanticAnalyzerHook hook : saHooks) {
+ tree = hook.preAnalyze(hookCtx, tree);
+ }
+ sem.analyze(tree, ctx);
+ for (AbstractSemanticAnalyzerHook hook : saHooks) {
+ hook.postAnalyze(hookCtx, sem.getRootTasks());
+ }
+ } else {
+ sem.analyze(tree, ctx);
+ }
+
+ LOG.info("Semantic Analysis Completed");
+
+ // validate the plan
+ sem.validate();
+
+ plan = new QueryPlan(command, sem);
+ // initialize FetchTask right here
+ if (plan.getFetchTask() != null) {
+ plan.getFetchTask().initialize(conf, plan, null);
+ }
+
+ // get the output schema
+ schema = getSchema(sem, conf);
+
+ // test Only - serialize the query plan and deserialize it
+ if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
+
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+
+ String queryPlanFileName = ctx.getLocalScratchDir(true) + Path.SEPARATOR_CHAR + "queryplan.xml";
+ LOG.info("query plan = " + queryPlanFileName);
+ queryPlanFileName = new Path(queryPlanFileName).toUri().getPath();
+
+ // serialize the queryPlan
+ FileOutputStream fos = new FileOutputStream(queryPlanFileName);
+ Utilities.serializeQueryPlan(plan, fos);
+ fos.close();
+
+ // deserialize the queryPlan
+ FileInputStream fis = new FileInputStream(queryPlanFileName);
+ QueryPlan newPlan = Utilities.deserializeQueryPlan(fis, conf);
+ fis.close();
+
+ // Use the deserialized plan
+ plan = newPlan;
+ }
+
+ // initialize FetchTask right here
+ if (plan.getFetchTask() != null) {
+ plan.getFetchTask().initialize(conf, plan, null);
+ }
+
+ // do the authorization check
+ if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
+ try {
+ // doAuthorization(sem);
+ } catch (AuthorizationException authExp) {
+ console.printError("Authorization failed:" + authExp.getMessage()
+ + ". Use show grant to get more details.");
+ return 403;
+ }
+ }
+
+ // hyracks run
+ if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
+ hivesterix = true;
+ return engine.compileJob(sem.getRootTasks());
+ }
+
+ return 0;
+ } catch (SemanticException e) {
+ errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (10);
+ } catch (ParseException e) {
+ errorMessage = "FAILED: Parse Error: " + e.getMessage();
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (11);
+ } catch (Exception e) {
+ errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (12);
+ }
+ }
+
+ private void doAuthorization(BaseSemanticAnalyzer sem) throws HiveException, AuthorizationException {
+ HashSet<ReadEntity> inputs = sem.getInputs();
+ HashSet<WriteEntity> outputs = sem.getOutputs();
+ SessionState ss = SessionState.get();
+ HiveOperation op = ss.getHiveOperation();
+ Hive db = sem.getDb();
+ if (op != null) {
+ if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.CREATETABLE)) {
+ ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
+ HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
+ } else {
+ // if (op.equals(HiveOperation.IMPORT)) {
+ // ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
+ // if (!isa.existsTable()) {
+ ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
+ HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
+ // }
+ // }
+ }
+ if (outputs != null && outputs.size() > 0) {
+ for (WriteEntity write : outputs) {
+
+ if (write.getType() == WriteEntity.Type.PARTITION) {
+ Partition part = db.getPartition(write.getTable(), write.getPartition().getSpec(), false);
+ if (part != null) {
+ ss.getAuthorizer().authorize(write.getPartition(), null, op.getOutputRequiredPrivileges());
+ continue;
+ }
+ }
+
+ if (write.getTable() != null) {
+ ss.getAuthorizer().authorize(write.getTable(), null, op.getOutputRequiredPrivileges());
+ }
+ }
+
+ }
+ }
+
+ if (inputs != null && inputs.size() > 0) {
+
+ Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>();
+ Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>();
+
+ Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
+ for (ReadEntity read : inputs) {
+ if (read.getPartition() != null) {
+ Table tbl = read.getTable();
+ String tblName = tbl.getTableName();
+ if (tableUsePartLevelAuth.get(tblName) == null) {
+ boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE"
+ .equalsIgnoreCase(tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))));
+ if (usePartLevelPriv) {
+ tableUsePartLevelAuth.put(tblName, Boolean.TRUE);
+ } else {
+ tableUsePartLevelAuth.put(tblName, Boolean.FALSE);
+ }
+ }
+ }
+ }
+
+ if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) {
+ SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
+ ParseContext parseCtx = querySem.getParseContext();
+ Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();
+
+ for (Map.Entry<String, Operator<? extends Serializable>> topOpMap : querySem.getParseContext()
+ .getTopOps().entrySet()) {
+ Operator<? extends Serializable> topOp = topOpMap.getValue();
+ if (topOp instanceof TableScanOperator && tsoTopMap.containsKey(topOp)) {
+ TableScanOperator tableScanOp = (TableScanOperator) topOp;
+ Table tbl = tsoTopMap.get(tableScanOp);
+ List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
+ List<FieldSchema> columns = tbl.getCols();
+ List<String> cols = new ArrayList<String>();
+ if (neededColumnIds != null && neededColumnIds.size() > 0) {
+ for (int i = 0; i < neededColumnIds.size(); i++) {
+ cols.add(columns.get(neededColumnIds.get(i)).getName());
+ }
+ } else {
+ for (int i = 0; i < columns.size(); i++) {
+ cols.add(columns.get(i).getName());
+ }
+ }
+ if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName())) {
+ String alias_id = topOpMap.getKey();
+ PrunedPartitionList partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
+ parseCtx.getOpToPartPruner().get(topOp), parseCtx.getConf(), alias_id,
+ parseCtx.getPrunedPartitions());
+ Set<Partition> parts = new HashSet<Partition>();
+ parts.addAll(partsList.getConfirmedPartns());
+ parts.addAll(partsList.getUnknownPartns());
+ for (Partition part : parts) {
+ List<String> existingCols = part2Cols.get(part);
+ if (existingCols == null) {
+ existingCols = new ArrayList<String>();
+ }
+ existingCols.addAll(cols);
+ part2Cols.put(part, existingCols);
+ }
+ } else {
+ List<String> existingCols = tab2Cols.get(tbl);
+ if (existingCols == null) {
+ existingCols = new ArrayList<String>();
+ }
+ existingCols.addAll(cols);
+ tab2Cols.put(tbl, existingCols);
+ }
+ }
+ }
+ }
+
+ // cache the results for table authorization
+ Set<String> tableAuthChecked = new HashSet<String>();
+ for (ReadEntity read : inputs) {
+ Table tbl = null;
+ if (read.getPartition() != null) {
+ tbl = read.getPartition().getTable();
+ // use partition level authorization
+ if (tableUsePartLevelAuth.get(tbl.getTableName())) {
+ List<String> cols = part2Cols.get(read.getPartition());
+ if (cols != null && cols.size() > 0) {
+ ss.getAuthorizer().authorize(read.getPartition().getTable(), read.getPartition(), cols,
+ op.getInputRequiredPrivileges(), null);
+ } else {
+ ss.getAuthorizer().authorize(read.getPartition(), op.getInputRequiredPrivileges(), null);
+ }
+ continue;
+ }
+ } else if (read.getTable() != null) {
+ tbl = read.getTable();
+ }
+
+ // if we reach here, it means it needs to do a table
+ // authorization
+ // check, and the table authorization may already happened
+ // because of other
+ // partitions
+ if (tbl != null && !tableAuthChecked.contains(tbl.getTableName())) {
+ List<String> cols = tab2Cols.get(tbl);
+ if (cols != null && cols.size() > 0) {
+ ss.getAuthorizer().authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null);
+ } else {
+ ss.getAuthorizer().authorize(tbl, op.getInputRequiredPrivileges(), null);
+ }
+ tableAuthChecked.add(tbl.getTableName());
+ }
+ }
+
+ }
+ }
+
+ /**
+ * @return The current query plan associated with this Driver, if any.
+ */
+ public QueryPlan getPlan() {
+ return plan;
+ }
+
+ /**
+ * @param t
+ * The table to be locked
+ * @param p
+ * The partition to be locked
+ * @param mode
+ * The mode of the lock (SHARED/EXCLUSIVE) Get the list of
+ * objects to be locked. If a partition needs to be locked (in
+ * any mode), all its parents should also be locked in SHARED
+ * mode.
+ **/
+ private List<HiveLockObj> getLockObjects(Table t, Partition p, HiveLockMode mode) throws SemanticException {
+ List<HiveLockObj> locks = new LinkedList<HiveLockObj>();
+
+ HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
+ .currentTimeMillis()), "IMPLICIT");
+
+ if (t != null) {
+ locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
+ mode = HiveLockMode.SHARED;
+ locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(), lockData), mode));
+ return locks;
+ }
+
+ if (p != null) {
+ if (!(p instanceof DummyPartition)) {
+ locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode));
+ }
+
+ // All the parents are locked in shared mode
+ mode = HiveLockMode.SHARED;
+
+ // For dummy partitions, only partition name is needed
+ String name = p.getName();
+
+ if (p instanceof DummyPartition) {
+ name = p.getName().split("@")[2];
+ }
+
+ String partName = name;
+ String partialName = "";
+ String[] partns = name.split("/");
+ int len = p instanceof DummyPartition ? partns.length : partns.length - 1;
+ for (int idx = 0; idx < len; idx++) {
+ String partn = partns[idx];
+ partialName += partn;
+ try {
+ locks.add(new HiveLockObj(new HiveLockObject(new DummyPartition(p.getTable(), p.getTable()
+ .getDbName() + "/" + p.getTable().getTableName() + "/" + partialName), lockData), mode));
+ partialName += "/";
+ } catch (HiveException e) {
+ throw new SemanticException(e.getMessage());
+ }
+ }
+
+ locks.add(new HiveLockObj(new HiveLockObject(p.getTable(), lockData), mode));
+ locks.add(new HiveLockObj(new HiveLockObject(p.getTable().getDbName(), lockData), mode));
+ }
+ return locks;
+ }
+
+ /**
+ * Acquire read and write locks needed by the statement. The list of objects
+ * to be locked are obtained from he inputs and outputs populated by the
+ * compiler. The lock acuisition scheme is pretty simple. If all the locks
+ * cannot be obtained, error out. Deadlock is avoided by making sure that
+ * the locks are lexicographically sorted.
+ **/
+ public int acquireReadWriteLocks() {
+ try {
+ int sleepTime = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES) * 1000;
+ int numRetries = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES);
+
+ boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
+ if (!supportConcurrency) {
+ return 0;
+ }
+
+ List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>();
+
+ // Sort all the inputs, outputs.
+ // If a lock needs to be acquired on any partition, a read lock
+ // needs to be acquired on all
+ // its parents also
+ for (ReadEntity input : plan.getInputs()) {
+ if (input.getType() == ReadEntity.Type.TABLE) {
+ lockObjects.addAll(getLockObjects(input.getTable(), null, HiveLockMode.SHARED));
+ } else {
+ lockObjects.addAll(getLockObjects(null, input.getPartition(), HiveLockMode.SHARED));
+ }
+ }
+
+ for (WriteEntity output : plan.getOutputs()) {
+ if (output.getTyp() == WriteEntity.Type.TABLE) {
+ lockObjects.addAll(getLockObjects(output.getTable(), null,
+ output.isComplete() ? HiveLockMode.EXCLUSIVE : HiveLockMode.SHARED));
+ } else if (output.getTyp() == WriteEntity.Type.PARTITION) {
+ lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.EXCLUSIVE));
+ }
+ // In case of dynamic queries, it is possible to have incomplete
+ // dummy partitions
+ else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) {
+ lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.SHARED));
+ }
+ }
+
+ if (lockObjects.isEmpty() && !ctx.isNeedLockMgr()) {
+ return 0;
+ }
+
+ int ret = checkLockManager();
+ if (ret != 0) {
+ return ret;
+ }
+
+ HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
+ .currentTimeMillis()), "IMPLICIT");
+
+ // Lock the database also
+ try {
+ Hive db = Hive.get(conf);
+ lockObjects.add(new HiveLockObj(new HiveLockObject(db.getCurrentDatabase(), lockData),
+ HiveLockMode.SHARED));
+ } catch (HiveException e) {
+ throw new SemanticException(e.getMessage());
+ }
+
+ ctx.setHiveLockMgr(hiveLockMgr);
+ List<HiveLock> hiveLocks = null;
+
+ int tryNum = 1;
+ do {
+
+ // ctx.getHiveLockMgr();
+ // hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false);
+
+ if (hiveLocks != null) {
+ break;
+ }
+
+ tryNum++;
+ try {
+ Thread.sleep(sleepTime);
+ } catch (InterruptedException e) {
+ }
+ } while (tryNum < numRetries);
+
+ if (hiveLocks == null) {
+ throw new SemanticException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
+ } else {
+ ctx.setHiveLocks(hiveLocks);
+ }
+
+ return (0);
+ } catch (SemanticException e) {
+ errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (10);
+ } catch (Exception e) {
+ errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
+ SQLState = ErrorMsg.findSQLState(e.getMessage());
+ console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (10);
+ }
+ }
+
+ /**
+ * Release all the locks acquired implicitly by the statement. Note that the
+ * locks acquired with 'keepAlive' set to True are not released.
+ **/
+ private void releaseLocks() {
+ if (ctx != null && ctx.getHiveLockMgr() != null) {
+ try {
+ ctx.getHiveLockMgr().close();
+ ctx.setHiveLocks(null);
+ } catch (LockException e) {
+ }
+ }
+ }
+
+ /**
+ * @param hiveLocks
+ * list of hive locks to be released Release all the locks
+ * specified. If some of the locks have already been released,
+ * ignore them
+ **/
+ private void releaseLocks(List<HiveLock> hiveLocks) {
+ if (hiveLocks != null) {
+ ctx.getHiveLockMgr().releaseLocks(hiveLocks);
+ }
+ ctx.setHiveLocks(null);
+ }
+
+ public CommandProcessorResponse run(String command) {
+ errorMessage = null;
+ SQLState = null;
+
+ int ret = compile(command);
+ if (ret != 0) {
+ // releaseLocks(ctx.getHiveLocks());
+ return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ }
+
+ // ret = acquireReadWriteLocks();
+ if (ret != 0) {
+ // releaseLocks(ctx.getHiveLocks());
+ return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ }
+
+ ret = execute();
+ if (ret != 0) {
+ // releaseLocks(ctx.getHiveLocks());
+ return new CommandProcessorResponse(ret, errorMessage, SQLState);
+ }
+
+ // releaseLocks(ctx.getHiveLocks());
+ return new CommandProcessorResponse(ret);
+ }
+
+ private List<AbstractSemanticAnalyzerHook> getSemanticAnalyzerHooks() throws Exception {
+ ArrayList<AbstractSemanticAnalyzerHook> saHooks = new ArrayList<AbstractSemanticAnalyzerHook>();
+ String pestr = conf.getVar(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK);
+ if (pestr == null) {
+ return saHooks;
+ }
+ pestr = pestr.trim();
+ if (pestr.equals("")) {
+ return saHooks;
+ }
+
+ String[] peClasses = pestr.split(",");
+
+ for (String peClass : peClasses) {
+ try {
+ AbstractSemanticAnalyzerHook hook = HiveUtils.getSemanticAnalyzerHook(conf, peClass);
+ saHooks.add(hook);
+ } catch (HiveException e) {
+ console.printError("Pre Exec Hook Class not found:" + e.getMessage());
+ throw e;
+ }
+ }
+
+ return saHooks;
+ }
+
+ private List<Hook> getPreExecHooks() throws Exception {
+ ArrayList<Hook> pehooks = new ArrayList<Hook>();
+ String pestr = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS);
+ pestr = pestr.trim();
+ if (pestr.equals("")) {
+ return pehooks;
+ }
+
+ String[] peClasses = pestr.split(",");
+
+ for (String peClass : peClasses) {
+ try {
+ pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
+ } catch (ClassNotFoundException e) {
+ console.printError("Pre Exec Hook Class not found:" + e.getMessage());
+ throw e;
+ }
+ }
+
+ return pehooks;
+ }
+
+ private List<Hook> getPostExecHooks() throws Exception {
+ ArrayList<Hook> pehooks = new ArrayList<Hook>();
+ String pestr = conf.getVar(HiveConf.ConfVars.POSTEXECHOOKS);
+ pestr = pestr.trim();
+ if (pestr.equals("")) {
+ return pehooks;
+ }
+
+ String[] peClasses = pestr.split(",");
+
+ for (String peClass : peClasses) {
+ try {
+ pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
+ } catch (ClassNotFoundException e) {
+ console.printError("Post Exec Hook Class not found:" + e.getMessage());
+ throw e;
+ }
+ }
+
+ return pehooks;
+ }
+
+ public int execute() {
+ // execute hivesterix plan
+ if (hivesterix) {
+ hivesterix = false;
+ int ret = engine.executeJob();
+ if (ret != 0)
+ return ret;
+ }
+
+ boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME));
+ int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
+
+ String queryId = plan.getQueryId();
+ String queryStr = plan.getQueryStr();
+
+ conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId);
+ conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr);
+ maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
+
+ try {
+ LOG.info("Starting command: " + queryStr);
+
+ plan.setStarted();
+
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory().startQuery(queryStr, conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
+ SessionState.get().getHiveHistory().logPlanProgress(plan);
+ }
+ resStream = null;
+
+ HookContext hookContext = new HookContext(plan, conf);
+
+ for (Hook peh : getPreExecHooks()) {
+ if (peh instanceof ExecuteWithHookContext) {
+ ((ExecuteWithHookContext) peh).run(hookContext);
+ } else if (peh instanceof PreExecute) {
+ ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), ShimLoader
+ .getHadoopShims().getUGIForConf(conf));
+ }
+ }
+
+ int jobs = Utilities.getMRTasks(plan.getRootTasks()).size();
+ if (jobs > 0) {
+ console.printInfo("Total MapReduce jobs = " + jobs);
+ }
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory()
+ .setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs));
+ SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap());
+ }
+ String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
+
+ // A runtime that launches runnable tasks as separate Threads
+ // through
+ // TaskRunners
+ // As soon as a task isRunnable, it is put in a queue
+ // At any time, at most maxthreads tasks can be running
+ // The main thread polls the TaskRunners to check if they have
+ // finished.
+
+ Queue<Task<? extends Serializable>> runnable = new LinkedList<Task<? extends Serializable>>();
+ Map<TaskResult, TaskRunner> running = new HashMap<TaskResult, TaskRunner>();
+
+ DriverContext driverCxt = new DriverContext(runnable, ctx);
+
+ // Add root Tasks to runnable
+
+ for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
+ driverCxt.addToRunnable(tsk);
+ }
+
+ // Loop while you either have tasks running, or tasks queued up
+
+ while (running.size() != 0 || runnable.peek() != null) {
+ // Launch upto maxthreads tasks
+ while (runnable.peek() != null && running.size() < maxthreads) {
+ Task<? extends Serializable> tsk = runnable.remove();
+ console.printInfo("executing task " + tsk.getName());
+ launchTask(tsk, queryId, noName, running, jobname, jobs, driverCxt);
+ }
+
+ // poll the Tasks to see which one completed
+ TaskResult tskRes = pollTasks(running.keySet());
+ TaskRunner tskRun = running.remove(tskRes);
+ Task<? extends Serializable> tsk = tskRun.getTask();
+ hookContext.addCompleteTask(tskRun);
+
+ int exitVal = tskRes.getExitVal();
+ if (exitVal != 0) {
+ Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
+ if (backupTask != null) {
+ errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+ + tsk.getClass().getName();
+ console.printError(errorMessage);
+
+ errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
+ console.printError(errorMessage);
+
+ // add backup task to runnable
+ if (DriverContext.isLaunchable(backupTask)) {
+ driverCxt.addToRunnable(backupTask);
+ }
+ continue;
+
+ } else {
+ // TODO: This error messaging is not very informative.
+ // Fix that.
+ errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+ + tsk.getClass().getName();
+ SQLState = "08S01";
+ console.printError(errorMessage);
+ if (running.size() != 0) {
+ taskCleanup();
+ }
+ // in case we decided to run everything in local mode,
+ // restore the
+ // the jobtracker setting to its initial value
+ ctx.restoreOriginalTracker();
+ return 9;
+ }
+ }
+
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory()
+ .setTaskProperty(queryId, tsk.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal));
+ SessionState.get().getHiveHistory().endTask(queryId, tsk);
+ }
+
+ if (tsk.getChildTasks() != null) {
+ for (Task<? extends Serializable> child : tsk.getChildTasks()) {
+ // hivesterix: don't check launchable condition
+ // if (DriverContext.isLaunchable(child)) {
+ driverCxt.addToRunnable(child);
+ // }
+ }
+ }
+ }
+
+ // in case we decided to run everything in local mode, restore the
+ // the jobtracker setting to its initial value
+ ctx.restoreOriginalTracker();
+
+ // remove incomplete outputs.
+ // Some incomplete outputs may be added at the beginning, for eg:
+ // for dynamic partitions.
+ // remove them
+ HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
+ for (WriteEntity output : plan.getOutputs()) {
+ if (!output.isComplete()) {
+ remOutputs.add(output);
+ }
+ }
+
+ for (WriteEntity output : remOutputs) {
+ plan.getOutputs().remove(output);
+ }
+
+ // Get all the post execution hooks and execute them.
+ for (Hook peh : getPostExecHooks()) {
+ if (peh instanceof ExecuteWithHookContext) {
+ ((ExecuteWithHookContext) peh).run(hookContext);
+ } else if (peh instanceof PostExecute) {
+ ((PostExecute) peh)
+ .run(SessionState.get(), plan.getInputs(), plan.getOutputs(),
+ (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo()
+ : null), ShimLoader.getHadoopShims().getUGIForConf(conf));
+ }
+ }
+
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0));
+ SessionState.get().getHiveHistory().printRowCount(queryId);
+ }
+ } catch (Exception e) {
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12));
+ }
+ // TODO: do better with handling types of Exception here
+ errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+ SQLState = "08S01";
+ console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return (12);
+ } finally {
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory().endQuery(queryId);
+ }
+ if (noName) {
+ conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
+ }
+ }
+ plan.setDone();
+
+ if (SessionState.get() != null) {
+ try {
+ SessionState.get().getHiveHistory().logPlanProgress(plan);
+ } catch (Exception e) {
+ }
+ }
+ console.printInfo("OK");
+
+ return (0);
+ }
+
+ /**
+ * Launches a new task
+ *
+ * @param tsk
+ * task being launched
+ * @param queryId
+ * Id of the query containing the task
+ * @param noName
+ * whether the task has a name set
+ * @param running
+ * map from taskresults to taskrunners
+ * @param jobname
+ * name of the task, if it is a map-reduce job
+ * @param jobs
+ * number of map-reduce jobs
+ * @param curJobNo
+ * the sequential number of the next map-reduce job
+ * @return the updated number of last the map-reduce job launched
+ */
+
+ public void launchTask(Task<? extends Serializable> tsk, String queryId, boolean noName,
+ Map<TaskResult, TaskRunner> running, String jobname, int jobs, DriverContext cxt) {
+
+ if (SessionState.get() != null) {
+ SessionState.get().getHiveHistory().startTask(queryId, tsk, tsk.getClass().getName());
+ }
+ if (tsk.isMapRedTask() && !(tsk instanceof ConditionalTask)) {
+ if (noName) {
+ conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "(" + tsk.getId() + ")");
+ }
+ cxt.incCurJobNo(1);
+ console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of " + jobs);
+ }
+ tsk.initialize(conf, plan, cxt);
+ TaskResult tskRes = new TaskResult();
+ TaskRunner tskRun = new TaskRunner(tsk, tskRes);
+
+ // HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) &&
+ // Launch Task: hivesterix tweak
+ if (tsk instanceof MapRedTask || tsk instanceof StatsTask) {
+ // Launch it in the parallel mode, as a separate thread only for MR
+ // tasks
+ tskRes.setRunning(false);
+ tskRes.setExitVal(0);
+ } else if (tsk instanceof ConditionalTask) {
+ ConditionalTask condTask = (ConditionalTask) tsk;
+ ConditionalResolver crs = condTask.getResolver();
+ if (crs instanceof ConditionalResolverMergeFiles) {
+ tskRes.setRunning(false);
+ tskRes.setExitVal(0);
+
+ List<Task<? extends Serializable>> children = condTask.getListTasks();
+ for (Task<? extends Serializable> child : children)
+ if (child instanceof MapRedTask)
+ cxt.addToRunnable(child);
+ }
+ } else {
+ tskRun.runSequential();
+ }
+ running.put(tskRes, tskRun);
+ return;
+ }
+
+ /**
+ * Cleans up remaining tasks in case of failure
+ */
+
+ public void taskCleanup() {
+ // The currently existing Shutdown hooks will be automatically called,
+ // killing the map-reduce processes.
+ // The non MR processes will be killed as well.
+ System.exit(9);
+ }
+
+ /**
+ * Polls running tasks to see if a task has ended.
+ *
+ * @param results
+ * Set of result objects for running tasks
+ * @return The result object for any completed/failed task
+ */
+
+ public TaskResult pollTasks(Set<TaskResult> results) {
+ Iterator<TaskResult> resultIterator = results.iterator();
+ while (true) {
+ while (resultIterator.hasNext()) {
+ TaskResult tskRes = resultIterator.next();
+ if (tskRes.isRunning() == false) {
+ return tskRes;
+ }
+ }
+
+ // In this loop, nothing was found
+ // Sleep 10 seconds and restart
+ try {
+ Thread.sleep(sleeptime);
+ } catch (InterruptedException ie) {
+ // Do Nothing
+ ;
+ }
+ resultIterator = results.iterator();
+ }
+ }
+
+ public boolean getResults(ArrayList<String> res) throws IOException {
+ if (plan != null && plan.getFetchTask() != null) {
+ FetchTask ft = plan.getFetchTask();
+ ft.setMaxRows(maxRows);
+ return ft.fetch(res);
+ }
+
+ if (resStream == null) {
+ resStream = ctx.getStream();
+ }
+ if (resStream == null) {
+ return false;
+ }
+
+ int numRows = 0;
+ String row = null;
+
+ while (numRows < maxRows) {
+ if (resStream == null) {
+ if (numRows > 0) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ bos.reset();
+ Utilities.StreamStatus ss;
+ try {
+ ss = Utilities.readColumn(resStream, bos);
+ if (bos.getCount() > 0) {
+ row = new String(bos.getData(), 0, bos.getCount(), "UTF-8");
+ } else if (ss == Utilities.StreamStatus.TERMINATED) {
+ row = new String();
+ }
+
+ if (row != null) {
+ numRows++;
+ res.add(row);
+ }
+ } catch (IOException e) {
+ console.printError("FAILED: Unexpected IO exception : " + e.getMessage());
+ res = null;
+ return false;
+ }
+
+ if (ss == Utilities.StreamStatus.EOF) {
+ resStream = ctx.getStream();
+ }
+ }
+ return true;
+ }
+
+ public int close() {
+ try {
+ if (plan != null) {
+ FetchTask fetchTask = plan.getFetchTask();
+ if (null != fetchTask) {
+ try {
+ fetchTask.clearFetch();
+ } catch (Exception e) {
+ LOG.debug(" Exception while clearing the Fetch task ", e);
+ }
+ }
+ }
+ if (ctx != null) {
+ ctx.clear();
+ }
+ if (null != resStream) {
+ try {
+ ((FSDataInputStream) resStream).close();
+ } catch (Exception e) {
+ LOG.debug(" Exception while closing the resStream ", e);
+ }
+ }
+ } catch (Exception e) {
+ console.printError("FAILED: Hive Internal Error: " + Utilities.getNameMessage(e) + "\n"
+ + org.apache.hadoop.util.StringUtils.stringifyException(e));
+ return 13;
+ }
+
+ return 0;
+ }
+
+ public void destroy() {
+ releaseLocks();
+ }
+
+ public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException {
+ return plan.getQueryPlan();
+ }
+
+ public int getTryCount() {
+ return tryCount;
+ }
+
+ public void setTryCount(int tryCount) {
+ this.tryCount = tryCount;
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
new file mode 100644
index 0000000..0f445f4
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
@@ -0,0 +1,233 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.util.StringUtils;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * GenericUDAFAverage.
+ */
+@Description(name = "avg", value = "_FUNC_(x) - Returns the mean of a set of numbers")
+public class GenericUDAFAverage extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFAverage.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 1) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ case STRING:
+ return new GenericUDAFAverageEvaluator();
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * GenericUDAFAverageEvaluator.
+ */
+ public static class GenericUDAFAverageEvaluator extends GenericUDAFEvaluator {
+
+ // For PARTIAL1 and COMPLETE
+ PrimitiveObjectInspector inputOI;
+
+ // For PARTIAL2 and FINAL
+ StructObjectInspector soi;
+ StructField countField;
+ StructField sumField;
+ LongObjectInspector countFieldOI;
+ DoubleObjectInspector sumFieldOI;
+
+ // For PARTIAL1 and PARTIAL2
+ Object[] partialResult;
+
+ // For FINAL and COMPLETE
+ DoubleWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ super.init(m, parameters);
+
+ // init input
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+ inputOI = (PrimitiveObjectInspector) parameters[0];
+ } else {
+ soi = (StructObjectInspector) parameters[0];
+ countField = soi.getStructFieldRef("count");
+ sumField = soi.getStructFieldRef("sum");
+ countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+ sumFieldOI = (DoubleObjectInspector) sumField.getFieldObjectInspector();
+ }
+
+ // init output
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+ // The output of a partial aggregation is a struct containing
+ // a "long" count and a "double" sum.
+
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ partialResult = new Object[2];
+ partialResult[0] = new LongWritable(0);
+ partialResult[1] = new DoubleWritable(0);
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+ } else {
+ result = new DoubleWritable(0);
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+ }
+
+ static class AverageAgg implements SerializableBuffer {
+ long count;
+ double sum;
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ count = BufferSerDeUtil.getLong(data, start);
+ start += 8;
+ sum = BufferSerDeUtil.getDouble(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeLong(count, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(sum, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeLong(count);
+ output.writeDouble(sum);
+ }
+ };
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ AverageAgg result = new AverageAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ AverageAgg myagg = (AverageAgg) agg;
+ myagg.count = 0;
+ myagg.sum = 0;
+ }
+
+ boolean warned = false;
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ Object p = parameters[0];
+ if (p != null) {
+ AverageAgg myagg = (AverageAgg) agg;
+ try {
+ double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI);
+ myagg.count++;
+ myagg.sum += v;
+ } catch (NumberFormatException e) {
+ if (!warned) {
+ warned = true;
+ LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+ LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
+ }
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ AverageAgg myagg = (AverageAgg) agg;
+ ((LongWritable) partialResult[0]).set(myagg.count);
+ ((DoubleWritable) partialResult[1]).set(myagg.sum);
+ return partialResult;
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ AverageAgg myagg = (AverageAgg) agg;
+ Object partialCount = soi.getStructFieldData(partial, countField);
+ Object partialSum = soi.getStructFieldData(partial, sumField);
+ myagg.count += countFieldOI.get(partialCount);
+ myagg.sum += sumFieldOI.get(partialSum);
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ AverageAgg myagg = (AverageAgg) agg;
+ if (myagg.count == 0) {
+ return null;
+ } else {
+ result.set(myagg.sum / myagg.count);
+ return result;
+ }
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
new file mode 100644
index 0000000..2c4022e
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
@@ -0,0 +1,392 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * Compute the Pearson correlation coefficient corr(x, y), using the following
+ * stable one-pass method, based on: "Formulas for Robust, One-Pass Parallel
+ * Computation of Covariances and Arbitrary-Order Statistical Moments", Philippe
+ * Pebay, Sandia Labs and
+ * "The Art of Computer Programming, volume 2: Seminumerical Algorithms", Donald
+ * Knuth.
+ * Incremental: n : <count> mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg> my_n =
+ * my_(n-1) + [y_n - my_(n-1)]/n : <yavg> c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n
+ * - my_n) : <covariance * n> vx_n = vx_(n-1) + (x_n - mx_n)(x_n - mx_(n-1)):
+ * <variance * n> vy_n = vy_(n-1) + (y_n - my_n)(y_n - my_(n-1)): <variance * n>
+ * Merge: c_(A,B) = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+ * vx_(A,B) = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B)
+ * vy_(A,B) = vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+ */
+@Description(name = "corr", value = "_FUNC_(x,y) - Returns the Pearson coefficient of correlation\n"
+ + "between a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
+ + "Any pair with a NULL is ignored. If the function is applied to an empty set or\n"
+ + "a singleton set, NULL will be returned. Otherwise, it computes the following:\n"
+ + " COVAR_POP(x,y)/(STDDEV_POP(x)*STDDEV_POP(y))\n"
+ + "where neither x nor y is null,\n"
+ + "COVAR_POP is the population covariance,\n" + "and STDDEV_POP is the population standard deviation.")
+public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFCorrelation.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 2) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly two arguments are expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+
+ if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but "
+ + parameters[1].getTypeName() + " is passed.");
+ }
+
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ return new GenericUDAFCorrelationEvaluator();
+ case STRING:
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(1, "Only numeric type arguments are accepted but "
+ + parameters[1].getTypeName() + " is passed.");
+ }
+ case STRING:
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(0, "Only numeric type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * Evaluate the Pearson correlation coefficient using a stable one-pass
+ * algorithm, based on work by Philippe Pébay and Donald Knuth.
+ * Incremental: n : <count> mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
+ * my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg> c_n = c_(n-1) + (x_n -
+ * mx_(n-1))*(y_n - my_n) : <covariance * n> vx_n = vx_(n-1) + (x_n -
+ * mx_n)(x_n - mx_(n-1)): <variance * n> vy_n = vy_(n-1) + (y_n - my_n)(y_n
+ * - my_(n-1)): <variance * n>
+ * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X vx_(A,B)
+ * = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B) vy_(A,B) =
+ * vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+ */
+ public static class GenericUDAFCorrelationEvaluator extends GenericUDAFEvaluator {
+
+ // For PARTIAL1 and COMPLETE
+ private PrimitiveObjectInspector xInputOI;
+ private PrimitiveObjectInspector yInputOI;
+
+ // For PARTIAL2 and FINAL
+ private StructObjectInspector soi;
+ private StructField countField;
+ private StructField xavgField;
+ private StructField yavgField;
+ private StructField xvarField;
+ private StructField yvarField;
+ private StructField covarField;
+ private LongObjectInspector countFieldOI;
+ private DoubleObjectInspector xavgFieldOI;
+ private DoubleObjectInspector yavgFieldOI;
+ private DoubleObjectInspector xvarFieldOI;
+ private DoubleObjectInspector yvarFieldOI;
+ private DoubleObjectInspector covarFieldOI;
+
+ // For PARTIAL1 and PARTIAL2
+ private Object[] partialResult;
+
+ // For FINAL and COMPLETE
+ private DoubleWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+
+ // init input
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+ assert (parameters.length == 2);
+ xInputOI = (PrimitiveObjectInspector) parameters[0];
+ yInputOI = (PrimitiveObjectInspector) parameters[1];
+ } else {
+ assert (parameters.length == 1);
+ soi = (StructObjectInspector) parameters[0];
+
+ countField = soi.getStructFieldRef("count");
+ xavgField = soi.getStructFieldRef("xavg");
+ yavgField = soi.getStructFieldRef("yavg");
+ xvarField = soi.getStructFieldRef("xvar");
+ yvarField = soi.getStructFieldRef("yvar");
+ covarField = soi.getStructFieldRef("covar");
+
+ countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+ xavgFieldOI = (DoubleObjectInspector) xavgField.getFieldObjectInspector();
+ yavgFieldOI = (DoubleObjectInspector) yavgField.getFieldObjectInspector();
+ xvarFieldOI = (DoubleObjectInspector) xvarField.getFieldObjectInspector();
+ yvarFieldOI = (DoubleObjectInspector) yvarField.getFieldObjectInspector();
+ covarFieldOI = (DoubleObjectInspector) covarField.getFieldObjectInspector();
+ }
+
+ // init output
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+ // The output of a partial aggregation is a struct containing
+ // a long count, two double averages, two double variances,
+ // and a double covariance.
+
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("xavg");
+ fname.add("yavg");
+ fname.add("xvar");
+ fname.add("yvar");
+ fname.add("covar");
+
+ partialResult = new Object[6];
+ partialResult[0] = new LongWritable(0);
+ partialResult[1] = new DoubleWritable(0);
+ partialResult[2] = new DoubleWritable(0);
+ partialResult[3] = new DoubleWritable(0);
+ partialResult[4] = new DoubleWritable(0);
+ partialResult[5] = new DoubleWritable(0);
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+ } else {
+ setResult(new DoubleWritable(0));
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+ }
+
+ static class StdAgg implements SerializableBuffer {
+ long count; // number n of elements
+ double xavg; // average of x elements
+ double yavg; // average of y elements
+ double xvar; // n times the variance of x elements
+ double yvar; // n times the variance of y elements
+ double covar; // n times the covariance
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ count = BufferSerDeUtil.getLong(data, start);
+ start += 8;
+ xavg = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ yavg = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ xvar = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ yvar = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ covar = BufferSerDeUtil.getDouble(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeLong(count, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(xavg, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(yavg, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(xvar, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(yvar, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(covar, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeLong(count);
+ output.writeDouble(xavg);
+ output.writeDouble(yavg);
+ output.writeDouble(xvar);
+ output.writeDouble(yvar);
+ output.writeDouble(covar);
+ }
+ };
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ StdAgg result = new StdAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ myagg.count = 0;
+ myagg.xavg = 0;
+ myagg.yavg = 0;
+ myagg.xvar = 0;
+ myagg.yvar = 0;
+ myagg.covar = 0;
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 2);
+ Object px = parameters[0];
+ Object py = parameters[1];
+ if (px != null && py != null) {
+ StdAgg myagg = (StdAgg) agg;
+ double vx = PrimitiveObjectInspectorUtils.getDouble(px, xInputOI);
+ double vy = PrimitiveObjectInspectorUtils.getDouble(py, yInputOI);
+ double xavgOld = myagg.xavg;
+ double yavgOld = myagg.yavg;
+ myagg.count++;
+ myagg.xavg += (vx - xavgOld) / myagg.count;
+ myagg.yavg += (vy - yavgOld) / myagg.count;
+ if (myagg.count > 1) {
+ myagg.covar += (vx - xavgOld) * (vy - myagg.yavg);
+ myagg.xvar += (vx - xavgOld) * (vx - myagg.xavg);
+ myagg.yvar += (vy - yavgOld) * (vy - myagg.yavg);
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ ((LongWritable) partialResult[0]).set(myagg.count);
+ ((DoubleWritable) partialResult[1]).set(myagg.xavg);
+ ((DoubleWritable) partialResult[2]).set(myagg.yavg);
+ ((DoubleWritable) partialResult[3]).set(myagg.xvar);
+ ((DoubleWritable) partialResult[4]).set(myagg.yvar);
+ ((DoubleWritable) partialResult[5]).set(myagg.covar);
+ return partialResult;
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ StdAgg myagg = (StdAgg) agg;
+
+ Object partialCount = soi.getStructFieldData(partial, countField);
+ Object partialXAvg = soi.getStructFieldData(partial, xavgField);
+ Object partialYAvg = soi.getStructFieldData(partial, yavgField);
+ Object partialXVar = soi.getStructFieldData(partial, xvarField);
+ Object partialYVar = soi.getStructFieldData(partial, yvarField);
+ Object partialCovar = soi.getStructFieldData(partial, covarField);
+
+ long nA = myagg.count;
+ long nB = countFieldOI.get(partialCount);
+
+ if (nA == 0) {
+ // Just copy the information since there is nothing so far
+ myagg.count = countFieldOI.get(partialCount);
+ myagg.xavg = xavgFieldOI.get(partialXAvg);
+ myagg.yavg = yavgFieldOI.get(partialYAvg);
+ myagg.xvar = xvarFieldOI.get(partialXVar);
+ myagg.yvar = yvarFieldOI.get(partialYVar);
+ myagg.covar = covarFieldOI.get(partialCovar);
+ }
+
+ if (nA != 0 && nB != 0) {
+ // Merge the two partials
+ double xavgA = myagg.xavg;
+ double yavgA = myagg.yavg;
+ double xavgB = xavgFieldOI.get(partialXAvg);
+ double yavgB = yavgFieldOI.get(partialYAvg);
+ double xvarB = xvarFieldOI.get(partialXVar);
+ double yvarB = yvarFieldOI.get(partialYVar);
+ double covarB = covarFieldOI.get(partialCovar);
+
+ myagg.count += nB;
+ myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
+ myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
+ myagg.xvar += xvarB + (xavgA - xavgB) * (xavgA - xavgB) * myagg.count;
+ myagg.yvar += yvarB + (yavgA - yavgB) * (yavgA - yavgB) * myagg.count;
+ myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB) * ((double) (nA * nB) / myagg.count);
+ }
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+
+ if (myagg.count < 2) { // SQL standard - return null for zero or one
+ // pair
+ return null;
+ } else {
+ getResult().set(myagg.covar / java.lang.Math.sqrt(myagg.xvar) / java.lang.Math.sqrt(myagg.yvar));
+ return getResult();
+ }
+ }
+
+ public void setResult(DoubleWritable result) {
+ this.result = result;
+ }
+
+ public DoubleWritable getResult() {
+ return result;
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
new file mode 100644
index 0000000..dc5eef0
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
@@ -0,0 +1,170 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * This class implements the COUNT aggregation function as in SQL.
+ */
+@Description(name = "count", value = "_FUNC_(*) - Returns the total number of retrieved rows, including "
+ + "rows containing NULL values.\n"
+
+ + "_FUNC_(expr) - Returns the number of rows for which the supplied " + "expression is non-NULL.\n"
+
+ + "_FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for "
+ + "which the supplied expression(s) are unique and non-NULL.")
+public class GenericUDAFCount implements GenericUDAFResolver2 {
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ // This method implementation is preserved for backward compatibility.
+ return new GenericUDAFCountEvaluator();
+ }
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo) throws SemanticException {
+
+ TypeInfo[] parameters = paramInfo.getParameters();
+
+ if (parameters.length == 0) {
+ if (!paramInfo.isAllColumns()) {
+ throw new UDFArgumentException("Argument expected");
+ }
+ assert !paramInfo.isDistinct() : "DISTINCT not supported with *";
+ } else {
+ if (parameters.length > 1 && !paramInfo.isDistinct()) {
+ throw new UDFArgumentException("DISTINCT keyword must be specified");
+ }
+ assert !paramInfo.isAllColumns() : "* not supported in expression list";
+ }
+
+ return new GenericUDAFCountEvaluator().setCountAllColumns(paramInfo.isAllColumns());
+ }
+
+ /**
+ * GenericUDAFCountEvaluator.
+ */
+ public static class GenericUDAFCountEvaluator extends GenericUDAFEvaluator {
+ private boolean countAllColumns = false;
+ private LongObjectInspector partialCountAggOI;
+ private LongWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+ partialCountAggOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+ result = new LongWritable(0);
+ return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+ }
+
+ private GenericUDAFCountEvaluator setCountAllColumns(boolean countAllCols) {
+ countAllColumns = countAllCols;
+ return this;
+ }
+
+ /** class for storing count value. */
+ static class CountAgg implements SerializableBuffer {
+ long value;
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ value = BufferSerDeUtil.getLong(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeLong(value, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeLong(value);
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ CountAgg buffer = new CountAgg();
+ reset(buffer);
+ return buffer;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ ((CountAgg) agg).value = 0;
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ // parameters == null means the input table/split is empty
+ if (parameters == null) {
+ return;
+ }
+ if (countAllColumns) {
+ assert parameters.length == 0;
+ ((CountAgg) agg).value++;
+ } else {
+ assert parameters.length > 0;
+ boolean countThisRow = true;
+ for (Object nextParam : parameters) {
+ if (nextParam == null) {
+ countThisRow = false;
+ break;
+ }
+ }
+ if (countThisRow) {
+ ((CountAgg) agg).value++;
+ }
+ }
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ long p = partialCountAggOI.get(partial);
+ ((CountAgg) agg).value += p;
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ result.set(((CountAgg) agg).value);
+ return result;
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ return terminate(agg);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
new file mode 100644
index 0000000..0c4448b
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
@@ -0,0 +1,341 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * Compute the covariance covar_pop(x, y), using the following one-pass method
+ * (ref. "Formulas for Robust, One-Pass Parallel Computation of Covariances and
+ * Arbitrary-Order Statistical Moments", Philippe Pebay, Sandia Labs):
+ * Incremental: n : <count> mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg> my_n =
+ * my_(n-1) + [y_n - my_(n-1)]/n : <yavg> c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n
+ * - my_n) : <covariance * n>
+ * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
+ */
+@Description(name = "covariance,covar_pop", value = "_FUNC_(x,y) - Returns the population covariance of a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
+ + "Any pair with a NULL is ignored. If the function is applied to an empty set, NULL\n"
+ + "will be returned. Otherwise, it computes the following:\n"
+ + " (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/COUNT(x,y)\n" + "where neither x nor y is null.")
+public class GenericUDAFCovariance extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFCovariance.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 2) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly two arguments are expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+
+ if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but "
+ + parameters[1].getTypeName() + " is passed.");
+ }
+
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ return new GenericUDAFCovarianceEvaluator();
+ case STRING:
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(1, "Only numeric or string type arguments are accepted but "
+ + parameters[1].getTypeName() + " is passed.");
+ }
+ case STRING:
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * Evaluate the variance using the algorithm described in
+ * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance,
+ * presumably by Pébay, Philippe (2008), in "Formulas for Robust, One-Pass
+ * Parallel Computation of Covariances and Arbitrary-Order Statistical
+ * Moments", Technical Report SAND2008-6212, Sandia National Laboratories,
+ * http://infoserve.sandia.gov/sand_doc/2008/086212.pdf
+ * Incremental: n : <count> mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
+ * my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg> c_n = c_(n-1) + (x_n -
+ * mx_(n-1))*(y_n - my_n) : <covariance * n>
+ * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
+ * This one-pass algorithm is stable.
+ */
+ public static class GenericUDAFCovarianceEvaluator extends GenericUDAFEvaluator {
+
+ // For PARTIAL1 and COMPLETE
+ private PrimitiveObjectInspector xInputOI;
+ private PrimitiveObjectInspector yInputOI;
+
+ // For PARTIAL2 and FINAL
+ private StructObjectInspector soi;
+ private StructField countField;
+ private StructField xavgField;
+ private StructField yavgField;
+ private StructField covarField;
+ private LongObjectInspector countFieldOI;
+ private DoubleObjectInspector xavgFieldOI;
+ private DoubleObjectInspector yavgFieldOI;
+ private DoubleObjectInspector covarFieldOI;
+
+ // For PARTIAL1 and PARTIAL2
+ private Object[] partialResult;
+
+ // For FINAL and COMPLETE
+ private DoubleWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ super.init(m, parameters);
+
+ // init input
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+ assert (parameters.length == 2);
+ xInputOI = (PrimitiveObjectInspector) parameters[0];
+ yInputOI = (PrimitiveObjectInspector) parameters[1];
+ } else {
+ assert (parameters.length == 1);
+ soi = (StructObjectInspector) parameters[0];
+
+ countField = soi.getStructFieldRef("count");
+ xavgField = soi.getStructFieldRef("xavg");
+ yavgField = soi.getStructFieldRef("yavg");
+ covarField = soi.getStructFieldRef("covar");
+
+ countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+ xavgFieldOI = (DoubleObjectInspector) xavgField.getFieldObjectInspector();
+ yavgFieldOI = (DoubleObjectInspector) yavgField.getFieldObjectInspector();
+ covarFieldOI = (DoubleObjectInspector) covarField.getFieldObjectInspector();
+ }
+
+ // init output
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+ // The output of a partial aggregation is a struct containing
+ // a long count, two double averages, and a double covariance.
+
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("xavg");
+ fname.add("yavg");
+ fname.add("covar");
+
+ partialResult = new Object[4];
+ partialResult[0] = new LongWritable(0);
+ partialResult[1] = new DoubleWritable(0);
+ partialResult[2] = new DoubleWritable(0);
+ partialResult[3] = new DoubleWritable(0);
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+ } else {
+ setResult(new DoubleWritable(0));
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+ }
+
+ static class StdAgg implements SerializableBuffer {
+ long count; // number n of elements
+ double xavg; // average of x elements
+ double yavg; // average of y elements
+ double covar; // n times the covariance
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ count = BufferSerDeUtil.getLong(data, start);
+ start += 8;
+ xavg = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ yavg = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ covar = BufferSerDeUtil.getDouble(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeLong(count, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(xavg, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(yavg, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(covar, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeLong(count);
+ output.writeDouble(xavg);
+ output.writeDouble(yavg);
+ output.writeDouble(covar);
+ }
+ };
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ StdAgg result = new StdAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ myagg.count = 0;
+ myagg.xavg = 0;
+ myagg.yavg = 0;
+ myagg.covar = 0;
+ }
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 2);
+ Object px = parameters[0];
+ Object py = parameters[1];
+ if (px != null && py != null) {
+ StdAgg myagg = (StdAgg) agg;
+ double vx = PrimitiveObjectInspectorUtils.getDouble(px, xInputOI);
+ double vy = PrimitiveObjectInspectorUtils.getDouble(py, yInputOI);
+ myagg.count++;
+ myagg.yavg = myagg.yavg + (vy - myagg.yavg) / myagg.count;
+ if (myagg.count > 1) {
+ myagg.covar += (vx - myagg.xavg) * (vy - myagg.yavg);
+ }
+ myagg.xavg = myagg.xavg + (vx - myagg.xavg) / myagg.count;
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ ((LongWritable) partialResult[0]).set(myagg.count);
+ ((DoubleWritable) partialResult[1]).set(myagg.xavg);
+ ((DoubleWritable) partialResult[2]).set(myagg.yavg);
+ ((DoubleWritable) partialResult[3]).set(myagg.covar);
+ return partialResult;
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ StdAgg myagg = (StdAgg) agg;
+
+ Object partialCount = soi.getStructFieldData(partial, countField);
+ Object partialXAvg = soi.getStructFieldData(partial, xavgField);
+ Object partialYAvg = soi.getStructFieldData(partial, yavgField);
+ Object partialCovar = soi.getStructFieldData(partial, covarField);
+
+ long nA = myagg.count;
+ long nB = countFieldOI.get(partialCount);
+
+ if (nA == 0) {
+ // Just copy the information since there is nothing so far
+ myagg.count = countFieldOI.get(partialCount);
+ myagg.xavg = xavgFieldOI.get(partialXAvg);
+ myagg.yavg = yavgFieldOI.get(partialYAvg);
+ myagg.covar = covarFieldOI.get(partialCovar);
+ }
+
+ if (nA != 0 && nB != 0) {
+ // Merge the two partials
+ double xavgA = myagg.xavg;
+ double yavgA = myagg.yavg;
+ double xavgB = xavgFieldOI.get(partialXAvg);
+ double yavgB = yavgFieldOI.get(partialYAvg);
+ double covarB = covarFieldOI.get(partialCovar);
+
+ myagg.count += nB;
+ myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
+ myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
+ myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB) * ((double) (nA * nB) / myagg.count);
+ }
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+
+ if (myagg.count == 0) { // SQL standard - return null for zero
+ // elements
+ return null;
+ } else {
+ getResult().set(myagg.covar / (myagg.count));
+ return getResult();
+ }
+ }
+
+ public void setResult(DoubleWritable result) {
+ this.result = result;
+ }
+
+ public DoubleWritable getResult() {
+ return result;
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
new file mode 100644
index 0000000..afdc397
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
@@ -0,0 +1,272 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.util.StringUtils;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * GenericUDAFSum.
+ */
+@Description(name = "sum", value = "_FUNC_(x) - Returns the sum of a set of numbers")
+public class GenericUDAFSum extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFSum.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 1) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ return new GenericUDAFSumLong();
+ case FLOAT:
+ case DOUBLE:
+ case STRING:
+ return new GenericUDAFSumDouble();
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * GenericUDAFSumDouble.
+ */
+ public static class GenericUDAFSumDouble extends GenericUDAFEvaluator {
+ private PrimitiveObjectInspector inputOI;
+ private DoubleWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ super.init(m, parameters);
+ result = new DoubleWritable(0);
+ inputOI = (PrimitiveObjectInspector) parameters[0];
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+
+ /** class for storing double sum value. */
+ static class SumDoubleAgg implements SerializableBuffer {
+ boolean empty;
+ double sum;
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ empty = BufferSerDeUtil.getBoolean(data, start);
+ start += 1;
+ sum = BufferSerDeUtil.getDouble(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeBoolean(empty, data, start);
+ start += 1;
+ BufferSerDeUtil.writeDouble(sum, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeBoolean(empty);
+ output.writeDouble(sum);
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ SumDoubleAgg result = new SumDoubleAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ SumDoubleAgg myagg = (SumDoubleAgg) agg;
+ myagg.empty = true;
+ myagg.sum = 0;
+ }
+
+ boolean warned = false;
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ try {
+ merge(agg, parameters[0]);
+ } catch (NumberFormatException e) {
+ if (!warned) {
+ warned = true;
+ LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+ LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ return terminate(agg);
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ SumDoubleAgg myagg = (SumDoubleAgg) agg;
+ myagg.empty = false;
+ myagg.sum += PrimitiveObjectInspectorUtils.getDouble(partial, inputOI);
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ SumDoubleAgg myagg = (SumDoubleAgg) agg;
+ if (myagg.empty) {
+ return null;
+ }
+ result.set(myagg.sum);
+ return result;
+ }
+
+ }
+
+ /**
+ * GenericUDAFSumLong.
+ */
+ public static class GenericUDAFSumLong extends GenericUDAFEvaluator {
+ private PrimitiveObjectInspector inputOI;
+ private LongWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ super.init(m, parameters);
+ result = new LongWritable(0);
+ inputOI = (PrimitiveObjectInspector) parameters[0];
+ return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+ }
+
+ /** class for storing double sum value. */
+ static class SumLongAgg implements SerializableBuffer {
+ boolean empty;
+ long sum;
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ empty = BufferSerDeUtil.getBoolean(data, start);
+ start += 1;
+ sum = BufferSerDeUtil.getLong(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeBoolean(empty, data, start);
+ start += 1;
+ BufferSerDeUtil.writeLong(sum, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeBoolean(empty);
+ output.writeLong(sum);
+ }
+ }
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ SumLongAgg result = new SumLongAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ SumLongAgg myagg = (SumLongAgg) agg;
+ myagg.empty = true;
+ myagg.sum = 0;
+ }
+
+ private boolean warned = false;
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ try {
+ merge(agg, parameters[0]);
+ } catch (NumberFormatException e) {
+ if (!warned) {
+ warned = true;
+ LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ return terminate(agg);
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ SumLongAgg myagg = (SumLongAgg) agg;
+ myagg.sum += PrimitiveObjectInspectorUtils.getLong(partial, inputOI);
+ myagg.empty = false;
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ SumLongAgg myagg = (SumLongAgg) agg;
+ if (myagg.empty) {
+ return null;
+ }
+ result.set(myagg.sum);
+ return result;
+ }
+
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
new file mode 100644
index 0000000..e839008
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
@@ -0,0 +1,305 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.util.StringUtils;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * Compute the variance. This class is extended by: GenericUDAFVarianceSample
+ * GenericUDAFStd GenericUDAFStdSample
+ */
+@Description(name = "variance,var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers")
+public class GenericUDAFVariance extends AbstractGenericUDAFResolver {
+
+ static final Log LOG = LogFactory.getLog(GenericUDAFVariance.class.getName());
+
+ @Override
+ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+ if (parameters.length != 1) {
+ throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
+ }
+
+ if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+ case BYTE:
+ case SHORT:
+ case INT:
+ case LONG:
+ case FLOAT:
+ case DOUBLE:
+ case STRING:
+ return new GenericUDAFVarianceEvaluator();
+ case BOOLEAN:
+ default:
+ throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but "
+ + parameters[0].getTypeName() + " is passed.");
+ }
+ }
+
+ /**
+ * Evaluate the variance using the algorithm described by Chan, Golub, and
+ * LeVeque in
+ * "Algorithms for computing the sample variance: analysis and recommendations"
+ * The American Statistician, 37 (1983) pp. 242--247.
+ * variance = variance1 + variance2 + n/(m*(m+n)) * pow(((m/n)*t1 - t2),2)
+ * where: - variance is sum[x-avg^2] (this is actually n times the variance)
+ * and is updated at every step. - n is the count of elements in chunk1 - m
+ * is the count of elements in chunk2 - t1 = sum of elements in chunk1, t2 =
+ * sum of elements in chunk2.
+ * This algorithm was proven to be numerically stable by J.L. Barlow in
+ * "Error analysis of a pairwise summation algorithm to compute sample variance"
+ * Numer. Math, 58 (1991) pp. 583--590
+ */
+ public static class GenericUDAFVarianceEvaluator extends GenericUDAFEvaluator {
+
+ // For PARTIAL1 and COMPLETE
+ private PrimitiveObjectInspector inputOI;
+
+ // For PARTIAL2 and FINAL
+ private StructObjectInspector soi;
+ private StructField countField;
+ private StructField sumField;
+ private StructField varianceField;
+ private LongObjectInspector countFieldOI;
+ private DoubleObjectInspector sumFieldOI;
+
+ // For PARTIAL1 and PARTIAL2
+ private Object[] partialResult;
+
+ // For FINAL and COMPLETE
+ private DoubleWritable result;
+
+ @Override
+ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ super.init(m, parameters);
+
+ // init input
+ if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+ inputOI = (PrimitiveObjectInspector) parameters[0];
+ } else {
+ soi = (StructObjectInspector) parameters[0];
+
+ countField = soi.getStructFieldRef("count");
+ sumField = soi.getStructFieldRef("sum");
+ varianceField = soi.getStructFieldRef("variance");
+
+ countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+ sumFieldOI = (DoubleObjectInspector) sumField.getFieldObjectInspector();
+ }
+
+ // init output
+ if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+ // The output of a partial aggregation is a struct containing
+ // a long count and doubles sum and variance.
+
+ ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+ foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+ foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+ ArrayList<String> fname = new ArrayList<String>();
+ fname.add("count");
+ fname.add("sum");
+ fname.add("variance");
+
+ partialResult = new Object[3];
+ partialResult[0] = new LongWritable(0);
+ partialResult[1] = new DoubleWritable(0);
+ partialResult[2] = new DoubleWritable(0);
+
+ return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+ } else {
+ setResult(new DoubleWritable(0));
+ return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+ }
+ }
+
+ static class StdAgg implements SerializableBuffer {
+ long count; // number of elements
+ double sum; // sum of elements
+ double variance; // sum[x-avg^2] (this is actually n times the
+ // variance)
+
+ @Override
+ public void deSerializeAggBuffer(byte[] data, int start, int len) {
+ count = BufferSerDeUtil.getLong(data, start);
+ start += 8;
+ sum = BufferSerDeUtil.getDouble(data, start);
+ start += 8;
+ variance = BufferSerDeUtil.getDouble(data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(byte[] data, int start, int len) {
+ BufferSerDeUtil.writeLong(count, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(sum, data, start);
+ start += 8;
+ BufferSerDeUtil.writeDouble(variance, data, start);
+ }
+
+ @Override
+ public void serializeAggBuffer(DataOutput output) throws IOException {
+ output.writeLong(count);
+ output.writeDouble(sum);
+ output.writeDouble(variance);
+ }
+ };
+
+ @Override
+ public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+ StdAgg result = new StdAgg();
+ reset(result);
+ return result;
+ }
+
+ @Override
+ public void reset(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ myagg.count = 0;
+ myagg.sum = 0;
+ myagg.variance = 0;
+ }
+
+ private boolean warned = false;
+
+ @Override
+ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+ assert (parameters.length == 1);
+ Object p = parameters[0];
+ if (p != null) {
+ StdAgg myagg = (StdAgg) agg;
+ try {
+ double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI);
+ myagg.count++;
+ myagg.sum += v;
+ if (myagg.count > 1) {
+ double t = myagg.count * v - myagg.sum;
+ myagg.variance += (t * t) / ((double) myagg.count * (myagg.count - 1));
+ }
+ } catch (NumberFormatException e) {
+ if (!warned) {
+ warned = true;
+ LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+ LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
+ }
+ }
+ }
+ }
+
+ @Override
+ public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+ ((LongWritable) partialResult[0]).set(myagg.count);
+ ((DoubleWritable) partialResult[1]).set(myagg.sum);
+ ((DoubleWritable) partialResult[2]).set(myagg.variance);
+ return partialResult;
+ }
+
+ @Override
+ public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+ if (partial != null) {
+ StdAgg myagg = (StdAgg) agg;
+
+ Object partialCount = soi.getStructFieldData(partial, countField);
+ Object partialSum = soi.getStructFieldData(partial, sumField);
+ Object partialVariance = soi.getStructFieldData(partial, varianceField);
+
+ long n = myagg.count;
+ long m = countFieldOI.get(partialCount);
+
+ if (n == 0) {
+ // Just copy the information since there is nothing so far
+ myagg.variance = sumFieldOI.get(partialVariance);
+ myagg.count = countFieldOI.get(partialCount);
+ myagg.sum = sumFieldOI.get(partialSum);
+ }
+
+ if (m != 0 && n != 0) {
+ // Merge the two partials
+
+ double a = myagg.sum;
+ double b = sumFieldOI.get(partialSum);
+
+ myagg.count += m;
+ myagg.sum += b;
+ double t = (m / (double) n) * a - b;
+ myagg.variance += sumFieldOI.get(partialVariance) + ((n / (double) m) / ((double) n + m)) * t * t;
+ }
+ }
+ }
+
+ @Override
+ public Object terminate(AggregationBuffer agg) throws HiveException {
+ StdAgg myagg = (StdAgg) agg;
+
+ if (myagg.count == 0) { // SQL standard - return null for zero
+ // elements
+ return null;
+ } else {
+ if (myagg.count > 1) {
+ getResult().set(myagg.variance / (myagg.count));
+ } else { // for one element the variance is always 0
+ getResult().set(0);
+ }
+ return getResult();
+ }
+ }
+
+ public void setResult(DoubleWritable result) {
+ this.result = result;
+ }
+
+ public DoubleWritable getResult() {
+ return result;
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties
new file mode 100644
index 0000000..2d2401a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties
@@ -0,0 +1,37 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME=../../../../hyracks
+
+#The tmp directory for cc to install jars
+CCTMP_DIR=/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="/tmp/t3,/tmp/t4"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/hivesterix/conf/configuration.xsl b/hivesterix/hivesterix-dist/src/main/resources/conf/configuration.xsl
similarity index 100%
copy from hivesterix/conf/configuration.xsl
copy to hivesterix/hivesterix-dist/src/main/resources/conf/configuration.xsl
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/debugnc.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/debugnc.properties
new file mode 100755
index 0000000..27afa26
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/debugnc.properties
@@ -0,0 +1,12 @@
+#The tmp directory for nc to install jars
+NCTMP_DIR2=/tmp/t-1
+
+#The directory to put nc logs
+NCLOGS_DIR2=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS2="/tmp/t-2,/tmp/t-3"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS2="-Xdebug -Xrunjdwp:transport=dt_socket,address=7003,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/hivesterix/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
similarity index 98%
copy from hivesterix/conf/hive-default.xml
copy to hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
index 034ea61..587eede 100644
--- a/hivesterix/conf/hive-default.xml
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
@@ -23,22 +23,11 @@
By setting this property to -1, Hive will automatically figure out what
should be the number of reducers.
</description>
- </property>
- <property>
- <name>hive.hyracks.host</name>
- <value>128.195.14.4</value>
- </property>
-
- <property>
- <name>hive.hyracks.port</name>
- <value>3099</value>
- </property>
-
- <property>
- <name>hive.hyracks.app</name>
- <value>hivesterix</value>
- </property>
+ <property>
+ <name>hive.hyracks.connectorpolicy</name>
+ <value>PIPELINING</value>
+ </property>
<property>
<name>hive.hyracks.parrallelism</name>
@@ -52,12 +41,12 @@
<property>
<name>hive.algebricks.groupby.external.memory</name>
- <value>536870912</value>
+ <value>33554432</value>
</property>
<property>
<name>hive.algebricks.sort.memory</name>
- <value>536870912</value>
+ <value>33554432</value>
</property>
<property>
diff --git a/hivesterix/conf/hive-log4j.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
similarity index 100%
copy from hivesterix/conf/hive-log4j.properties
copy to hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/master b/hivesterix/hivesterix-dist/src/main/resources/conf/master
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/master
@@ -0,0 +1 @@
+localhost
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/slaves b/hivesterix/hivesterix-dist/src/main/resources/conf/slaves
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/slaves
@@ -0,0 +1 @@
+localhost
diff --git a/hivesterix/resource/bin/ext/cli.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/cli.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/cli.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/cli.sh
diff --git a/hivesterix/resource/bin/ext/help.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/help.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/help.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/help.sh
diff --git a/hivesterix/resource/bin/ext/hiveserver.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hiveserver.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/hiveserver.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hiveserver.sh
diff --git a/hivesterix/resource/bin/ext/hwi.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hwi.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/hwi.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hwi.sh
diff --git a/hivesterix/resource/bin/ext/jar.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/jar.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/jar.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/jar.sh
diff --git a/hivesterix/resource/bin/ext/lineage.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/lineage.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/lineage.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/lineage.sh
diff --git a/hivesterix/resource/bin/ext/metastore.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/metastore.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/metastore.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/metastore.sh
diff --git a/hivesterix/resource/bin/ext/rcfilecat.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/rcfilecat.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/rcfilecat.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/rcfilecat.sh
diff --git a/hivesterix/resource/bin/ext/util/execHiveCmd.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/util/execHiveCmd.sh
similarity index 100%
copy from hivesterix/resource/bin/ext/util/execHiveCmd.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/ext/util/execHiveCmd.sh
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/getip.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/getip.sh
new file mode 100755
index 0000000..e0cdf73
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/getip.sh
@@ -0,0 +1,21 @@
+#get the OS
+OS_NAME=`uname -a|awk '{print $1}'`
+LINUX_OS='Linux'
+
+if [ $OS_NAME = $LINUX_OS ];
+then
+ #Get IP Address
+ IPADDR=`/sbin/ifconfig eth0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ if [ "$IPADDR" = "" ]
+ then
+ IPADDR=`/sbin/ifconfig lo | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ fi
+else
+ IPADDR=`/sbin/ifconfig en1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ if [ "$IPADDR" = "" ]
+ then
+ IPADDR=`/sbin/ifconfig lo0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+ fi
+
+fi
+echo $IPADDR
diff --git a/hivesterix/resource/bin/hive b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive
similarity index 100%
copy from hivesterix/resource/bin/hive
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/hive
diff --git a/hivesterix/resource/bin/hive-config.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive-config.sh
similarity index 100%
copy from hivesterix/resource/bin/hive-config.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/hive-config.sh
diff --git a/hivesterix/resource/bin/init-hive-dfs.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/init-hive-dfs.sh
similarity index 100%
copy from hivesterix/resource/bin/init-hive-dfs.sh
copy to hivesterix/hivesterix-dist/src/main/resources/scripts/init-hive-dfs.sh
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/pregelix b/hivesterix/hivesterix-dist/src/main/resources/scripts/pregelix
new file mode 100644
index 0000000..6997078
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/pregelix
@@ -0,0 +1,113 @@
+#!/bin/sh
+# ----------------------------------------------------------------------------
+# Copyright 2001-2006 The Apache Software Foundation.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ----------------------------------------------------------------------------
+#
+# Copyright (c) 2001-2006 The Apache Software Foundation. All rights
+# reserved.
+
+
+# resolve links - $0 may be a softlink
+PRG="$0"
+
+while [ -h "$PRG" ]; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`/"$link"
+ fi
+done
+
+PRGDIR=`dirname "$PRG"`
+BASEDIR=`cd "$PRGDIR/.." >/dev/null; pwd`
+
+
+
+# OS specific support. $var _must_ be set to either true or false.
+cygwin=false;
+darwin=false;
+case "`uname`" in
+ CYGWIN*) cygwin=true ;;
+ Darwin*) darwin=true
+ if [ -z "$JAVA_VERSION" ] ; then
+ JAVA_VERSION="CurrentJDK"
+ else
+ echo "Using Java version: $JAVA_VERSION"
+ fi
+ if [ -z "$JAVA_HOME" ] ; then
+ JAVA_HOME=/System/Library/Frameworks/JavaVM.framework/Versions/${JAVA_VERSION}/Home
+ fi
+ ;;
+esac
+
+if [ -z "$JAVA_HOME" ] ; then
+ if [ -r /etc/gentoo-release ] ; then
+ JAVA_HOME=`java-config --jre-home`
+ fi
+fi
+
+# For Cygwin, ensure paths are in UNIX format before anything is touched
+if $cygwin ; then
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
+ [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
+fi
+
+# If a specific java binary isn't specified search for the standard 'java' binary
+if [ -z "$JAVACMD" ] ; then
+ if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ else
+ JAVACMD=`which java`
+ fi
+fi
+
+if [ ! -x "$JAVACMD" ] ; then
+ echo "Error: JAVA_HOME is not defined correctly." 1>&2
+ echo " We cannot execute $JAVACMD" 1>&2
+ exit 1
+fi
+
+if [ -z "$REPO" ]
+then
+ REPO="$BASEDIR"/lib
+fi
+
+CLASSPATH=$CLASSPATH_PREFIX:"$HADOOP_HOME"/conf:"$BASEDIR"/etc:$1
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin; then
+ [ -n "$CLASSPATH" ] && CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
+ [ -n "$JAVA_HOME" ] && JAVA_HOME=`cygpath --path --windows "$JAVA_HOME"`
+ [ -n "$HOME" ] && HOME=`cygpath --path --windows "$HOME"`
+ [ -n "$BASEDIR" ] && BASEDIR=`cygpath --path --windows "$BASEDIR"`
+ [ -n "$REPO" ] && REPO=`cygpath --path --windows "$REPO"`
+fi
+
+exec "$JAVACMD" $JAVA_OPTS \
+ -classpath "$CLASSPATH" \
+ -Dapp.name="pregelix" \
+ -Dapp.pid="$$" \
+ -Dapp.repo="$REPO" \
+ -Dapp.home="$BASEDIR" \
+ -Dbasedir="$BASEDIR" \
+ org.apache.hadoop.util.RunJar \
+ "$@"
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/pregelix.bat b/hivesterix/hivesterix-dist/src/main/resources/scripts/pregelix.bat
new file mode 100644
index 0000000..536e3c8
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/pregelix.bat
@@ -0,0 +1,110 @@
+@REM ----------------------------------------------------------------------------
+@REM Copyright 2001-2006 The Apache Software Foundation.
+@REM
+@REM Licensed under the Apache License, Version 2.0 (the "License");
+@REM you may not use this file except in compliance with the License.
+@REM You may obtain a copy of the License at
+@REM
+@REM http://www.apache.org/licenses/LICENSE-2.0
+@REM
+@REM Unless required by applicable law or agreed to in writing, software
+@REM distributed under the License is distributed on an "AS IS" BASIS,
+@REM WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@REM See the License for the specific language governing permissions and
+@REM limitations under the License.
+@REM ----------------------------------------------------------------------------
+@REM
+@REM Copyright (c) 2001-2006 The Apache Software Foundation. All rights
+@REM reserved.
+
+@echo off
+
+set ERROR_CODE=0
+
+:init
+@REM Decide how to startup depending on the version of windows
+
+@REM -- Win98ME
+if NOT "%OS%"=="Windows_NT" goto Win9xArg
+
+@REM set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" @setlocal
+
+@REM -- 4NT shell
+if "%eval[2+2]" == "4" goto 4NTArgs
+
+@REM -- Regular WinNT shell
+set CMD_LINE_ARGS=%*
+goto WinNTGetScriptDir
+
+@REM The 4NT Shell from jp software
+:4NTArgs
+set CMD_LINE_ARGS=%$
+goto WinNTGetScriptDir
+
+:Win9xArg
+@REM Slurp the command line arguments. This loop allows for an unlimited number
+@REM of arguments (up to the command line limit, anyway).
+set CMD_LINE_ARGS=
+:Win9xApp
+if %1a==a goto Win9xGetScriptDir
+set CMD_LINE_ARGS=%CMD_LINE_ARGS% %1
+shift
+goto Win9xApp
+
+:Win9xGetScriptDir
+set SAVEDIR=%CD%
+%0\
+cd %0\..\..
+set BASEDIR=%CD%
+cd %SAVEDIR%
+set SAVE_DIR=
+goto repoSetup
+
+:WinNTGetScriptDir
+set BASEDIR=%~dp0\..
+
+:repoSetup
+
+
+if "%JAVACMD%"=="" set JAVACMD=java
+
+if "%REPO%"=="" set REPO=%BASEDIR%\lib
+
+cp $BASEDIR"\..\a-hadoop-patch.jar "$REPO"\
+
+set CLASSPATH="%BASEDIR%"\etc;"%REPO%"\a-hadoop-patch.jar;"%REPO%"\pregelix-api-0.0.1-SNAPSHOT.jar;"%REPO%"\hyracks-dataflow-common-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-api-0.2.2-SNAPSHOT.jar;"%REPO%"\json-20090211.jar;"%REPO%"\httpclient-4.1-alpha2.jar;"%REPO%"\httpcore-4.1-beta1.jar;"%REPO%"\commons-logging-1.1.1.jar;"%REPO%"\commons-codec-1.3.jar;"%REPO%"\args4j-2.0.12.jar;"%REPO%"\hyracks-ipc-0.2.2-SNAPSHOT.jar;"%REPO%"\commons-lang3-3.1.jar;"%REPO%"\hyracks-data-std-0.2.2-SNAPSHOT.jar;"%REPO%"\hadoop-core-0.20.2.jar;"%REPO%"\commons-cli-1.2.jar;"%REPO%"\xmlenc-0.52.jar;"%REPO%"\commons-httpclient-3.0.1.jar;"%REPO%"\commons-net-1.4.1.jar;"%REPO%"\oro-2.0.8.jar;"%REPO%"\jetty-6.1.14.jar;"%REPO%"\jetty-util-6.1.14.jar;"%REPO%"\servlet-api-2.5-6.1.14.jar;"%REPO%"\jasper-runtime-5.5.12.jar;"%REPO%"\jasper-compiler-5.5.12.jar;"%REPO%"\jsp-api-2.1-6.1.14.jar;"%REPO%"\jsp-2.1-6.1.14.jar;"%REPO%"\core-3.1.1.jar;"%REPO%"\ant-1.6.5.jar;"%REPO%"\commons-el-1.0.jar;"%REPO%"\jets3t-0.7.1.jar;"%REPO%"\kfs-0.3.jar;"%REPO%"\hsqldb-1.8.0.10.jar;"%REPO%"\pregelix-dataflow-std-0.0.1-SNAPSHOT.jar;"%REPO%"\pregelix-dataflow-std-base-0.0.1-SNAPSHOT.jar;"%REPO%"\hyracks-dataflow-std-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-dataflow-hadoop-0.2.2-SNAPSHOT.jar;"%REPO%"\dcache-client-0.0.1.jar;"%REPO%"\jetty-client-8.0.0.M0.jar;"%REPO%"\jetty-http-8.0.0.RC0.jar;"%REPO%"\jetty-io-8.0.0.RC0.jar;"%REPO%"\jetty-util-8.0.0.RC0.jar;"%REPO%"\hyracks-storage-am-common-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-storage-common-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-storage-am-btree-0.2.2-SNAPSHOT.jar;"%REPO%"\btreehelper-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-control-cc-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-control-common-0.2.2-SNAPSHOT.jar;"%REPO%"\commons-io-1.3.1.jar;"%REPO%"\jetty-server-8.0.0.RC0.jar;"%REPO%"\servlet-api-3.0.20100224.jar;"%REPO%"\jetty-continuation-8.0.0.RC0.jar;"%REPO%"\jetty-webapp-8.0.0.RC0.jar;"%REPO%"\jetty-xml-8.0.0.RC0.jar;"%REPO%"\jetty-servlet-8.0.0.RC0.jar;"%REPO%"\jetty-security-8.0.0.RC0.jar;"%REPO%"\wicket-core-1.5.2.jar;"%REPO%"\wicket-util-1.5.2.jar;"%REPO%"\slf4j-api-1.6.1.jar;"%REPO%"\wicket-request-1.5.2.jar;"%REPO%"\slf4j-jcl-1.6.3.jar;"%REPO%"\hyracks-control-nc-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-net-0.2.2-SNAPSHOT.jar;"%REPO%"\hyracks-hadoop-compat-0.2.2-SNAPSHOT.jar;"%REPO%"\pregelix-dataflow-0.0.1-SNAPSHOT.jar;"%REPO%"\pregelix-runtime-0.0.1-SNAPSHOT.jar;"%REPO%"\hadoop-test-0.20.2.jar;"%REPO%"\ftplet-api-1.0.0.jar;"%REPO%"\mina-core-2.0.0-M5.jar;"%REPO%"\ftpserver-core-1.0.0.jar;"%REPO%"\ftpserver-deprecated-1.0.0-M2.jar;"%REPO%"\javax.servlet-api-3.0.1.jar;"%REPO%"\pregelix-core-0.0.1-SNAPSHOT.jar
+goto endInit
+
+@REM Reaching here means variables are defined and arguments have been captured
+:endInit
+
+%JAVACMD% %JAVA_OPTS% -classpath %CLASSPATH_PREFIX%;%CLASSPATH% -Dapp.name="pregelix" -Dapp.repo="%REPO%" -Dapp.home="%BASEDIR%" -Dbasedir="%BASEDIR%" org.apache.hadoop.util.RunJar %CMD_LINE_ARGS%
+if ERRORLEVEL 1 goto error
+goto end
+
+:error
+if "%OS%"=="Windows_NT" @endlocal
+set ERROR_CODE=%ERRORLEVEL%
+
+:end
+@REM set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" goto endNT
+
+@REM For old DOS remove the set variables from ENV - we assume they were not set
+@REM before we started - at least we don't leave any baggage around
+set CMD_LINE_ARGS=
+goto postExec
+
+:endNT
+@REM If error code is set to 1 then the endlocal was done already in :error.
+if %ERROR_CODE% EQU 0 @endlocal
+
+
+:postExec
+
+if "%FORCE_EXIT_ON_ERROR%" == "on" (
+ if %ERROR_CODE% NEQ 0 exit %ERROR_CODE%
+)
+
+exit /B %ERROR_CODE%
\ No newline at end of file
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startAllNCs.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startAllNCs.sh
new file mode 100644
index 0000000..d30da26
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startAllNCs.sh
@@ -0,0 +1,6 @@
+PREGELIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+ ssh $i "cd ${PREGELIX_PATH}; export JAVA_HOME=${JAVA_HOME}; bin/startnc.sh"
+done
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startCluster.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startCluster.sh
new file mode 100644
index 0000000..6aa9161
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startCluster.sh
@@ -0,0 +1,19 @@
+bin/startcc.sh
+sleep 5
+bin/startAllNCs.sh
+
+. conf/cluster.properties
+# do we need to specify the version somewhere?
+hyrackcmd=`ls ${HYRACKS_HOME}/hyracks-cli/target/hyracks-cli-*-binary-assembly/bin/hyrackscli`
+# find zip file
+appzip=`ls $PWD/../hivesterix-dist-*-binary-assembly.zip`
+
+[ -f $hyrackcmd ] || { echo "Hyracks commandline is missing"; exit -1;}
+[ -f $appzip ] || { echo "Genomix binary-assembly.zip is missing"; exit -1;}
+
+CCHOST_NAME=`cat conf/master`
+
+IPADDR=`bin/getip.sh`
+echo "connect to \"${IPADDR}:${CC_CLIENTPORT}\"; create application hivesterix \"$appzip\";" | $hyrackcmd
+echo ""
+
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startDebugNc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startDebugNc.sh
new file mode 100755
index 0000000..fe6cf27
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startDebugNc.sh
@@ -0,0 +1,50 @@
+hostname
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+. conf/debugnc.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR2
+mkdir $NCTMP_DIR2
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR2
+mkdir $NCLOGS_DIR2
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS2 | tr "," "\n")
+for io_dir in $io_dirs
+do
+ rm -rf $io_dir
+ mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+#Get OS
+IPADDR=`bin/getip.sh`
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+NODEID=${NODEID}2
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS2
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR2
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS2}" &> $NCLOGS_DIR2/$NODEID.log &
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startcc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startcc.sh
new file mode 100644
index 0000000..fe2551d
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startcc.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+hostname
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CCHOST=`bin/getip.sh`
+
+#Remove the temp dir
+rm -rf $CCTMP_DIR
+mkdir $CCTMP_DIR
+
+#Remove the logs dir
+rm -rf $CCLOGS_DIR
+mkdir $CCLOGS_DIR
+
+#Export JAVA_HOME and JAVA_OPTS
+export JAVA_HOME=$JAVA_HOME
+export JAVA_OPTS=$CCJAVA_OPTS
+
+#Launch hyracks cc script
+chmod -R 755 $HYRACKS_HOME
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 3 &> $CCLOGS_DIR/cc.log &
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startnc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startnc.sh
new file mode 100644
index 0000000..6e0f90e
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startnc.sh
@@ -0,0 +1,49 @@
+hostname
+
+MY_NAME=`hostname`
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR
+mkdir $NCTMP_DIR
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR
+mkdir $NCLOGS_DIR
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+ rm -rf $io_dir
+ mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+IPADDR=`bin/getip.sh`
+#echo $IPADDR
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS}" &> $NCLOGS_DIR/$NODEID.log &
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopAllNCs.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopAllNCs.sh
new file mode 100644
index 0000000..12367c1
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopAllNCs.sh
@@ -0,0 +1,6 @@
+PREGELIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+ ssh $i "cd ${PREGELIX_PATH}; bin/stopnc.sh"
+done
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopCluster.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopCluster.sh
new file mode 100644
index 0000000..4889934
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopCluster.sh
@@ -0,0 +1,3 @@
+bin/stopAllNCs.sh
+sleep 2
+bin/stopcc.sh
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopcc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopcc.sh
new file mode 100644
index 0000000..c2f525a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopcc.sh
@@ -0,0 +1,10 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep hyracks|awk '{print $2}'`
+echo $PID
+kill -9 $PID
+
+#Clean up CC temp dir
+rm -rf $CCTMP_DIR/*
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopnc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopnc.sh
new file mode 100644
index 0000000..03ce4e7
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopnc.sh
@@ -0,0 +1,23 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+
+if [ "$PID" == "" ]; then
+ USERID=`id | sed 's/^uid=//;s/(.*$//'`
+ PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+fi
+
+echo $PID
+kill -9 $PID
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+ rm -rf $io_dir/*
+done
+
+#Clean up NC temp dir
+rm -rf $NCTMP_DIR/*
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java
new file mode 100644
index 0000000..aa38fe7
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java
@@ -0,0 +1,142 @@
+package edu.uci.ics.hivesterix.perf;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestCase;
+
+public class PerfTestCase extends AbstractPerfTestCase {
+ private File resultFile;
+ private FileSystem dfs;
+
+ PerfTestCase(File queryFile, File resultFile) {
+ super("testRuntimeFunction", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testRuntimeFunction() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+
+ dfs = FileSystem.get(ConfUtil.getJobConf());
+
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ driver.run(query);
+ driver.clear();
+ i++;
+ }
+
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ String tableName = removeExt(resultFile.getName());
+ String directory = warehouse + "/" + tableName + "/";
+ String localDirectory = "tmp";
+
+ FileStatus[] files = dfs.listStatus(new Path(directory));
+ FileSystem lfs = null;
+ if (files == null) {
+ lfs = FileSystem.getLocal(ConfUtil.getJobConf());
+ files = lfs.listStatus(new Path(directory));
+ }
+
+ File resultDirectory = new File(localDirectory + "/" + tableName);
+ deleteDir(resultDirectory);
+ resultDirectory.mkdir();
+
+ for (FileStatus fs : files) {
+ Path src = fs.getPath();
+ if (src.getName().indexOf("crc") >= 0)
+ continue;
+
+ String destStr = localDirectory + "/" + tableName + "/" + src.getName();
+ Path dest = new Path(destStr);
+ if (lfs != null) {
+ lfs.copyToLocalFile(src, dest);
+ dfs.copyFromLocalFile(dest, new Path(directory));
+ } else
+ dfs.copyToLocalFile(src, dest);
+ }
+
+ File[] rFiles = resultDirectory.listFiles();
+ StringBuilder sb = new StringBuilder();
+ for (File r : rFiles) {
+ if (r.getName().indexOf("crc") >= 0)
+ continue;
+ readFileToString(r, sb);
+ }
+ deleteDir(resultDirectory);
+
+ StringBuilder buf = new StringBuilder();
+ readFileToString(resultFile, buf);
+ if (!equal(buf, sb)) {
+ throw new Exception("Result for " + queryFile + " changed:\n" + sw.toString());
+ }
+ }
+
+ private void deleteDir(File resultDirectory) {
+ if (resultDirectory.exists()) {
+ File[] rFiles = resultDirectory.listFiles();
+ for (File r : rFiles)
+ r.delete();
+ resultDirectory.delete();
+ }
+ }
+
+ private boolean equal(StringBuilder sb1, StringBuilder sb2) {
+ String s1 = sb1.toString();
+ String s2 = sb2.toString();
+ String[] rowsOne = s1.split("\n");
+ String[] rowsTwo = s2.split("\n");
+
+ if (rowsOne.length != rowsTwo.length)
+ return false;
+
+ for (int i = 0; i < rowsOne.length; i++) {
+ String row1 = rowsOne[i];
+ String row2 = rowsTwo[i];
+
+ if (row1.equals(row2))
+ continue;
+
+ String[] fields1 = row1.split("");
+ String[] fields2 = row2.split("");
+
+ for (int j = 0; j < fields1.length; j++) {
+ if (fields1[j].equals(fields2[j])) {
+ continue;
+ } else if (fields1[j].indexOf('.') < 0) {
+ return false;
+ } else {
+ Float float1 = Float.parseFloat(fields1[j]);
+ Float float2 = Float.parseFloat(fields2[j]);
+
+ if (Math.abs(float1 - float2) == 0)
+ continue;
+ else
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java
new file mode 100644
index 0000000..796842d
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.hivesterix.perf;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestSuiteClass;
+
+public class PerfTestSuite extends AbstractPerfTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/perf/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/perf/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/perf/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+ public static Test suite() throws Exception {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ PerfTestSuite testSuite = new PerfTestSuite();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile()) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new PerfTestCase(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java
new file mode 100644
index 0000000..4777351
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java
@@ -0,0 +1,99 @@
+package edu.uci.ics.hivesterix.perf;
+
+import java.io.File;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestCase;
+
+public class PerfTestSuiteCaseGenerator extends AbstractPerfTestCase {
+ private File resultFile;
+ private FileSystem dfs;
+
+ PerfTestSuiteCaseGenerator(File queryFile, File resultFile) {
+ super("testRuntimeFunction", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testRuntimeFunction() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf);
+ driver.init();
+
+ dfs = FileSystem.get(ConfUtil.getJobConf());
+
+ long startTime = System.currentTimeMillis();
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ driver.run(query);
+ // driver.clear();
+ i++;
+ }
+ long endTime = System.currentTimeMillis();
+ System.out.println(resultFile.getName() + " execution time " + (endTime - startTime));
+
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ String tableName = removeExt(resultFile.getName());
+ String directory = warehouse + "/" + tableName + "/";
+ String localDirectory = "tmp";
+
+ FileStatus[] files = dfs.listStatus(new Path(directory));
+ FileSystem lfs = null;
+ if (files == null) {
+ lfs = FileSystem.getLocal(ConfUtil.getJobConf());
+ files = lfs.listStatus(new Path(directory));
+ }
+
+ File resultDirectory = new File(localDirectory + "/" + tableName);
+ deleteDir(resultDirectory);
+ resultDirectory.mkdir();
+
+ for (FileStatus fs : files) {
+ Path src = fs.getPath();
+ if (src.getName().indexOf("crc") >= 0)
+ continue;
+
+ String destStr = localDirectory + "/" + tableName + "/" + src.getName();
+ Path dest = new Path(destStr);
+ if (lfs != null) {
+ lfs.copyToLocalFile(src, dest);
+ dfs.copyFromLocalFile(dest, new Path(directory));
+ } else
+ dfs.copyToLocalFile(src, dest);
+ }
+
+ File[] rFiles = resultDirectory.listFiles();
+ StringBuilder sb = new StringBuilder();
+ for (File r : rFiles) {
+ if (r.getName().indexOf("crc") >= 0)
+ continue;
+ readFileToString(r, sb);
+ }
+ deleteDir(resultDirectory);
+
+ writeStringToFile(resultFile, sb);
+ }
+
+ private void deleteDir(File resultDirectory) {
+ if (resultDirectory.exists()) {
+ File[] rFiles = resultDirectory.listFiles();
+ for (File r : rFiles)
+ r.delete();
+ resultDirectory.delete();
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java
new file mode 100644
index 0000000..aa38014
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.hivesterix.perf;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestSuiteClass;
+
+public class PerfTestSuiteGenerator extends AbstractPerfTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/perf/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/perf/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/perf/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+ public static Test suite() throws Exception {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ PerfTestSuiteGenerator testSuite = new PerfTestSuiteGenerator();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile() && qFile.getName().startsWith("q18_")) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new PerfTestSuiteCaseGenerator(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java
new file mode 100644
index 0000000..7e7db36
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java
@@ -0,0 +1,49 @@
+package edu.uci.ics.hivesterix.perf.base;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import junit.framework.TestCase;
+
+public class AbstractPerfTestCase extends TestCase {
+ protected File queryFile;
+
+ public AbstractPerfTestCase(String testName, File queryFile) {
+ super(testName);
+ }
+
+ protected static void readFileToString(File file, StringBuilder buf) throws Exception {
+ BufferedReader result = new BufferedReader(new FileReader(file));
+ while (true) {
+ String s = result.readLine();
+ if (s == null) {
+ break;
+ } else {
+ buf.append(s);
+ buf.append('\n');
+ }
+ }
+ result.close();
+ }
+
+ protected static void writeStringToFile(File file, StringWriter buf) throws Exception {
+ PrintWriter result = new PrintWriter(new FileWriter(file));
+ result.print(buf);
+ result.close();
+ }
+
+ protected static void writeStringToFile(File file, StringBuilder buf) throws Exception {
+ PrintWriter result = new PrintWriter(new FileWriter(file));
+ result.print(buf);
+ result.close();
+ }
+
+ protected static String removeExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot);
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java
new file mode 100644
index 0000000..d9492d9
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java
@@ -0,0 +1,206 @@
+package edu.uci.ics.hivesterix.perf.base;
+
+import java.io.BufferedReader;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.TestSuite;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hyracks.api.client.HyracksConnection;
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
+import edu.uci.ics.hyracks.control.common.controllers.CCConfig;
+import edu.uci.ics.hyracks.control.common.controllers.NCConfig;
+import edu.uci.ics.hyracks.control.nc.NodeControllerService;
+
+@SuppressWarnings("deprecation")
+public abstract class AbstractPerfTestSuiteClass extends TestSuite {
+
+ private static final String PATH_TO_HADOOP_CONF = "src/test/resources/perf/hadoop/conf";
+ private static final String PATH_TO_HIVE_CONF = "src/test/resources/perf/hive/conf/hive-default.xml";
+ private static final String PATH_TO_DATA = "src/test/resources/perf/data/";
+
+ private MiniDFSCluster dfsCluster;
+ private MiniMRCluster mrCluster;
+
+ private JobConf conf = new JobConf();
+ protected FileSystem dfs;
+
+ private int numberOfNC = 2;
+ private ClusterControllerService cc;
+ private Map<String, NodeControllerService> ncs = new HashMap<String, NodeControllerService>();
+
+ /**
+ * setup cluster
+ *
+ * @throws IOException
+ */
+ protected void setup() throws Exception {
+ setupHdfs();
+ setupHyracks();
+ }
+
+ private void setupHdfs() throws IOException {
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+ HiveConf hconf = new HiveConf(SessionState.class);
+ hconf.addResource(new Path(PATH_TO_HIVE_CONF));
+
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ lfs.delete(new Path("metastore_db"), true);
+
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(hconf, numberOfNC, true, null);
+ dfs = dfsCluster.getFileSystem();
+
+ mrCluster = new MiniMRCluster(2, dfs.getUri().toString(), 1);
+ hconf.setVar(HiveConf.ConfVars.HADOOPJT, "localhost:" + mrCluster.getJobTrackerPort());
+ hconf.setInt("mapred.min.split.size", 1342177280);
+
+ conf = new JobConf(hconf);
+ ConfUtil.setJobConf(conf);
+
+ String fsName = conf.get("fs.default.name");
+ hconf.set("hive.metastore.warehouse.dir", fsName.concat("/tmp/hivesterix"));
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ dfs.mkdirs(new Path(warehouse));
+ ConfUtil.setHiveConf(hconf);
+ }
+
+ private void setupHyracks() throws Exception {
+ // read hive conf
+ HiveConf hconf = new HiveConf(SessionState.class);
+ hconf.addResource(new Path(PATH_TO_HIVE_CONF));
+ SessionState.start(hconf);
+ String ipAddress = hconf.get("hive.hyracks.host");
+ int clientPort = Integer.parseInt(hconf.get("hive.hyracks.port"));
+ int clusterPort = clientPort;
+ String applicationName = hconf.get("hive.hyracks.app");
+
+ // start hyracks cc
+ CCConfig ccConfig = new CCConfig();
+ ccConfig.clientNetIpAddress = ipAddress;
+ ccConfig.clientNetPort = clientPort;
+ ccConfig.clusterNetPort = clusterPort;
+ ccConfig.profileDumpPeriod = 1000;
+ ccConfig.heartbeatPeriod = 200000000;
+ ccConfig.maxHeartbeatLapsePeriods = 200000000;
+ cc = new ClusterControllerService(ccConfig);
+ cc.start();
+
+ // start hyracks nc
+ for (int i = 0; i < numberOfNC; i++) {
+ NCConfig ncConfig = new NCConfig();
+ ncConfig.ccHost = ipAddress;
+ ncConfig.clusterNetIPAddress = ipAddress;
+ ncConfig.ccPort = clientPort;
+ ncConfig.dataIPAddress = "127.0.0.1";
+ ncConfig.nodeId = "nc" + i;
+ NodeControllerService nc = new NodeControllerService(ncConfig);
+ nc.start();
+ ncs.put(ncConfig.nodeId, nc);
+ }
+
+ IHyracksClientConnection hcc = new HyracksConnection(ccConfig.clientNetIpAddress, clientPort);
+ hcc.createApplication(applicationName, null);
+ }
+
+ protected void makeDir(String path) throws IOException {
+ dfs.mkdirs(new Path(path));
+ }
+
+ protected void loadFiles(String src, String dest) throws IOException {
+ dfs.copyFromLocalFile(new Path(src), new Path(dest));
+ }
+
+ protected void cleanup() throws Exception {
+ cleanupHdfs();
+ cleanupHyracks();
+ }
+
+ /**
+ * cleanup hdfs cluster
+ */
+ private void cleanupHdfs() throws IOException {
+ dfs.delete(new Path("/"), true);
+ FileSystem.closeAll();
+ dfsCluster.shutdown();
+ }
+
+ /**
+ * cleanup hyracks cluster
+ */
+ private void cleanupHyracks() throws Exception {
+ Iterator<NodeControllerService> iterator = ncs.values().iterator();
+ while (iterator.hasNext()) {
+ NodeControllerService nc = iterator.next();
+ nc.stop();
+ }
+ cc.stop();
+ }
+
+ protected static List<String> getIgnoreList(String ignorePath) throws FileNotFoundException, IOException {
+ BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+ String s = null;
+ List<String> ignores = new ArrayList<String>();
+ while ((s = reader.readLine()) != null) {
+ ignores.add(s);
+ }
+ reader.close();
+ return ignores;
+ }
+
+ protected static boolean isIgnored(String q, List<String> ignoreList) {
+ for (String ignore : ignoreList) {
+ if (ignore.equals(q)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ protected void loadData() throws IOException {
+
+ makeDir("/tpch");
+ makeDir("/tpch/customer");
+ makeDir("/tpch/lineitem");
+ makeDir("/tpch/orders");
+ makeDir("/tpch/part");
+ makeDir("/tpch/partsupp");
+ makeDir("/tpch/supplier");
+ makeDir("/tpch/nation");
+ makeDir("/tpch/region");
+
+ makeDir("/jarod");
+
+ loadFiles(PATH_TO_DATA + "customer.tbl", "/tpch/customer/");
+ loadFiles(PATH_TO_DATA + "lineitem.tbl", "/tpch/lineitem/");
+ loadFiles(PATH_TO_DATA + "orders.tbl", "/tpch/orders/");
+ loadFiles(PATH_TO_DATA + "part.tbl", "/tpch/part/");
+ loadFiles(PATH_TO_DATA + "partsupp.tbl", "/tpch/partsupp/");
+ loadFiles(PATH_TO_DATA + "supplier.tbl", "/tpch/supplier/");
+ loadFiles(PATH_TO_DATA + "nation.tbl", "/tpch/nation/");
+ loadFiles(PATH_TO_DATA + "region.tbl", "/tpch/region/");
+
+ loadFiles(PATH_TO_DATA + "ext-gby.tbl", "/jarod/");
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java
new file mode 100644
index 0000000..ae5fa05
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java
@@ -0,0 +1,49 @@
+package edu.uci.ics.hivesterix.test.base;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import junit.framework.TestCase;
+
+public class AbstractHivesterixTestCase extends TestCase {
+ protected File queryFile;
+
+ public AbstractHivesterixTestCase(String testName, File queryFile) {
+ super(testName);
+ }
+
+ protected static void readFileToString(File file, StringBuilder buf) throws Exception {
+ BufferedReader result = new BufferedReader(new FileReader(file));
+ while (true) {
+ String s = result.readLine();
+ if (s == null) {
+ break;
+ } else {
+ buf.append(s);
+ buf.append('\n');
+ }
+ }
+ result.close();
+ }
+
+ protected static void writeStringToFile(File file, StringWriter buf) throws Exception {
+ PrintWriter result = new PrintWriter(new FileWriter(file));
+ result.print(buf);
+ result.close();
+ }
+
+ protected static void writeStringToFile(File file, StringBuilder buf) throws Exception {
+ PrintWriter result = new PrintWriter(new FileWriter(file));
+ result.print(buf);
+ result.close();
+ }
+
+ protected static String removeExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot);
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java
new file mode 100644
index 0000000..e455527
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java
@@ -0,0 +1,233 @@
+package edu.uci.ics.hivesterix.test.base;
+
+import java.io.BufferedReader;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import junit.framework.TestSuite;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MiniMRCluster;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hyracks.api.client.HyracksConnection;
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
+import edu.uci.ics.hyracks.control.common.controllers.CCConfig;
+import edu.uci.ics.hyracks.control.common.controllers.NCConfig;
+import edu.uci.ics.hyracks.control.nc.NodeControllerService;
+
+@SuppressWarnings("deprecation")
+public abstract class AbstractTestSuiteClass extends TestSuite {
+
+ private static final String PATH_TO_HADOOP_CONF = "src/test/resources/runtimefunctionts/hadoop/conf";
+ private static final String PATH_TO_HIVE_CONF = "src/test/resources/runtimefunctionts/hive/conf/hive-default.xml";
+
+ private static final String PATH_TO_CLUSTER_CONF = "src/test/resources/runtimefunctionts/hive/conf/topology.xml";
+ private static final String PATH_TO_DATA = "src/test/resources/runtimefunctionts/data/";
+
+ private static final String clusterPropertiesPath = "conf/cluster.properties";
+ private Properties clusterProps;
+
+ private MiniDFSCluster dfsCluster;
+ private MiniMRCluster mrCluster;
+
+ private JobConf conf = new JobConf();
+ protected FileSystem dfs;
+
+ private int numberOfNC = 2;
+ private ClusterControllerService cc;
+ private Map<String, NodeControllerService> ncs = new HashMap<String, NodeControllerService>();
+
+ /**
+ * setup cluster
+ *
+ * @throws IOException
+ */
+ protected void setup() throws Exception {
+ setupHdfs();
+ setupHyracks();
+ }
+
+ private void setupHdfs() throws IOException {
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
+ conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
+ HiveConf hconf = new HiveConf(SessionState.class);
+ hconf.addResource(new Path(PATH_TO_HIVE_CONF));
+
+ FileSystem lfs = FileSystem.getLocal(new Configuration());
+ lfs.delete(new Path("build"), true);
+ lfs.delete(new Path("metastore_db"), true);
+
+ System.setProperty("hadoop.log.dir", "logs");
+ dfsCluster = new MiniDFSCluster(hconf, numberOfNC, true, null);
+ dfs = dfsCluster.getFileSystem();
+
+ mrCluster = new MiniMRCluster(2, dfs.getUri().toString(), 1);
+ hconf.setVar(HiveConf.ConfVars.HADOOPJT, "localhost:" + mrCluster.getJobTrackerPort());
+
+ conf = new JobConf(hconf);
+ ConfUtil.setJobConf(conf);
+
+ String fsName = conf.get("fs.default.name");
+ hconf.set("hive.metastore.warehouse.dir", fsName.concat("/tmp/hivesterix"));
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ dfs.mkdirs(new Path(warehouse));
+ ConfUtil.setHiveConf(hconf);
+ }
+
+ private void setupHyracks() throws Exception {
+ // read hive conf
+ HiveConf hconf = new HiveConf(SessionState.class);
+ hconf.addResource(new Path(PATH_TO_HIVE_CONF));
+ SessionState.start(hconf);
+ /**
+ * load the properties file if it is not loaded
+ */
+ if (clusterProps == null) {
+ clusterProps = new Properties();
+ InputStream confIn = new FileInputStream(clusterPropertiesPath);
+ clusterProps.load(confIn);
+ confIn.close();
+ }
+ Process process = Runtime.getRuntime().exec("src/main/resources/scripts/getip.sh");
+ BufferedReader ipReader = new BufferedReader(new InputStreamReader(
+ new DataInputStream(process.getInputStream())));
+ String ipAddress = ipReader.readLine();
+ ipReader.close();
+ int clientPort = Integer.parseInt(clusterProps.getProperty("CC_CLIENTPORT"));
+ int netPort = Integer.parseInt(clusterProps.getProperty("CC_CLUSTERPORT"));
+ String applicationName = "hivesterix";
+
+ // start hyracks cc
+ CCConfig ccConfig = new CCConfig();
+ ccConfig.clientNetIpAddress = ipAddress;
+ ccConfig.clientNetPort = clientPort;
+ ccConfig.clusterNetPort = netPort;
+ ccConfig.profileDumpPeriod = 1000;
+ ccConfig.heartbeatPeriod = 200000000;
+ ccConfig.maxHeartbeatLapsePeriods = 200000000;
+ ccConfig.clusterTopologyDefinition = new File(PATH_TO_CLUSTER_CONF);
+ cc = new ClusterControllerService(ccConfig);
+ cc.start();
+
+ // start hyracks nc
+ for (int i = 0; i < numberOfNC; i++) {
+ NCConfig ncConfig = new NCConfig();
+ ncConfig.ccHost = ipAddress;
+ ncConfig.clusterNetIPAddress = ipAddress;
+ ncConfig.ccPort = netPort;
+ ncConfig.dataIPAddress = "127.0.0.1";
+ ncConfig.nodeId = "nc" + i;
+ NodeControllerService nc = new NodeControllerService(ncConfig);
+ nc.start();
+ ncs.put(ncConfig.nodeId, nc);
+ }
+
+ IHyracksClientConnection hcc = new HyracksConnection(ccConfig.clientNetIpAddress, clientPort);
+ hcc.createApplication(applicationName, null);
+ }
+
+ protected void makeDir(String path) throws IOException {
+ dfs.mkdirs(new Path(path));
+ }
+
+ protected void loadFiles(String src, String dest) throws IOException {
+ dfs.copyFromLocalFile(new Path(src), new Path(dest));
+ }
+
+ protected void cleanup() throws Exception {
+ cleanupHdfs();
+ cleanupHyracks();
+ }
+
+ /**
+ * cleanup hdfs cluster
+ */
+ private void cleanupHdfs() throws IOException {
+ dfs.delete(new Path("/"), true);
+ FileSystem.closeAll();
+ dfsCluster.shutdown();
+ }
+
+ /**
+ * cleanup hyracks cluster
+ */
+ private void cleanupHyracks() throws Exception {
+ Iterator<NodeControllerService> iterator = ncs.values().iterator();
+ while (iterator.hasNext()) {
+ NodeControllerService nc = iterator.next();
+ nc.stop();
+ }
+ cc.stop();
+ }
+
+ protected static List<String> getIgnoreList(String ignorePath) throws FileNotFoundException, IOException {
+ BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
+ String s = null;
+ List<String> ignores = new ArrayList<String>();
+ while ((s = reader.readLine()) != null) {
+ ignores.add(s);
+ }
+ reader.close();
+ return ignores;
+ }
+
+ protected static boolean isIgnored(String q, List<String> ignoreList) {
+ for (String ignore : ignoreList) {
+ if (q.indexOf(ignore) >= 0) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ protected void loadData() throws IOException {
+
+ makeDir("/tpch");
+ makeDir("/tpch/customer");
+ makeDir("/tpch/lineitem");
+ makeDir("/tpch/orders");
+ makeDir("/tpch/part");
+ makeDir("/tpch/partsupp");
+ makeDir("/tpch/supplier");
+ makeDir("/tpch/nation");
+ makeDir("/tpch/region");
+
+ makeDir("/test");
+ makeDir("/test/joinsrc1");
+ makeDir("/test/joinsrc2");
+
+ loadFiles(PATH_TO_DATA + "customer.tbl", "/tpch/customer/");
+ loadFiles(PATH_TO_DATA + "lineitem.tbl", "/tpch/lineitem/");
+ loadFiles(PATH_TO_DATA + "orders.tbl", "/tpch/orders/");
+ loadFiles(PATH_TO_DATA + "part.tbl", "/tpch/part/");
+ loadFiles(PATH_TO_DATA + "partsupp.tbl", "/tpch/partsupp/");
+ loadFiles(PATH_TO_DATA + "supplier.tbl", "/tpch/supplier/");
+ loadFiles(PATH_TO_DATA + "nation.tbl", "/tpch/nation/");
+ loadFiles(PATH_TO_DATA + "region.tbl", "/tpch/region/");
+
+ loadFiles(PATH_TO_DATA + "large_card_join_src.tbl", "/test/joinsrc1/");
+ loadFiles(PATH_TO_DATA + "large_card_join_src_small.tbl", "/test/joinsrc2/");
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java
new file mode 100644
index 0000000..ac029b1
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java
@@ -0,0 +1,75 @@
+package edu.uci.ics.hivesterix.test.datagen;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.MapReduceBase;
+import org.apache.hadoop.mapred.Mapper;
+import org.apache.hadoop.mapred.OutputCollector;
+import org.apache.hadoop.mapred.Reducer;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.mapred.TextInputFormat;
+
+@SuppressWarnings("deprecation")
+public class RecordBalance {
+
+ private static String confPath = System.getenv("HADDOP_HOME");
+ private static Path[] inputPaths = { new Path("/tpch/100x/customer"), new Path("/tpch/100x/nation"),
+ new Path("/tpch/100x/region"), new Path("/tpch/100x/lineitem"), new Path("/tpch/100x/orders"),
+ new Path("/tpch/100x/part"), new Path("/tpch/100x/partsupp"), new Path("/tpch/100x/supplier") };
+
+ private static Path[] outputPaths = { new Path("/tpch/100/customer"), new Path("/tpch/100/nation"),
+ new Path("/tpch/100/region"), new Path("/tpch/100/lineitem"), new Path("/tpch/100/orders"),
+ new Path("/tpch/100/part"), new Path("/tpch/100/partsupp"), new Path("/tpch/100/supplier") };
+
+ public static class MapRecordOnly extends MapReduceBase implements Mapper<LongWritable, Text, LongWritable, Text> {
+
+ public void map(LongWritable id, Text inputValue, OutputCollector<LongWritable, Text> output, Reporter reporter)
+ throws IOException {
+ output.collect(id, inputValue);
+ }
+ }
+
+ public static class ReduceRecordOnly extends MapReduceBase implements
+ Reducer<LongWritable, Text, NullWritable, Text> {
+
+ NullWritable key = NullWritable.get();
+
+ public void reduce(LongWritable inputKey, Iterator<Text> inputValue,
+ OutputCollector<NullWritable, Text> output, Reporter reporter) throws IOException {
+ while (inputValue.hasNext())
+ output.collect(key, inputValue.next());
+ }
+ }
+
+ public static void main(String[] args) throws IOException {
+
+ for (int i = 0; i < inputPaths.length; i++) {
+ JobConf job = new JobConf(RecordBalance.class);
+ job.addResource(new Path(confPath + "/core-site.xml"));
+ job.addResource(new Path(confPath + "/mapred-site.xml"));
+ job.addResource(new Path(confPath + "/hdfs-site.xml"));
+
+ job.setJobName(RecordBalance.class.getSimpleName());
+ job.setMapperClass(MapRecordOnly.class);
+ job.setReducerClass(ReduceRecordOnly.class);
+ job.setMapOutputKeyClass(LongWritable.class);
+ job.setMapOutputValueClass(Text.class);
+
+ job.setInputFormat(TextInputFormat.class);
+ FileInputFormat.setInputPaths(job, inputPaths[i]);
+ FileOutputFormat.setOutputPath(job, outputPaths[i]);
+ job.setNumReduceTasks(Integer.parseInt(args[0]));
+
+ JobClient.runJob(job);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java
new file mode 100644
index 0000000..32c1c3f
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java
@@ -0,0 +1,142 @@
+package edu.uci.ics.hivesterix.test.legacy;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
+
+public class LegacyTestCase extends AbstractHivesterixTestCase {
+ private File resultFile;
+ private FileSystem dfs;
+
+ public LegacyTestCase(File queryFile, File resultFile) {
+ super("legacy", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testRuntimeFunction() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+
+ dfs = FileSystem.get(ConfUtil.getJobConf());
+
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ driver.run(query);
+ driver.clear();
+ i++;
+ }
+
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ String tableName = removeExt(resultFile.getName());
+ String directory = warehouse + "/" + tableName + "/";
+ String localDirectory = "tmp";
+
+ FileStatus[] files = dfs.listStatus(new Path(directory));
+ FileSystem lfs = null;
+ if (files == null) {
+ lfs = FileSystem.getLocal(ConfUtil.getJobConf());
+ files = lfs.listStatus(new Path(directory));
+ }
+
+ File resultDirectory = new File(localDirectory + "/" + tableName);
+ deleteDir(resultDirectory);
+ resultDirectory.mkdir();
+
+ for (FileStatus fs : files) {
+ Path src = fs.getPath();
+ if (src.getName().indexOf("crc") >= 0)
+ continue;
+
+ String destStr = localDirectory + "/" + tableName + "/" + src.getName();
+ Path dest = new Path(destStr);
+ if (lfs != null) {
+ lfs.copyToLocalFile(src, dest);
+ dfs.copyFromLocalFile(dest, new Path(directory));
+ } else
+ dfs.copyToLocalFile(src, dest);
+ }
+
+ File[] rFiles = resultDirectory.listFiles();
+ StringBuilder sb = new StringBuilder();
+ for (File r : rFiles) {
+ if (r.getName().indexOf("crc") >= 0)
+ continue;
+ readFileToString(r, sb);
+ }
+ deleteDir(resultDirectory);
+
+ StringBuilder buf = new StringBuilder();
+ readFileToString(resultFile, buf);
+ if (!equal(buf, sb)) {
+ throw new Exception("Result for " + queryFile + " changed:\n" + sw.toString());
+ }
+ }
+
+ private void deleteDir(File resultDirectory) {
+ if (resultDirectory.exists()) {
+ File[] rFiles = resultDirectory.listFiles();
+ for (File r : rFiles)
+ r.delete();
+ resultDirectory.delete();
+ }
+ }
+
+ private boolean equal(StringBuilder sb1, StringBuilder sb2) {
+ String s1 = sb1.toString();
+ String s2 = sb2.toString();
+ String[] rowsOne = s1.split("\n");
+ String[] rowsTwo = s2.split("\n");
+
+ if (rowsOne.length != rowsTwo.length)
+ return false;
+
+ for (int i = 0; i < rowsOne.length; i++) {
+ String row1 = rowsOne[i];
+ String row2 = rowsTwo[i];
+
+ if (row1.equals(row2))
+ continue;
+
+ String[] fields1 = row1.split("");
+ String[] fields2 = row2.split("");
+
+ for (int j = 0; j < fields1.length; j++) {
+ if (fields1[j].equals(fields2[j])) {
+ continue;
+ } else if (fields1[j].indexOf('.') < 0) {
+ return false;
+ } else {
+ Float float1 = Float.parseFloat(fields1[j]);
+ Float float2 = Float.parseFloat(fields2[j]);
+
+ if (Math.abs(float1 - float2) == 0)
+ continue;
+ else
+ return false;
+ }
+ }
+ }
+
+ return true;
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java
new file mode 100644
index 0000000..0e2a5d5
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java
@@ -0,0 +1,53 @@
+package edu.uci.ics.hivesterix.test.optimizer;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
+
+public class OptimizerTestCase extends AbstractHivesterixTestCase {
+ private File resultFile;
+
+ OptimizerTestCase(File queryFile, File resultFile) {
+ super("testOptimizer", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testOptimizer() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ if (query.toLowerCase().indexOf("create") >= 0 || query.toLowerCase().indexOf("drop") >= 0
+ || query.toLowerCase().indexOf("set") >= 0 || query.toLowerCase().startsWith("\n\ncreate")
+ || query.toLowerCase().startsWith("\n\ndrop") || query.toLowerCase().startsWith("\n\nset"))
+ driver.run(query);
+ else
+ driver.compile(query);
+ driver.clear();
+ i++;
+ }
+ StringBuilder buf = new StringBuilder();
+ readFileToString(resultFile, buf);
+ if (!buf.toString().equals(sw.toString())) {
+ throw new Exception("Result for " + queryFile + " changed:\n" + sw.toString());
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java
new file mode 100644
index 0000000..c6b788f
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java
@@ -0,0 +1,75 @@
+package edu.uci.ics.hivesterix.test.optimizer;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
+
+public class OptimizerTestSuitGenerator extends AbstractTestSuiteClass {
+ private static final String PATH_TO_QUERIES = "src/test/resources/optimizerts/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/optimizerts/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/optimizerts/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "plan";
+
+ public static Test suite() throws UnsupportedEncodingException, FileNotFoundException, IOException {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ OptimizerTestSuitGenerator testSuite = new OptimizerTestSuitGenerator();
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile()) {
+ String resultFileName = aqlExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new OptimizerTestSuiteCaseGenerator(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String aqlExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java
new file mode 100644
index 0000000..8ac4e86
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java
@@ -0,0 +1,53 @@
+package edu.uci.ics.hivesterix.test.optimizer;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+
+import junit.framework.Test;
+import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
+
+public class OptimizerTestSuite extends AbstractTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/optimizerts/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/optimizerts/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/optimizerts/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "plan";
+
+ public static Test suite() throws UnsupportedEncodingException, FileNotFoundException, IOException {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ OptimizerTestSuite testSuite = new OptimizerTestSuite();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile() && qFile.getName().startsWith("h11_")) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new OptimizerTestCase(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java
new file mode 100644
index 0000000..ee82eb3
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java
@@ -0,0 +1,50 @@
+package edu.uci.ics.hivesterix.test.optimizer;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
+
+public class OptimizerTestSuiteCaseGenerator extends AbstractHivesterixTestCase {
+ private File resultFile;
+
+ OptimizerTestSuiteCaseGenerator(File queryFile, File resultFile) {
+ super("testOptimizer", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testOptimizer() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ if (query.toLowerCase().indexOf("create") >= 0 || query.toLowerCase().indexOf("drop") >= 0
+ || query.toLowerCase().indexOf("set") >= 0 || query.toLowerCase().startsWith("\n\ncreate")
+ || query.toLowerCase().startsWith("\n\ndrop") || query.toLowerCase().startsWith("\n\nset"))
+ driver.run(query);
+ else
+ driver.compile(query);
+ driver.clear();
+ i++;
+ }
+ sw.close();
+ writeStringToFile(resultFile, sw);
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java
new file mode 100644
index 0000000..fdc4c68
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java
@@ -0,0 +1,147 @@
+package edu.uci.ics.hivesterix.test.runtimefunction;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
+
+public class RuntimeFunctionTestCase extends AbstractHivesterixTestCase {
+ private File resultFile;
+ private FileSystem dfs;
+
+ RuntimeFunctionTestCase(File queryFile, File resultFile) {
+ super("testRuntimeFunction", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testRuntimeFunction() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+ // Driver driver = new Driver(hconf);
+
+ dfs = FileSystem.get(ConfUtil.getJobConf());
+
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ driver.run(query);
+ driver.clear();
+ i++;
+ }
+
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ String tableName = removeExt(resultFile.getName());
+ String directory = warehouse + "/" + tableName + "/";
+ String localDirectory = "tmp";
+
+ FileStatus[] files = dfs.listStatus(new Path(directory));
+ FileSystem lfs = null;
+ if (files == null) {
+ lfs = FileSystem.getLocal(ConfUtil.getJobConf());
+ files = lfs.listStatus(new Path(directory));
+ }
+
+ File resultDirectory = new File(localDirectory + "/" + tableName);
+ deleteDir(resultDirectory);
+ resultDirectory.mkdir();
+
+ for (FileStatus fs : files) {
+ Path src = fs.getPath();
+ if (src.getName().indexOf("crc") >= 0)
+ continue;
+
+ String destStr = localDirectory + "/" + tableName + "/" + src.getName();
+ Path dest = new Path(destStr);
+ if (lfs != null) {
+ lfs.copyToLocalFile(src, dest);
+ dfs.copyFromLocalFile(dest, new Path(directory));
+ } else
+ dfs.copyToLocalFile(src, dest);
+ }
+
+ File[] rFiles = resultDirectory.listFiles();
+ StringBuilder sb = new StringBuilder();
+ for (File r : rFiles) {
+ if (r.getName().indexOf("crc") >= 0)
+ continue;
+ readFileToString(r, sb);
+ }
+
+ StringBuilder buf = new StringBuilder();
+ readFileToString(resultFile, buf);
+ StringBuffer errorMsg = new StringBuffer();
+ if (!equal(buf, sb, errorMsg)) {
+ throw new Exception("Result for " + queryFile + " changed:\n" + errorMsg.toString());
+ }
+ deleteDir(resultDirectory);
+ }
+
+ private void deleteDir(File resultDirectory) {
+ if (resultDirectory.exists()) {
+ File[] rFiles = resultDirectory.listFiles();
+ for (File r : rFiles)
+ r.delete();
+ resultDirectory.delete();
+ }
+ }
+
+ private boolean equal(StringBuilder sb1, StringBuilder sb2, StringBuffer errorMsg) {
+ String s1 = sb1.toString();
+ String s2 = sb2.toString();
+ String[] rowsOne = s1.split("\n");
+ String[] rowsTwo = s2.split("\n");
+
+ if (rowsOne.length != rowsTwo.length)
+ return false;
+
+ for (int i = 0; i < rowsOne.length; i++) {
+ String row1 = rowsOne[i];
+ String row2 = rowsTwo[i];
+
+ if (row1.equals(row2))
+ continue;
+
+ String[] fields1 = row1.split("");
+ String[] fields2 = row2.split("");
+
+ for (int j = 0; j < fields1.length; j++) {
+ if (fields1[j].equals(fields2[j])) {
+ continue;
+ } else if (fields1[j].indexOf('.') < 0) {
+ errorMsg.append("line " + i + " column " + j + ": " + fields2[j] + " expected " + fields1[j]);
+ return false;
+ } else {
+ Float float1 = Float.parseFloat(fields1[j]);
+ Float float2 = Float.parseFloat(fields2[j]);
+
+ if (Math.abs(float1 - float2) == 0)
+ continue;
+ else {
+ errorMsg.append("line " + i + " column " + j + ": " + fields2[j] + " expected " + fields1[j]);
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
new file mode 100644
index 0000000..9610497
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.hivesterix.test.runtimefunction;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
+
+public class RuntimeFunctionTestSuite extends AbstractTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+ public static Test suite() throws Exception {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ RuntimeFunctionTestSuite testSuite = new RuntimeFunctionTestSuite();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile()) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new RuntimeFunctionTestCase(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java
new file mode 100644
index 0000000..a416759
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java
@@ -0,0 +1,99 @@
+package edu.uci.ics.hivesterix.test.runtimefunction;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Driver;
+import org.junit.Test;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
+
+public class RuntimeFunctionTestSuiteCaseGenerator extends AbstractHivesterixTestCase {
+ private File resultFile;
+ private FileSystem dfs;
+
+ RuntimeFunctionTestSuiteCaseGenerator(File queryFile, File resultFile) {
+ super("testRuntimeFunction", queryFile);
+ this.queryFile = queryFile;
+ this.resultFile = resultFile;
+ }
+
+ @Test
+ public void testRuntimeFunction() throws Exception {
+ StringBuilder queryString = new StringBuilder();
+ readFileToString(queryFile, queryString);
+ String[] queries = queryString.toString().split(";");
+ StringWriter sw = new StringWriter();
+
+ HiveConf hconf = ConfUtil.getHiveConf();
+ Driver driver = new Driver(hconf, new PrintWriter(sw));
+ driver.init();
+
+ dfs = FileSystem.get(ConfUtil.getJobConf());
+
+ int i = 0;
+ for (String query : queries) {
+ if (i == queries.length - 1)
+ break;
+ driver.run(query);
+ driver.clear();
+ i++;
+ }
+
+ String warehouse = hconf.get("hive.metastore.warehouse.dir");
+ String tableName = removeExt(resultFile.getName());
+ String directory = warehouse + "/" + tableName + "/";
+ String localDirectory = "tmp";
+
+ FileStatus[] files = dfs.listStatus(new Path(directory));
+ FileSystem lfs = null;
+ if (files == null) {
+ lfs = FileSystem.getLocal(ConfUtil.getJobConf());
+ files = lfs.listStatus(new Path(directory));
+ }
+
+ File resultDirectory = new File(localDirectory + "/" + tableName);
+ deleteDir(resultDirectory);
+ resultDirectory.mkdir();
+
+ for (FileStatus fs : files) {
+ Path src = fs.getPath();
+ if (src.getName().indexOf("crc") >= 0)
+ continue;
+
+ String destStr = localDirectory + "/" + tableName + "/" + src.getName();
+ Path dest = new Path(destStr);
+ if (lfs != null) {
+ lfs.copyToLocalFile(src, dest);
+ dfs.copyFromLocalFile(dest, new Path(directory));
+ } else
+ dfs.copyToLocalFile(src, dest);
+ }
+
+ File[] rFiles = resultDirectory.listFiles();
+ StringBuilder sb = new StringBuilder();
+ for (File r : rFiles) {
+ if (r.getName().indexOf("crc") >= 0)
+ continue;
+ readFileToString(r, sb);
+ }
+ deleteDir(resultDirectory);
+
+ writeStringToFile(resultFile, sb);
+ }
+
+ private void deleteDir(File resultDirectory) {
+ if (resultDirectory.exists()) {
+ File[] rFiles = resultDirectory.listFiles();
+ for (File r : rFiles)
+ r.delete();
+ resultDirectory.delete();
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java
new file mode 100644
index 0000000..ca2bd6d
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.hivesterix.test.runtimefunction;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.Test;
+import junit.framework.TestResult;
+import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
+
+public class RuntimeFunctionTestSuiteGenerator extends AbstractTestSuiteClass {
+
+ private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
+ private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
+ private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
+
+ private static final String FILE_EXTENSION_OF_RESULTS = "result";
+
+ public static Test suite() throws Exception {
+ List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
+ File testData = new File(PATH_TO_QUERIES);
+ File[] queries = testData.listFiles();
+ RuntimeFunctionTestSuiteGenerator testSuite = new RuntimeFunctionTestSuiteGenerator();
+
+ // set hdfs and hyracks cluster, and load test data to hdfs
+ try {
+ testSuite.setup();
+ testSuite.loadData();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+
+ for (File qFile : queries) {
+ if (isIgnored(qFile.getName(), ignores))
+ continue;
+
+ if (qFile.isFile() && qFile.getName().startsWith("q16_")) {
+ String resultFileName = hiveExtToResExt(qFile.getName());
+ File rFile = new File(PATH_TO_RESULTS + resultFileName);
+ testSuite.addTest(new RuntimeFunctionTestSuiteCaseGenerator(qFile, rFile));
+ }
+ }
+ return testSuite;
+ }
+
+ private static String hiveExtToResExt(String fname) {
+ int dot = fname.lastIndexOf('.');
+ return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
+ }
+
+ /**
+ * Runs the tests and collects their result in a TestResult.
+ */
+ @Override
+ public void run(TestResult result) {
+
+ int testCount = countTestCases();
+ for (int i = 0; i < testCount; i++) {
+ Test each = this.testAt(i);
+ if (result.shouldStop())
+ break;
+ runTest(each, result);
+ }
+
+ // cleanup hdfs and hyracks cluster
+ try {
+ cleanup();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new IllegalStateException(e.getMessage());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java
new file mode 100644
index 0000000..cd39c5a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java
@@ -0,0 +1,217 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.test.serde;
+
+import java.util.List;
+import java.util.Properties;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+
+/**
+ * TestLazySimpleSerDe.
+ */
+@SuppressWarnings({ "deprecation", "rawtypes" })
+public class SerDeTest extends TestCase {
+
+ /**
+ * Test the LazySimpleSerDe class.
+ */
+ public void testLazySimpleSerDe() throws Throwable {
+ try {
+ // Create the SerDe
+ LazySimpleSerDe serDe = new LazySimpleSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createProperties();
+ serDe.initialize(conf, tbl);
+
+ LazySerDe outputSerde = new LazySerDe();
+ outputSerde.initialize(conf, tbl);
+
+ // Data
+ String s = "123\t456\t789\t1000\t5.3\thive and hadoop\t1\tqf";
+
+ byte[] bytes = s.getBytes();
+ Writable bytesWritable = new BytesWritable(bytes);
+
+ // Test
+ // deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+ Object row = serDe.deserialize(bytesWritable); // test my serde
+ StructObjectInspector simpleInspector = (StructObjectInspector) serDe.getObjectInspector();
+ List<Object> fields = simpleInspector.getStructFieldsDataAsList(row);
+ List<? extends StructField> fieldRefs = simpleInspector.getAllStructFieldRefs();
+
+ int i = 0;
+ for (Object field : fields) {
+ BytesWritable fieldWritable = (BytesWritable) outputSerde.serialize(field, fieldRefs.get(i)
+ .getFieldObjectInspector());
+ System.out.print(fieldWritable.getSize() + "|");
+ i++;
+ }
+
+ // Writable output = outputSerde.serialize(row, serDe
+ // .getObjectInspector());
+ // System.out.println(output);
+ //
+ // Object row2 = outputSerde.deserialize(output);
+ // Writable output2 = serDe.serialize(row2, outputSerde
+ // .getObjectInspector());
+ // System.out.println(output2);
+
+ // System.out.println(output);
+ // deserializeAndSerialize(outputSerde, t, s, expectedFieldsData);
+
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
+
+ private void deserializeAndSerialize(SerDe serDe, Text t, String s, Object[] expectedFieldsData)
+ throws SerDeException {
+ // Get the row structure
+ StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
+ List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
+ assertEquals(8, fieldRefs.size());
+
+ // Deserialize
+ Object row = serDe.deserialize(t);
+ for (int i = 0; i < fieldRefs.size(); i++) {
+ Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
+ if (fieldData != null) {
+ fieldData = ((LazyPrimitive) fieldData).getWritableObject();
+ }
+ assertEquals("Field " + i, expectedFieldsData[i], fieldData);
+ }
+ // Serialize
+ assertEquals(Text.class, serDe.getSerializedClass());
+ Text serializedText = (Text) serDe.serialize(row, oi);
+ assertEquals("Serialized data", s, serializedText.toString());
+ }
+
+ private Properties createProperties() {
+ Properties tbl = new Properties();
+
+ // Set the configuration parameters
+ tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
+ tbl.setProperty("columns", "abyte,ashort,aint,along,adouble,astring,anullint,anullstring");
+ tbl.setProperty("columns.types", "tinyint:smallint:int:bigint:double:string:int:string");
+ tbl.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "NULL");
+ return tbl;
+ }
+
+ /**
+ * Test the LazySimpleSerDe class with LastColumnTakesRest option.
+ */
+ public void testLazySimpleSerDeLastColumnTakesRest() throws Throwable {
+ try {
+ // Create the SerDe
+ LazySimpleSerDe serDe = new LazySimpleSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createProperties();
+ tbl.setProperty(Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST, "true");
+ serDe.initialize(conf, tbl);
+
+ // Data
+ Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
+ String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta\tb\t";
+ Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
+ new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"),
+ null, new Text("a\tb\t") };
+
+ // Test
+ deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
+
+ /**
+ * Test the LazySimpleSerDe class with extra columns.
+ */
+ public void testLazySimpleSerDeExtraColumns() throws Throwable {
+ try {
+ // Create the SerDe
+ LazySimpleSerDe serDe = new LazySimpleSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createProperties();
+ serDe.initialize(conf, tbl);
+
+ // Data
+ Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
+ String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta";
+ Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
+ new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text("hive and hadoop"),
+ null, new Text("a") };
+
+ // Test
+ deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
+
+ /**
+ * Test the LazySimpleSerDe class with missing columns.
+ */
+ public void testLazySimpleSerDeMissingColumns() throws Throwable {
+ try {
+ // Create the SerDe
+ LazySimpleSerDe serDe = new LazySimpleSerDe();
+ Configuration conf = new Configuration();
+ Properties tbl = createProperties();
+ serDe.initialize(conf, tbl);
+
+ // Data
+ Text t = new Text("123\t456\t789\t1000\t5.3\t");
+ String s = "123\t456\t789\t1000\t5.3\t\tNULL\tNULL";
+ Object[] expectedFieldsData = { new ByteWritable((byte) 123), new ShortWritable((short) 456),
+ new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3), new Text(""), null, null };
+
+ // Test
+ deserializeAndSerialize(serDe, t, s, expectedFieldsData);
+
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
+
+}
diff --git a/hivesterix/src/test/resources/log4j.properties b/hivesterix/hivesterix-dist/src/test/resources/log4j.properties
similarity index 100%
rename from hivesterix/src/test/resources/log4j.properties
rename to hivesterix/hivesterix-dist/src/test/resources/log4j.properties
diff --git a/hivesterix/src/test/resources/logging.properties b/hivesterix/hivesterix-dist/src/test/resources/logging.properties
similarity index 100%
rename from hivesterix/src/test/resources/logging.properties
rename to hivesterix/hivesterix-dist/src/test/resources/logging.properties
diff --git a/hivesterix/src/test/resources/optimizerts/hive/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/hive/conf/hive-default.xml
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/hive/conf/hive-default.xml
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/hive/conf/hive-default.xml
diff --git a/hivesterix/src/test/resources/optimizerts/ignore.txt b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/ignore.txt
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/ignore.txt
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/ignore.txt
diff --git a/hivesterix/src/test/resources/optimizerts/queries/h11_share_scan.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/h11_share_scan.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/h11_share_scan.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/h11_share_scan.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/h12_select_struct.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/h12_select_struct.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/h12_select_struct.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/h12_select_struct.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q10_returned_item.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q10_returned_item.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q10_returned_item.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q11_important_stock.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q11_important_stock.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q11_important_stock.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q11_important_stock.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q12_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q12_shipping.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q12_shipping.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q13_customer_distribution.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q13_customer_distribution.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q13_customer_distribution.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q13_customer_distribution.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q14_promotion_effect.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q15_top_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q15_top_supplier.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q15_top_supplier.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q16_parts_supplier_relationship.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q16_parts_supplier_relationship.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q16_parts_supplier_relationship.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q16_parts_supplier_relationship.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q17_small_quantity_order_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q17_small_quantity_order_revenue.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q17_small_quantity_order_revenue.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q17_small_quantity_order_revenue.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q18_large_volume_customer.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q19_discounted_revenue.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q1_pricing_summary_report.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q20_potential_part_promotion.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q21_suppliers_who_kept_orders_waiting.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q21_suppliers_who_kept_orders_waiting.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q21_suppliers_who_kept_orders_waiting.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q21_suppliers_who_kept_orders_waiting.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q22_global_sales_opportunity.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q22_global_sales_opportunity.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q22_global_sales_opportunity.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q22_global_sales_opportunity.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q2_minimum_cost_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q2_minimum_cost_supplier.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q2_minimum_cost_supplier.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q2_minimum_cost_supplier.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q3_shipping_priority.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q4_order_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q4_order_priority.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q4_order_priority.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q5_local_supplier_volume.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q6_forecast_revenue_change.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q6_forecast_revenue_change.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q6_forecast_revenue_change.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q6_forecast_revenue_change.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q7_volume_shipping.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q8_national_market_share.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q8_national_market_share.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q8_national_market_share.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q8_national_market_share.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/q9_product_type_profit.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u10_nestedloop_join.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u10_nestedloop_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u10_nestedloop_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u10_nestedloop_join.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u1_group_by.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u1_group_by.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u1_group_by.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u1_group_by.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u2_select-project.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u2_select-project.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u2_select-project.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u2_select-project.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u3_union.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u3_union.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u3_union.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u3_union.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u4_join.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u4_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u4_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u4_join.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u5_lateral_view.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u5_lateral_view.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u5_lateral_view.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u5_lateral_view.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u6_limit.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u6_limit.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u6_limit.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u6_limit.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u7_multi_join.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u7_multi_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u7_multi_join.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u8_non_mapred.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u8_non_mapred.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u8_non_mapred.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u8_non_mapred.hive
diff --git a/hivesterix/src/test/resources/optimizerts/queries/u9_order_by.hive b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u9_order_by.hive
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/queries/u9_order_by.hive
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/queries/u9_order_by.hive
diff --git a/hivesterix/src/test/resources/optimizerts/results/h11_share_scan.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/h11_share_scan.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/h11_share_scan.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/h11_share_scan.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/h12_select_struct.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/h12_select_struct.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/h12_select_struct.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/h12_select_struct.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q10_returned_item.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q10_returned_item.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q10_returned_item.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q11_important_stock.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q11_important_stock.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q11_important_stock.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q12_shipping.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q12_shipping.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q12_shipping.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q13_customer_distribution.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q13_customer_distribution.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q13_customer_distribution.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q14_promotion_effect.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q14_promotion_effect.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q14_promotion_effect.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q15_top_supplier.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q15_top_supplier.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q15_top_supplier.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q16_parts_supplier_relationship.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q17_small_quantity_order_revenue.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q18_large_volume_customer.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q18_large_volume_customer.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q19_discounted_revenue.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q19_discounted_revenue.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q1_pricing_summary_report.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q20_potential_part_promotion.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q21_suppliers_who_kept_orders_waiting.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q22_global_sales_opportunity.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q2_minimum_cost_supplier.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q3_shipping_priority.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q3_shipping_priority.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q3_shipping_priority.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q4_order_priority.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q4_order_priority.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q4_order_priority.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q5_local_supplier_volume.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q6_forecast_revenue_change.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q7_volume_shipping.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q7_volume_shipping.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q7_volume_shipping.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q8_national_market_share.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q8_national_market_share.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q8_national_market_share.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/q9_product_type_profit.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/q9_product_type_profit.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/q9_product_type_profit.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u10_nestedloop_join.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u10_nestedloop_join.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u10_nestedloop_join.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u10_nestedloop_join.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u1_group_by.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u1_group_by.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u1_group_by.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u2_select-project.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u2_select-project.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u2_select-project.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u2_select-project.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u3_union.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u3_union.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u3_union.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u3_union.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u4_join.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u4_join.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u4_join.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u4_join.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u5_lateral_view.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u5_lateral_view.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u5_lateral_view.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u6_limit.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u6_limit.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u6_limit.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u7_multi_join.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u7_multi_join.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u7_multi_join.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u8_non_mapred.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u8_non_mapred.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u8_non_mapred.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u8_non_mapred.plan
diff --git a/hivesterix/src/test/resources/optimizerts/results/u9_order_by.plan b/hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan
similarity index 100%
rename from hivesterix/src/test/resources/optimizerts/results/u9_order_by.plan
rename to hivesterix/hivesterix-dist/src/test/resources/optimizerts/results/u9_order_by.plan
diff --git a/hivesterix/src/test/resources/runtimefunctionts/conf/cluster b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/conf/cluster
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/conf/cluster
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/conf/cluster
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/customer.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/customer.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/customer.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/customer.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/large_card_join_src.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/large_card_join_src.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/large_card_join_src.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/large_card_join_src.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/large_card_join_src_small.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/large_card_join_src_small.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/large_card_join_src_small.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/large_card_join_src_small.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/lineitem.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/lineitem.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/lineitem.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/lineitem.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/nation.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/nation.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/nation.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/nation.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/orders.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/orders.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/orders.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/orders.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/part.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/part.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/part.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/part.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/partsupp.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/partsupp.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/partsupp.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/partsupp.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/region.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/region.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/region.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/region.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/data/supplier.tbl b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/supplier.tbl
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/data/supplier.tbl
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/data/supplier.tbl
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml.bak b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml.bak
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml.bak
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/core-site.xml.bak
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml.bak b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml.bak
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml.bak
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/hdfs-site.xml.bak
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml.bak b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml.bak
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml.bak
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hadoop/conf/mapred-site.xml.bak
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/hive-default.xml
diff --git a/hivesterix/src/test/resources/runtimefunctionts/hive/conf/topology.xml b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/topology.xml
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/hive/conf/topology.xml
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/hive/conf/topology.xml
diff --git a/hivesterix/src/test/resources/runtimefunctionts/ignore.txt b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/ignore.txt
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/ignore.txt
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/ignore.txt
diff --git a/hivesterix/src/test/resources/runtimefunctionts/logging.properties b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/logging.properties
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/logging.properties
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q10_returned_item.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q11_important_stock.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q11_important_stock.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q11_important_stock.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q11_important_stock.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q12_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q12_shipping.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q13_customer_distribution.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q13_customer_distribution.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q13_customer_distribution.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q13_customer_distribution.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q14_promotion_effect.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q15_top_supplier.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q16_parts_supplier_relationship.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q16_parts_supplier_relationship.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q16_parts_supplier_relationship.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q16_parts_supplier_relationship.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q17_small_quantity_order_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q17_small_quantity_order_revenue.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q17_small_quantity_order_revenue.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q17_small_quantity_order_revenue.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q18_large_volume_customer.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q19_discounted_revenue.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q1_pricing_summary_report.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q20_potential_part_promotion.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q21_suppliers_who_kept_orders_waiting.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q21_suppliers_who_kept_orders_waiting.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q21_suppliers_who_kept_orders_waiting.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q21_suppliers_who_kept_orders_waiting.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q22_global_sales_opportunity.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q22_global_sales_opportunity.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q22_global_sales_opportunity.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q22_global_sales_opportunity.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q2_minimum_cost_supplier.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q2_minimum_cost_supplier.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q2_minimum_cost_supplier.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q2_minimum_cost_supplier.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q3_shipping_priority.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q4_order_priority.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q5_local_supplier_volume.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q6_forecast_revenue_change.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q6_forecast_revenue_change.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q6_forecast_revenue_change.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q6_forecast_revenue_change.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q7_volume_shipping.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q8_national_market_share.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q8_national_market_share.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q8_national_market_share.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q8_national_market_share.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/q9_product_type_profit.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u10_join.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u10_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u10_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u10_join.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u10_nestedloop_join.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u10_nestedloop_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u10_nestedloop_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u10_nestedloop_join.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u1_gby.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u1_gby.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u1_gby.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u1_gby.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u2_gby_external.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u2_gby_external.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u2_gby_external.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u2_gby_external.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u3_union.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u3_union.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u3_union.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u3_union.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u4_gby_distinct.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u4_gby_distinct.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u4_gby_distinct.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u4_gby_distinct.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u5_gby_global.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u5_gby_global.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u5_gby_global.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u5_gby_global.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/queries/u6_large_card_join.hive b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u6_large_card_join.hive
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/queries/u6_large_card_join.hive
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/queries/u6_large_card_join.hive
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q10_returned_item.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q10_returned_item.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q10_returned_item.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q10_returned_item.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q11_important_stock.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q11_important_stock.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q11_important_stock.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q11_important_stock.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q12_shipping.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q12_shipping.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q12_shipping.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q12_shipping.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q13_customer_distribution.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q13_customer_distribution.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q13_customer_distribution.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q13_customer_distribution.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q14_promotion_effect.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q14_promotion_effect.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q14_promotion_effect.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q14_promotion_effect.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q15_top_supplier.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q15_top_supplier.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q15_top_supplier.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q15_top_supplier.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q16_parts_supplier_relationship.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q16_parts_supplier_relationship.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q16_parts_supplier_relationship.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q16_parts_supplier_relationship.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q17_small_quantity_order_revenue.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q17_small_quantity_order_revenue.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q17_small_quantity_order_revenue.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q17_small_quantity_order_revenue.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q18_large_volume_customer.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q18_large_volume_customer.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q18_large_volume_customer.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q18_large_volume_customer.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q19_discounted_revenue.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q19_discounted_revenue.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q19_discounted_revenue.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q19_discounted_revenue.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q1_pricing_summary_report.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q1_pricing_summary_report.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q1_pricing_summary_report.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q1_pricing_summary_report.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q20_potential_part_promotion.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q20_potential_part_promotion.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q20_potential_part_promotion.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q20_potential_part_promotion.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q21_suppliers_who_kept_orders_waiting.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q21_suppliers_who_kept_orders_waiting.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q21_suppliers_who_kept_orders_waiting.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q21_suppliers_who_kept_orders_waiting.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q22_global_sales_opportunity.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q22_global_sales_opportunity.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q22_global_sales_opportunity.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q22_global_sales_opportunity.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q2_minimum_cost_supplier.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q2_minimum_cost_supplier.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q2_minimum_cost_supplier.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q2_minimum_cost_supplier.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q3_shipping_priority.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q3_shipping_priority.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q3_shipping_priority.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q3_shipping_priority.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q4_order_priority.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q4_order_priority.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q4_order_priority.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q4_order_priority.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q5_local_supplier_volume.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q5_local_supplier_volume.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q5_local_supplier_volume.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q5_local_supplier_volume.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q6_forecast_revenue_change.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q6_forecast_revenue_change.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q6_forecast_revenue_change.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q6_forecast_revenue_change.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q7_volume_shipping.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q7_volume_shipping.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q7_volume_shipping.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q7_volume_shipping.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q8_national_market_share.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q8_national_market_share.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q8_national_market_share.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q8_national_market_share.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/q9_product_type_profit.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q9_product_type_profit.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/q9_product_type_profit.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/q9_product_type_profit.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u10_join.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u10_join.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u10_join.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u10_join.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u10_nestedloop_join.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u10_nestedloop_join.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u10_nestedloop_join.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u10_nestedloop_join.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u1_gby.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u1_gby.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u1_gby.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u1_gby.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u2_gby_external.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u2_gby_external.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u2_gby_external.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u2_gby_external.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u3_union.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u3_union.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u3_union.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u3_union.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u4_gby_distinct.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u4_gby_distinct.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u4_gby_distinct.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u4_gby_distinct.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u5_gby_global.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u5_gby_global.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u5_gby_global.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u5_gby_global.result
diff --git a/hivesterix/src/test/resources/runtimefunctionts/results/u6_large_card_join.result b/hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u6_large_card_join.result
similarity index 100%
rename from hivesterix/src/test/resources/runtimefunctionts/results/u6_large_card_join.result
rename to hivesterix/hivesterix-dist/src/test/resources/runtimefunctionts/results/u6_large_card_join.result
diff --git a/hivesterix/hivesterix-optimizer/pom.xml b/hivesterix/hivesterix-optimizer/pom.xml
new file mode 100644
index 0000000..4e6032e
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/pom.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0"?>
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>hivesterix</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>hivesterix-optimizer</artifactId>
+ <name>hivesterix-optimizer</name>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-common</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-translator</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
new file mode 100644
index 0000000..7e4e271
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
@@ -0,0 +1,113 @@
+package edu.uci.ics.hivesterix.optimizer.rulecollections;
+
+import java.util.LinkedList;
+
+import edu.uci.ics.hivesterix.optimizer.rules.InsertProjectBeforeWriteRule;
+import edu.uci.ics.hivesterix.optimizer.rules.IntroduceEarlyProjectRule;
+import edu.uci.ics.hivesterix.optimizer.rules.LocalGroupByRule;
+import edu.uci.ics.hivesterix.optimizer.rules.RemoveRedundantSelectRule;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.BreakSelectIntoConjunctsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ComplexJoinInferenceRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateAssignsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateSelectsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.EliminateSubplanRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.EnforceStructuralPropertiesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractCommonOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractGbyExpressionsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.FactorRedundantGroupAndDecorVarsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InferTypesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InlineVariablesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.InsertProjectBeforeUnionRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceAggregateCombinerRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceGroupByCombinerRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.IsolateHyracksOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PullSelectOutOfEqJoin;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushLimitDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectIntoDataSourceScanRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectDownRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectIntoJoinRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.ReinferAllTypesRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveRedundantProjectionRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveUnusedAssignAndAggregateRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetAlgebricksPhysicalOperatorsRule;
+import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetExecutionModeRule;
+
+public final class HiveRuleCollections {
+
+ public final static LinkedList<IAlgebraicRewriteRule> NORMALIZATION = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ NORMALIZATION.add(new EliminateSubplanRule());
+ NORMALIZATION.add(new IntroduceAggregateCombinerRule());
+ NORMALIZATION.add(new BreakSelectIntoConjunctsRule());
+ NORMALIZATION.add(new IntroduceAggregateCombinerRule());
+ NORMALIZATION.add(new PushSelectIntoJoinRule());
+ NORMALIZATION.add(new ExtractGbyExpressionsRule());
+ NORMALIZATION.add(new RemoveRedundantSelectRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> COND_PUSHDOWN_AND_JOIN_INFERENCE = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new PushSelectDownRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new InlineVariablesRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new FactorRedundantGroupAndDecorVarsRule());
+ COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new EliminateSubplanRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> LOAD_FIELDS = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ // should LoadRecordFieldsRule be applied in only one pass over the
+ // plan?
+ LOAD_FIELDS.add(new InlineVariablesRule());
+ // LOAD_FIELDS.add(new RemoveUnusedAssignAndAggregateRule());
+ LOAD_FIELDS.add(new ComplexJoinInferenceRule());
+ LOAD_FIELDS.add(new InferTypesRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> OP_PUSHDOWN = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ OP_PUSHDOWN.add(new PushProjectDownRule());
+ OP_PUSHDOWN.add(new PushSelectDownRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> DATA_EXCHANGE = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ DATA_EXCHANGE.add(new SetExecutionModeRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> CONSOLIDATION = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ CONSOLIDATION.add(new RemoveRedundantProjectionRule());
+ CONSOLIDATION.add(new ConsolidateSelectsRule());
+ CONSOLIDATION.add(new IntroduceEarlyProjectRule());
+ CONSOLIDATION.add(new ConsolidateAssignsRule());
+ CONSOLIDATION.add(new IntroduceGroupByCombinerRule());
+ CONSOLIDATION.add(new RemoveUnusedAssignAndAggregateRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> PHYSICAL_PLAN_REWRITES = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ PHYSICAL_PLAN_REWRITES.add(new PullSelectOutOfEqJoin());
+ PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
+ PHYSICAL_PLAN_REWRITES.add(new EnforceStructuralPropertiesRule());
+ PHYSICAL_PLAN_REWRITES.add(new PushProjectDownRule());
+ PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
+ PHYSICAL_PLAN_REWRITES.add(new PushLimitDownRule());
+ PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeWriteRule());
+ PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeUnionRule());
+ }
+
+ public final static LinkedList<IAlgebraicRewriteRule> prepareJobGenRules = new LinkedList<IAlgebraicRewriteRule>();
+ static {
+ prepareJobGenRules.add(new ReinferAllTypesRule());
+ prepareJobGenRules.add(new IsolateHyracksOperatorsRule(
+ HeuristicOptimizer.hyraxOperatorsBelowWhichJobGenIsDisabled));
+ prepareJobGenRules.add(new ExtractCommonOperatorsRule());
+ prepareJobGenRules.add(new LocalGroupByRule());
+ prepareJobGenRules.add(new PushProjectIntoDataSourceScanRule());
+ prepareJobGenRules.add(new ReinferAllTypesRule());
+ }
+
+}
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java
new file mode 100644
index 0000000..90777ee
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java
@@ -0,0 +1,79 @@
+package edu.uci.ics.hivesterix.optimizer.rules;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.WriteOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.StreamProjectPOperator;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public class InsertProjectBeforeWriteRule implements IAlgebraicRewriteRule {
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
+ return false;
+ }
+
+ /**
+ * When the input schema to WriteOperator is different from the output
+ * schema in terms of variable order, add a project operator to get the
+ * write order
+ */
+ @Override
+ public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+ AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+ if (op.getOperatorTag() != LogicalOperatorTag.WRITE) {
+ return false;
+ }
+ WriteOperator opWrite = (WriteOperator) op;
+ ArrayList<LogicalVariable> finalSchema = new ArrayList<LogicalVariable>();
+ VariableUtilities.getUsedVariables(opWrite, finalSchema);
+ ArrayList<LogicalVariable> inputSchema = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(opWrite, inputSchema);
+ if (!isIdentical(finalSchema, inputSchema)) {
+ ProjectOperator projectOp = new ProjectOperator(finalSchema);
+ Mutable<ILogicalOperator> parentOpRef = opWrite.getInputs().get(0);
+ projectOp.getInputs().add(parentOpRef);
+ opWrite.getInputs().clear();
+ opWrite.getInputs().add(new MutableObject<ILogicalOperator>(projectOp));
+ projectOp.setPhysicalOperator(new StreamProjectPOperator());
+ projectOp.setExecutionMode(ExecutionMode.PARTITIONED);
+
+ AbstractLogicalOperator op2 = (AbstractLogicalOperator) parentOpRef.getValue();
+ if (op2.getOperatorTag() == LogicalOperatorTag.PROJECT) {
+ ProjectOperator pi2 = (ProjectOperator) op2;
+ parentOpRef.setValue(pi2.getInputs().get(0).getValue());
+ }
+ context.computeAndSetTypeEnvironmentForOperator(projectOp);
+ return true;
+ } else
+ return false;
+
+ }
+
+ private boolean isIdentical(List<LogicalVariable> finalSchema, List<LogicalVariable> inputSchema) {
+ int finalSchemaSize = finalSchema.size();
+ int inputSchemaSize = inputSchema.size();
+ if (finalSchemaSize != inputSchemaSize)
+ throw new IllegalStateException("final output schema variables missing!");
+ for (int i = 0; i < finalSchemaSize; i++) {
+ LogicalVariable var1 = finalSchema.get(i);
+ LogicalVariable var2 = inputSchema.get(i);
+ if (!var1.equals(var2))
+ return false;
+ }
+ return true;
+ }
+}
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java
new file mode 100644
index 0000000..0a18629
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java
@@ -0,0 +1,73 @@
+package edu.uci.ics.hivesterix.optimizer.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public class IntroduceEarlyProjectRule implements IAlgebraicRewriteRule {
+
+ @Override
+ public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+ return false;
+ }
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+ throws AlgebricksException {
+ AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+ if (op.getOperatorTag() != LogicalOperatorTag.PROJECT) {
+ return false;
+ }
+ AbstractLogicalOperator middleOp = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
+ List<LogicalVariable> deliveredVars = new ArrayList<LogicalVariable>();
+ List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
+ List<LogicalVariable> producedVars = new ArrayList<LogicalVariable>();
+
+ VariableUtilities.getUsedVariables(op, deliveredVars);
+ VariableUtilities.getUsedVariables(middleOp, usedVars);
+ VariableUtilities.getProducedVariables(middleOp, producedVars);
+
+ Set<LogicalVariable> requiredVariables = new HashSet<LogicalVariable>();
+ requiredVariables.addAll(deliveredVars);
+ requiredVariables.addAll(usedVars);
+ requiredVariables.removeAll(producedVars);
+
+ if (middleOp.getInputs().size() <= 0 || middleOp.getInputs().size() > 1)
+ return false;
+
+ AbstractLogicalOperator targetOp = (AbstractLogicalOperator) middleOp.getInputs().get(0).getValue();
+ if (targetOp.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN)
+ return false;
+
+ Set<LogicalVariable> deliveredEarlyVars = new HashSet<LogicalVariable>();
+ VariableUtilities.getLiveVariables(targetOp, deliveredEarlyVars);
+
+ deliveredEarlyVars.removeAll(requiredVariables);
+ if (deliveredEarlyVars.size() > 0) {
+ ArrayList<LogicalVariable> requiredVars = new ArrayList<LogicalVariable>();
+ requiredVars.addAll(requiredVariables);
+ ILogicalOperator earlyProjectOp = new ProjectOperator(requiredVars);
+ Mutable<ILogicalOperator> earlyProjectOpRef = new MutableObject<ILogicalOperator>(earlyProjectOp);
+ Mutable<ILogicalOperator> targetRef = middleOp.getInputs().get(0);
+ middleOp.getInputs().set(0, earlyProjectOpRef);
+ earlyProjectOp.getInputs().add(targetRef);
+ context.computeAndSetTypeEnvironmentForOperator(earlyProjectOp);
+ return true;
+ }
+ return false;
+ }
+}
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java
new file mode 100644
index 0000000..90ca008
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java
@@ -0,0 +1,66 @@
+package edu.uci.ics.hivesterix.optimizer.rules;
+
+import org.apache.commons.lang3.mutable.Mutable;
+
+import edu.uci.ics.hivesterix.logical.plan.HiveOperatorAnnotations;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IPhysicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.OperatorAnnotations;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.PhysicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.OneToOneExchangePOperator;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public class LocalGroupByRule implements IAlgebraicRewriteRule {
+
+ @Override
+ public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+ return false;
+ }
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+ throws AlgebricksException {
+ AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+ if (op.getOperatorTag() != LogicalOperatorTag.GROUP) {
+ return false;
+ }
+ Boolean localGby = (Boolean) op.getAnnotations().get(HiveOperatorAnnotations.LOCAL_GROUP_BY);
+ if (localGby != null && localGby.equals(Boolean.TRUE)) {
+ Boolean hashGby = (Boolean) op.getAnnotations().get(OperatorAnnotations.USE_HASH_GROUP_BY);
+ Boolean externalGby = (Boolean) op.getAnnotations().get(OperatorAnnotations.USE_EXTERNAL_GROUP_BY);
+ if ((hashGby != null && (hashGby.equals(Boolean.TRUE)) || (externalGby != null && externalGby
+ .equals(Boolean.TRUE)))) {
+ reviseExchange(op);
+ } else {
+ ILogicalOperator child = op.getInputs().get(0).getValue();
+ AbstractLogicalOperator childOp = (AbstractLogicalOperator) child;
+ while (child.getInputs().size() > 0) {
+ if (childOp.getOperatorTag() == LogicalOperatorTag.ORDER)
+ break;
+ else {
+ child = child.getInputs().get(0).getValue();
+ childOp = (AbstractLogicalOperator) child;
+ }
+ }
+ if (childOp.getOperatorTag() == LogicalOperatorTag.ORDER)
+ reviseExchange(childOp);
+ }
+ return true;
+ }
+ return false;
+ }
+
+ private void reviseExchange(AbstractLogicalOperator op) {
+ ExchangeOperator exchange = (ExchangeOperator) op.getInputs().get(0).getValue();
+ IPhysicalOperator physicalOp = exchange.getPhysicalOperator();
+ if (physicalOp.getOperatorTag() == PhysicalOperatorTag.HASH_PARTITION_EXCHANGE) {
+ exchange.setPhysicalOperator(new OneToOneExchangePOperator());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java
new file mode 100644
index 0000000..44ff12d
--- /dev/null
+++ b/hivesterix/hivesterix-optimizer/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java
@@ -0,0 +1,44 @@
+package edu.uci.ics.hivesterix.optimizer.rules;
+
+import org.apache.commons.lang3.mutable.Mutable;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+public class RemoveRedundantSelectRule implements IAlgebraicRewriteRule {
+
+ @Override
+ public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+ return false;
+ }
+
+ @Override
+ public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+ throws AlgebricksException {
+ AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+ if (op.getOperatorTag() != LogicalOperatorTag.SELECT) {
+ return false;
+ }
+ AbstractLogicalOperator inputOp = (AbstractLogicalOperator) op.getInputs().get(0).getValue();
+ if (inputOp.getOperatorTag() != LogicalOperatorTag.SELECT) {
+ return false;
+ }
+ SelectOperator selectOp = (SelectOperator) op;
+ SelectOperator inputSelectOp = (SelectOperator) inputOp;
+ ILogicalExpression expr1 = selectOp.getCondition().getValue();
+ ILogicalExpression expr2 = inputSelectOp.getCondition().getValue();
+
+ if (expr1.equals(expr2)) {
+ selectOp.getInputs().set(0, inputSelectOp.getInputs().get(0));
+ return true;
+ }
+ return false;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/pom.xml b/hivesterix/hivesterix-runtime/pom.xml
new file mode 100644
index 0000000..77db0d4
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/pom.xml
@@ -0,0 +1,383 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-runtime</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <name>hivesterix-runtime</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ <version>2.5</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>4.8.1</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>args4j</groupId>
+ <artifactId>args4j</artifactId>
+ <version>2.0.12</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.json</groupId>
+ <artifactId>json</artifactId>
+ <version>20090211</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-server</artifactId>
+ <version>8.0.0.M1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.eclipse.jetty</groupId>
+ <artifactId>jetty-servlet</artifactId>
+ <version>8.0.0.M1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ </dependency>
+ <dependency>
+ <groupId>jline</groupId>
+ <artifactId>jline</artifactId>
+ <version>0.9.94</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-core</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-connectionpool</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-enhancer</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.datanucleus</groupId>
+ <artifactId>datanucleus-rdbms</artifactId>
+ <version>2.0.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-dbcp</groupId>
+ <artifactId>commons-dbcp</artifactId>
+ <version>1.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-pool</groupId>
+ <artifactId>commons-pool</artifactId>
+ <version>1.5.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-collections</groupId>
+ <artifactId>commons-collections</artifactId>
+ <version>3.2.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ <version>2.4</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>javax</groupId>
+ <artifactId>jdo2-api</artifactId>
+ <version>2.3-ec</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.facebook</groupId>
+ <artifactId>libfb303</artifactId>
+ <version>0.5.0</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.thrift</groupId>
+ <artifactId>libthrift</artifactId>
+ <version>0.5.0</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>cli</artifactId>
+ <version>1.2</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache</groupId>
+ <artifactId>log4j</artifactId>
+ <version>1.2.15</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>antlr-runtime</artifactId>
+ <version>3.0.1</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-cli</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-common</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-hwi</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-jdbc</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-metastore</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-service</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-shims</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-serde</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-api</artifactId>
+ <version>1.6.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-cli</groupId>
+ <artifactId>commons-cli</artifactId>
+ <version>1.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <version>1.6.1</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-test</artifactId>
+ <version>0.20.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>commons-logging</groupId>
+ <artifactId>commons-logging</artifactId>
+ <version>1.1.1</version>
+ <type>jar</type>
+ <classifier>api</classifier>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ <version>r06</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.antlr</groupId>
+ <artifactId>stringtemplate</artifactId>
+ <version>3.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.derby</groupId>
+ <artifactId>derby</artifactId>
+ <version>10.8.1.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hbase</groupId>
+ <artifactId>hbase</artifactId>
+ <version>0.90.3</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-cc</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-control-nc</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-serde</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-common</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-jar-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>patch</id>
+ <goals>
+ <goal>jar</goal>
+ </goals>
+ <phase>package</phase>
+ <configuration>
+ <classifier>patch</classifier>
+ <finalName>a-hive-rumtime</finalName>
+ <includes>
+ <include>**/org/apache/**</include>
+ </includes>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ <repositories>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>third-party</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/third-party</url>
+ </repository>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>hyracks-public-release</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-public-releases</url>
+ </repository>
+ </repositories>
+</project>
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
new file mode 100644
index 0000000..ad02239
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
@@ -0,0 +1,169 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+public abstract class AbstractExpressionEvaluator implements ICopyEvaluator {
+
+ private List<ICopyEvaluator> children;
+
+ private ExprNodeEvaluator evaluator;
+
+ private IDataOutputProvider out;
+
+ private ObjectInspector inspector;
+
+ /**
+ * output object inspector
+ */
+ private ObjectInspector outputInspector;
+
+ /**
+ * cached row object
+ */
+ private LazyObject<? extends ObjectInspector> cachedRowObject;
+
+ /**
+ * serializer/derialzer for lazy object
+ */
+ private SerDe lazySer;
+
+ /**
+ * data output
+ */
+ DataOutput dataOutput;
+
+ public AbstractExpressionEvaluator(ExprNodeEvaluator hiveEvaluator, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ evaluator = hiveEvaluator;
+ out = output;
+ inspector = oi;
+ dataOutput = out.getDataOutput();
+ }
+
+ protected ObjectInspector getRowInspector() {
+ return null;
+ }
+
+ protected IDataOutputProvider getIDataOutputProvider() {
+ return out;
+ }
+
+ protected ExprNodeEvaluator getHiveEvaluator() {
+ return evaluator;
+ }
+
+ public ObjectInspector getObjectInspector() {
+ return inspector;
+ }
+
+ @Override
+ public void evaluate(IFrameTupleReference r) throws AlgebricksException {
+ // initialize hive evaluator
+ try {
+ if (outputInspector == null)
+ outputInspector = evaluator.initialize(inspector);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+
+ readIntoCache(r);
+ try {
+ Object result = evaluator.evaluate(cachedRowObject);
+
+ // if (result == null) {
+ // result = evaluator.evaluate(cachedRowObject);
+ //
+ // // check if result is null
+ //
+ // String errorMsg = "serialize null object in \n output " +
+ // outputInspector.toString() + " \n input "
+ // + inspector.toString() + "\n ";
+ // errorMsg += "";
+ // List<Object> columns = ((StructObjectInspector)
+ // inspector).getStructFieldsDataAsList(cachedRowObject);
+ // for (Object column : columns) {
+ // errorMsg += column.toString() + " ";
+ // }
+ // errorMsg += "\n";
+ // Log.info(errorMsg);
+ // System.out.println(errorMsg);
+ // // result = new BooleanWritable(true);
+ // throw new IllegalStateException(errorMsg);
+ // }
+
+ serializeResult(result);
+ } catch (HiveException e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ } catch (IOException e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ }
+
+ /**
+ * serialize the result
+ *
+ * @param result
+ * the evaluation result
+ * @throws IOException
+ * @throws AlgebricksException
+ */
+ private void serializeResult(Object result) throws IOException, AlgebricksException {
+ if (lazySer == null)
+ lazySer = new LazySerDe();
+
+ try {
+ BytesWritable outputWritable = (BytesWritable) lazySer.serialize(result, outputInspector);
+ dataOutput.write(outputWritable.getBytes(), 0, outputWritable.getLength());
+ } catch (SerDeException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ /**
+ * bind the tuple reference to the cached row object
+ *
+ * @param r
+ */
+ private void readIntoCache(IFrameTupleReference r) {
+ if (cachedRowObject == null)
+ cachedRowObject = (LazyObject<? extends ObjectInspector>) LazyFactory.createLazyObject(inspector);
+ cachedRowObject.init(r);
+ }
+
+ /**
+ * set a list of children of this evaluator
+ *
+ * @param children
+ */
+ public void setChildren(List<ICopyEvaluator> children) {
+ this.children = children;
+ }
+
+ public void addChild(ICopyEvaluator child) {
+ if (children == null)
+ children = new ArrayList<ICopyEvaluator>();
+ children.add(child);
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
new file mode 100644
index 0000000..e500376
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
@@ -0,0 +1,224 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.BytesWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+public class AggregationFunctionEvaluator implements ICopyAggregateFunction {
+
+ /**
+ * the mode of aggregation function
+ */
+ private GenericUDAFEvaluator.Mode mode;
+
+ /**
+ * an array of evaluators
+ */
+ private ExprNodeEvaluator[] evaluators;
+
+ /**
+ * udaf evaluator partial
+ */
+ private GenericUDAFEvaluator udafPartial;
+
+ /**
+ * udaf evaluator complete
+ */
+ private GenericUDAFEvaluator udafComplete;
+
+ /**
+ * cached parameter objects
+ */
+ private Object[] cachedParameters;
+
+ /**
+ * cached row objects
+ */
+ private LazyObject<? extends ObjectInspector> cachedRowObject;
+
+ /**
+ * the output channel
+ */
+ private DataOutput out;
+
+ /**
+ * aggregation buffer
+ */
+ private AggregationBuffer aggBuffer;
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ private SerDe lazySer;
+
+ /**
+ * the output object inspector for this aggregation function
+ */
+ private ObjectInspector outputInspector;
+
+ /**
+ * the output object inspector for this aggregation function
+ */
+ private ObjectInspector outputInspectorPartial;
+
+ /**
+ * parameter inspectors
+ */
+ private ObjectInspector[] parameterInspectors;
+
+ /**
+ * output make sure the aggregation functio has least object creation
+ *
+ * @param desc
+ * @param oi
+ * @param output
+ */
+ public AggregationFunctionEvaluator(List<ExprNodeDesc> inputs, List<TypeInfo> inputTypes, String genericUDAFName,
+ GenericUDAFEvaluator.Mode aggMode, boolean distinct, ObjectInspector oi, DataOutput output,
+ ExprNodeEvaluator[] evals, ObjectInspector[] pInspectors, Object[] parameterCache, SerDe serde,
+ LazyObject<? extends ObjectInspector> row, GenericUDAFEvaluator udafunctionPartial,
+ GenericUDAFEvaluator udafunctionComplete, ObjectInspector outputOi, ObjectInspector outputOiPartial) {
+ // shared object across threads
+ this.out = output;
+ this.mode = aggMode;
+ this.parameterInspectors = pInspectors;
+
+ // thread local objects
+ this.evaluators = evals;
+ this.cachedParameters = parameterCache;
+ this.cachedRowObject = row;
+ this.lazySer = serde;
+ this.udafPartial = udafunctionPartial;
+ this.udafComplete = udafunctionComplete;
+ this.outputInspector = outputOi;
+ this.outputInspectorPartial = outputOiPartial;
+ }
+
+ @Override
+ public void init() throws AlgebricksException {
+ try {
+ aggBuffer = udafPartial.getNewAggregationBuffer();
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ @Override
+ public void step(IFrameTupleReference tuple) throws AlgebricksException {
+ readIntoCache(tuple);
+ processRow();
+ }
+
+ private void processRow() throws AlgebricksException {
+ try {
+ // get values by evaluating them
+ for (int i = 0; i < cachedParameters.length; i++) {
+ cachedParameters[i] = evaluators[i].evaluate(cachedRowObject);
+ }
+ processAggregate();
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ private void processAggregate() throws HiveException {
+ /**
+ * accumulate the aggregation function
+ */
+ switch (mode) {
+ case PARTIAL1:
+ case COMPLETE:
+ udafPartial.iterate(aggBuffer, cachedParameters);
+ break;
+ case PARTIAL2:
+ case FINAL:
+ if (udafPartial instanceof GenericUDAFCount.GenericUDAFCountEvaluator) {
+ Object parameter = ((PrimitiveObjectInspector) parameterInspectors[0])
+ .getPrimitiveWritableObject(cachedParameters[0]);
+ udafPartial.merge(aggBuffer, parameter);
+ } else
+ udafPartial.merge(aggBuffer, cachedParameters[0]);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /**
+ * serialize the result
+ *
+ * @param result
+ * the evaluation result
+ * @throws IOException
+ * @throws AlgebricksException
+ */
+ private void serializeResult(Object result, ObjectInspector oi) throws IOException, AlgebricksException {
+ try {
+ BytesWritable outputWritable = (BytesWritable) lazySer.serialize(result, oi);
+ out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
+ } catch (SerDeException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ /**
+ * bind the tuple reference to the cached row object
+ *
+ * @param r
+ */
+ private void readIntoCache(IFrameTupleReference r) {
+ cachedRowObject.init(r);
+ }
+
+ @Override
+ public void finish() throws AlgebricksException {
+ // aggregator
+ try {
+ Object result = null;
+ result = udafPartial.terminatePartial(aggBuffer);
+ if (mode == GenericUDAFEvaluator.Mode.COMPLETE || mode == GenericUDAFEvaluator.Mode.FINAL) {
+ result = udafComplete.terminate(aggBuffer);
+ serializeResult(result, outputInspector);
+ } else {
+ serializeResult(result, outputInspectorPartial);
+ }
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ @Override
+ public void finishPartial() throws AlgebricksException {
+ // aggregator.
+ try {
+ Object result = null;
+ // get aggregations
+ result = udafPartial.terminatePartial(aggBuffer);
+ serializeResult(result, outputInspectorPartial);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
new file mode 100644
index 0000000..1933253
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
@@ -0,0 +1,248 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.BytesWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+public class AggregatuibFunctionSerializableEvaluator implements ICopySerializableAggregateFunction {
+
+ /**
+ * the mode of aggregation function
+ */
+ private GenericUDAFEvaluator.Mode mode;
+
+ /**
+ * an array of evaluators
+ */
+ private ExprNodeEvaluator[] evaluators;
+
+ /**
+ * udaf evaluator partial
+ */
+ private GenericUDAFEvaluator udafPartial;
+
+ /**
+ * udaf evaluator complete
+ */
+ private GenericUDAFEvaluator udafComplete;
+
+ /**
+ * cached parameter objects
+ */
+ private Object[] cachedParameters;
+
+ /**
+ * cached row objects
+ */
+ private LazyObject<? extends ObjectInspector> cachedRowObject;
+
+ /**
+ * aggregation buffer
+ */
+ private SerializableBuffer aggBuffer;
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ private SerDe lazySer;
+
+ /**
+ * the output object inspector for this aggregation function
+ */
+ private ObjectInspector outputInspector;
+
+ /**
+ * the output object inspector for this aggregation function
+ */
+ private ObjectInspector outputInspectorPartial;
+
+ /**
+ * parameter inspectors
+ */
+ private ObjectInspector[] parameterInspectors;
+
+ /**
+ * output make sure the aggregation functio has least object creation
+ *
+ * @param desc
+ * @param oi
+ * @param output
+ */
+ public AggregatuibFunctionSerializableEvaluator(List<ExprNodeDesc> inputs, List<TypeInfo> inputTypes,
+ String genericUDAFName, GenericUDAFEvaluator.Mode aggMode, boolean distinct, ObjectInspector oi,
+ ExprNodeEvaluator[] evals, ObjectInspector[] pInspectors, Object[] parameterCache, SerDe serde,
+ LazyObject<? extends ObjectInspector> row, GenericUDAFEvaluator udafunctionPartial,
+ GenericUDAFEvaluator udafunctionComplete, ObjectInspector outputOi, ObjectInspector outputOiPartial)
+ throws AlgebricksException {
+ // shared object across threads
+ this.mode = aggMode;
+ this.parameterInspectors = pInspectors;
+
+ // thread local objects
+ this.evaluators = evals;
+ this.cachedParameters = parameterCache;
+ this.cachedRowObject = row;
+ this.lazySer = serde;
+ this.udafPartial = udafunctionPartial;
+ this.udafComplete = udafunctionComplete;
+ this.outputInspector = outputOi;
+ this.outputInspectorPartial = outputOiPartial;
+
+ try {
+ aggBuffer = (SerializableBuffer) udafPartial.getNewAggregationBuffer();
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ @Override
+ public void init(DataOutput output) throws AlgebricksException {
+ try {
+ udafPartial.reset(aggBuffer);
+ outputAggBuffer(aggBuffer, output);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ @Override
+ public void step(IFrameTupleReference tuple, byte[] data, int start, int len) throws AlgebricksException {
+ deSerializeAggBuffer(aggBuffer, data, start, len);
+ readIntoCache(tuple);
+ processRow();
+ serializeAggBuffer(aggBuffer, data, start, len);
+ }
+
+ private void processRow() throws AlgebricksException {
+ try {
+ // get values by evaluating them
+ for (int i = 0; i < cachedParameters.length; i++) {
+ cachedParameters[i] = evaluators[i].evaluate(cachedRowObject);
+ }
+ processAggregate();
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ private void processAggregate() throws HiveException {
+ /**
+ * accumulate the aggregation function
+ */
+ switch (mode) {
+ case PARTIAL1:
+ case COMPLETE:
+ udafPartial.iterate(aggBuffer, cachedParameters);
+ break;
+ case PARTIAL2:
+ case FINAL:
+ if (udafPartial instanceof GenericUDAFCount.GenericUDAFCountEvaluator) {
+ Object parameter = ((PrimitiveObjectInspector) parameterInspectors[0])
+ .getPrimitiveWritableObject(cachedParameters[0]);
+ udafPartial.merge(aggBuffer, parameter);
+ } else
+ udafPartial.merge(aggBuffer, cachedParameters[0]);
+ break;
+ default:
+ break;
+ }
+ }
+
+ /**
+ * serialize the result
+ *
+ * @param result
+ * the evaluation result
+ * @throws IOException
+ * @throws AlgebricksException
+ */
+ private void serializeResult(Object result, ObjectInspector oi, DataOutput out) throws IOException,
+ AlgebricksException {
+ try {
+ BytesWritable outputWritable = (BytesWritable) lazySer.serialize(result, oi);
+ out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
+ } catch (SerDeException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ /**
+ * bind the tuple reference to the cached row object
+ *
+ * @param r
+ */
+ private void readIntoCache(IFrameTupleReference r) {
+ cachedRowObject.init(r);
+ }
+
+ @Override
+ public void finish(byte[] data, int start, int len, DataOutput output) throws AlgebricksException {
+ deSerializeAggBuffer(aggBuffer, data, start, len);
+ // aggregator
+ try {
+ Object result = null;
+ result = udafPartial.terminatePartial(aggBuffer);
+ if (mode == GenericUDAFEvaluator.Mode.COMPLETE || mode == GenericUDAFEvaluator.Mode.FINAL) {
+ result = udafComplete.terminate(aggBuffer);
+ serializeResult(result, outputInspector, output);
+ } else {
+ serializeResult(result, outputInspectorPartial, output);
+ }
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ @Override
+ public void finishPartial(byte[] data, int start, int len, DataOutput output) throws AlgebricksException {
+ deSerializeAggBuffer(aggBuffer, data, start, len);
+ // aggregator.
+ try {
+ Object result = null;
+ // get aggregations
+ result = udafPartial.terminatePartial(aggBuffer);
+ serializeResult(result, outputInspectorPartial, output);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ private void serializeAggBuffer(SerializableBuffer buffer, byte[] data, int start, int len)
+ throws AlgebricksException {
+ buffer.serializeAggBuffer(data, start, len);
+ }
+
+ private void deSerializeAggBuffer(SerializableBuffer buffer, byte[] data, int start, int len)
+ throws AlgebricksException {
+ buffer.deSerializeAggBuffer(data, start, len);
+ }
+
+ private void outputAggBuffer(SerializableBuffer buffer, DataOutput out) throws AlgebricksException {
+ try {
+ buffer.serializeAggBuffer(out);
+ } catch (IOException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java
new file mode 100644
index 0000000..96065e5
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java
@@ -0,0 +1,67 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+public class BufferSerDeUtil {
+
+ public static double getDouble(byte[] bytes, int offset) {
+ return Double.longBitsToDouble(getLong(bytes, offset));
+ }
+
+ public static float getFloat(byte[] bytes, int offset) {
+ return Float.intBitsToFloat(getInt(bytes, offset));
+ }
+
+ public static boolean getBoolean(byte[] bytes, int offset) {
+ if (bytes[offset] == 0)
+ return false;
+ else
+ return true;
+ }
+
+ public static int getInt(byte[] bytes, int offset) {
+ return ((bytes[offset] & 0xff) << 24) + ((bytes[offset + 1] & 0xff) << 16) + ((bytes[offset + 2] & 0xff) << 8)
+ + ((bytes[offset + 3] & 0xff) << 0);
+ }
+
+ public static long getLong(byte[] bytes, int offset) {
+ return (((long) (bytes[offset] & 0xff)) << 56) + (((long) (bytes[offset + 1] & 0xff)) << 48)
+ + (((long) (bytes[offset + 2] & 0xff)) << 40) + (((long) (bytes[offset + 3] & 0xff)) << 32)
+ + (((long) (bytes[offset + 4] & 0xff)) << 24) + (((long) (bytes[offset + 5] & 0xff)) << 16)
+ + (((long) (bytes[offset + 6] & 0xff)) << 8) + (((long) (bytes[offset + 7] & 0xff)) << 0);
+ }
+
+ public static void writeBoolean(boolean value, byte[] bytes, int offset) {
+ if (value)
+ bytes[offset] = (byte) 1;
+ else
+ bytes[offset] = (byte) 0;
+ }
+
+ public static void writeInt(int value, byte[] bytes, int offset) {
+ bytes[offset++] = (byte) (value >> 24);
+ bytes[offset++] = (byte) (value >> 16);
+ bytes[offset++] = (byte) (value >> 8);
+ bytes[offset++] = (byte) (value);
+ }
+
+ public static void writeLong(long value, byte[] bytes, int offset) {
+ bytes[offset++] = (byte) (value >> 56);
+ bytes[offset++] = (byte) (value >> 48);
+ bytes[offset++] = (byte) (value >> 40);
+ bytes[offset++] = (byte) (value >> 32);
+ bytes[offset++] = (byte) (value >> 24);
+ bytes[offset++] = (byte) (value >> 16);
+ bytes[offset++] = (byte) (value >> 8);
+ bytes[offset++] = (byte) (value);
+ }
+
+ public static void writeDouble(double value, byte[] bytes, int offset) {
+ long lValue = Double.doubleToLongBits(value);
+ writeLong(lValue, bytes, offset);
+ }
+
+ public static void writeFloat(float value, byte[] bytes, int offset) {
+ int iValue = Float.floatToIntBits(value);
+ writeInt(iValue, bytes, offset);
+ }
+
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java
index 3296e19..5647f6a 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ColumnExpressionEvaluator.java
@@ -9,10 +9,9 @@
public class ColumnExpressionEvaluator extends AbstractExpressionEvaluator {
- public ColumnExpressionEvaluator(ExprNodeColumnDesc expr,
- ObjectInspector oi, IDataOutputProvider output)
- throws AlgebricksException {
- super(new ExprNodeColumnEvaluator(expr), oi, output);
- }
+ public ColumnExpressionEvaluator(ExprNodeColumnDesc expr, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ super(new ExprNodeColumnEvaluator(expr), oi, output);
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java
index 62928e6..d8796ea 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ConstantExpressionEvaluator.java
@@ -9,9 +9,8 @@
public class ConstantExpressionEvaluator extends AbstractExpressionEvaluator {
- public ConstantExpressionEvaluator(ExprNodeConstantDesc expr,
- ObjectInspector oi, IDataOutputProvider output)
- throws AlgebricksException {
- super(new ExprNodeConstantEvaluator(expr), oi, output);
- }
+ public ConstantExpressionEvaluator(ExprNodeConstantDesc expr, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ super(new ExprNodeConstantEvaluator(expr), oi, output);
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java
similarity index 68%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java
index 5f6a5dc..35560b6 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FieldExpressionEvaluator.java
@@ -9,9 +9,9 @@
public class FieldExpressionEvaluator extends AbstractExpressionEvaluator {
- public FieldExpressionEvaluator(ExprNodeFieldDesc expr, ObjectInspector oi,
- IDataOutputProvider output) throws AlgebricksException {
- super(new ExprNodeFieldEvaluator(expr), oi, output);
- }
+ public FieldExpressionEvaluator(ExprNodeFieldDesc expr, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ super(new ExprNodeFieldEvaluator(expr), oi, output);
+ }
}
\ No newline at end of file
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java
index c3f3c93..7ffec7a 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/FunctionExpressionEvaluator.java
@@ -9,10 +9,9 @@
public class FunctionExpressionEvaluator extends AbstractExpressionEvaluator {
- public FunctionExpressionEvaluator(ExprNodeGenericFuncDesc expr,
- ObjectInspector oi, IDataOutputProvider output)
- throws AlgebricksException {
- super(new ExprNodeGenericFuncEvaluator(expr), oi, output);
- }
+ public FunctionExpressionEvaluator(ExprNodeGenericFuncDesc expr, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ super(new ExprNodeGenericFuncEvaluator(expr), oi, output);
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java
similarity index 68%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java
index cbe5561..ca60385 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/NullExpressionEvaluator.java
@@ -9,8 +9,8 @@
public class NullExpressionEvaluator extends AbstractExpressionEvaluator {
- public NullExpressionEvaluator(ExprNodeNullDesc expr, ObjectInspector oi,
- IDataOutputProvider output) throws AlgebricksException {
- super(new ExprNodeNullEvaluator(expr), oi, output);
- }
+ public NullExpressionEvaluator(ExprNodeNullDesc expr, ObjectInspector oi, IDataOutputProvider output)
+ throws AlgebricksException {
+ super(new ExprNodeNullEvaluator(expr), oi, output);
+ }
}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java
new file mode 100644
index 0000000..676989e
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java
@@ -0,0 +1,16 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
+
+public interface SerializableBuffer extends AggregationBuffer {
+
+ public void deSerializeAggBuffer(byte[] data, int start, int len);
+
+ public void serializeAggBuffer(byte[] data, int start, int len);
+
+ public void serializeAggBuffer(DataOutput output) throws IOException;
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
new file mode 100644
index 0000000..2e78663
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
@@ -0,0 +1,143 @@
+package edu.uci.ics.hivesterix.runtime.evaluator;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.udf.generic.Collector;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
+import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunction;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+public class UDTFFunctionEvaluator implements ICopyUnnestingFunction, Collector {
+
+ /**
+ * udtf function
+ */
+ private UDTFDesc func;
+
+ /**
+ * input object inspector
+ */
+ private ObjectInspector inputInspector;
+
+ /**
+ * output object inspector
+ */
+ private ObjectInspector outputInspector;
+
+ /**
+ * object inspector for udtf
+ */
+ private ObjectInspector[] udtfInputOIs;
+
+ /**
+ * generic udtf
+ */
+ private GenericUDTF udtf;
+
+ /**
+ * data output
+ */
+ private DataOutput out;
+
+ /**
+ * the input row object
+ */
+ private LazyColumnar cachedRowObject;
+
+ /**
+ * cached row object (input)
+ */
+ private Object[] cachedInputObjects;
+
+ /**
+ * serialization/deserialization
+ */
+ private SerDe lazySerDe;
+
+ /**
+ * columns feed into UDTF
+ */
+ private int[] columns;
+
+ public UDTFFunctionEvaluator(UDTFDesc desc, Schema schema, int[] cols, DataOutput output) {
+ this.func = desc;
+ this.inputInspector = schema.toObjectInspector();
+ udtf = func.getGenericUDTF();
+ out = output;
+ columns = cols;
+ }
+
+ @Override
+ public void init(IFrameTupleReference tuple) throws AlgebricksException {
+ cachedInputObjects = new LazyObject[columns.length];
+ try {
+ cachedRowObject = (LazyColumnar) LazyFactory.createLazyObject(inputInspector);
+ outputInspector = udtf.initialize(udtfInputOIs);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udtf.setCollector(this);
+ lazySerDe = new LazySerDe();
+ readIntoCache(tuple);
+ }
+
+ @Override
+ public boolean step() throws AlgebricksException {
+ try {
+ udtf.process(cachedInputObjects);
+ return true;
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ /**
+ * bind the tuple reference to the cached row object
+ *
+ * @param r
+ */
+ private void readIntoCache(IFrameTupleReference r) {
+ cachedRowObject.init(r);
+ for (int i = 0; i < cachedInputObjects.length; i++) {
+ cachedInputObjects[i] = cachedRowObject.getField(columns[i]);
+ }
+ }
+
+ /**
+ * serialize the result
+ *
+ * @param result
+ * the evaluation result
+ * @throws IOException
+ * @throws AlgebricksException
+ */
+ private void serializeResult(Object result) throws SerDeException, IOException {
+ BytesWritable outputWritable = (BytesWritable) lazySerDe.serialize(result, outputInspector);
+ out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
+ }
+
+ @Override
+ public void collect(Object input) throws HiveException {
+ try {
+ serializeResult(input);
+ } catch (IOException e) {
+ throw new HiveException(e);
+ } catch (SerDeException e) {
+ throw new HiveException(e);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..f3b76e4
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java
@@ -0,0 +1,34 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveByteBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveByteBinaryAscComparatorFactory INSTANCE = new HiveByteBinaryAscComparatorFactory();
+
+ private HiveByteBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private byte left;
+ private byte right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = b1[s1];
+ right = b2[s2];
+ if (left > right)
+ return 1;
+ else if (left == right)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..8d452dc
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java
@@ -0,0 +1,33 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveByteBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveByteBinaryDescComparatorFactory INSTANCE = new HiveByteBinaryDescComparatorFactory();
+
+ private HiveByteBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private byte left;
+ private byte right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = b1[s1];
+ right = b2[s2];
+ if (left > right)
+ return -1;
+ else if (left == right)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..0b5350a
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveDoubleBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveDoubleBinaryAscComparatorFactory INSTANCE = new HiveDoubleBinaryAscComparatorFactory();
+
+ private HiveDoubleBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private double left;
+ private double right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b1, s1));
+ right = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b2, s2));
+ if (left > right)
+ return 1;
+ else if (left == right)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..2405956
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveDoubleBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveDoubleBinaryDescComparatorFactory INSTANCE = new HiveDoubleBinaryDescComparatorFactory();
+
+ private HiveDoubleBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private double left;
+ private double right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b1, s1));
+ right = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b2, s2));
+ if (left > right)
+ return -1;
+ else if (left == right)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..05a43e6
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveFloatBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveFloatBinaryAscComparatorFactory INSTANCE = new HiveFloatBinaryAscComparatorFactory();
+
+ private HiveFloatBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private float left;
+ private float right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b1, s1));
+ right = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b2, s2));
+ if (left > right)
+ return 1;
+ else if (left == right)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..2c44f97
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveFloatBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveFloatBinaryDescComparatorFactory INSTANCE = new HiveFloatBinaryDescComparatorFactory();
+
+ private HiveFloatBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private float left;
+ private float right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b1, s1));
+ right = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b2, s2));
+ if (left > right)
+ return -1;
+ else if (left == right)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..0127791
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java
@@ -0,0 +1,40 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveIntegerBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static final HiveIntegerBinaryAscComparatorFactory INSTANCE = new HiveIntegerBinaryAscComparatorFactory();
+
+ private HiveIntegerBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VInt left = new VInt();
+ private VInt right = new VInt();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVInt(b1, s1, left);
+ LazyUtils.readVInt(b2, s2, right);
+
+ if (left.length != l1 || right.length != l2)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + left.length + "," + right.length + " expected " + l1 + "," + l2);
+
+ if (left.value > right.value)
+ return 1;
+ else if (left.value == right.value)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..5116337
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveIntegerBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static final HiveIntegerBinaryDescComparatorFactory INSTANCE = new HiveIntegerBinaryDescComparatorFactory();
+
+ private HiveIntegerBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VInt left = new VInt();
+ private VInt right = new VInt();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVInt(b1, s1, left);
+ LazyUtils.readVInt(b2, s2, right);
+ if (left.length != l1 || right.length != l2)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + left.length + " expected " + l1);
+ if (left.value > right.value)
+ return -1;
+ else if (left.value == right.value)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..fa416a9
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveLongBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static final HiveLongBinaryAscComparatorFactory INSTANCE = new HiveLongBinaryAscComparatorFactory();
+
+ private HiveLongBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VLong left = new VLong();
+ private VLong right = new VLong();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVLong(b1, s1, left);
+ LazyUtils.readVLong(b2, s2, right);
+ if (left.length != l1 || right.length != l2)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + left.length + " expected " + l1);
+ if (left.value > right.value)
+ return 1;
+ else if (left.value == right.value)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..e72dc62
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveLongBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static final HiveLongBinaryDescComparatorFactory INSTANCE = new HiveLongBinaryDescComparatorFactory();
+
+ private HiveLongBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VLong left = new VLong();
+ private VLong right = new VLong();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVLong(b1, s1, left);
+ LazyUtils.readVLong(b2, s2, right);
+ if (left.length != l1 || right.length != l2)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + left.length + " expected " + l1);
+ if (left.value > right.value)
+ return -1;
+ else if (left.value == right.value)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..a3745fa
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveShortBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveShortBinaryAscComparatorFactory INSTANCE = new HiveShortBinaryAscComparatorFactory();
+
+ private HiveShortBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private short left;
+ private short right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = LazyUtils.byteArrayToShort(b1, s1);
+ right = LazyUtils.byteArrayToShort(b2, s2);
+ if (left > right)
+ return 1;
+ else if (left == right)
+ return 0;
+ else
+ return -1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..44d3f43
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveShortBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveShortBinaryDescComparatorFactory INSTANCE = new HiveShortBinaryDescComparatorFactory();
+
+ private HiveShortBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private short left;
+ private short right;
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ left = LazyUtils.byteArrayToShort(b1, s1);
+ right = LazyUtils.byteArrayToShort(b2, s2);
+ if (left > right)
+ return -1;
+ else if (left == right)
+ return 0;
+ else
+ return 1;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java
new file mode 100644
index 0000000..6da9716
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java
@@ -0,0 +1,40 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveStringBinaryAscComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveStringBinaryAscComparatorFactory INSTANCE = new HiveStringBinaryAscComparatorFactory();
+
+ private HiveStringBinaryAscComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VInt leftLen = new VInt();
+ private VInt rightLen = new VInt();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVInt(b1, s1, leftLen);
+ LazyUtils.readVInt(b2, s2, rightLen);
+
+ if (leftLen.value + leftLen.length != l1 || rightLen.value + rightLen.length != l2)
+ throw new IllegalStateException("parse string: length mismatch, expected "
+ + (leftLen.value + leftLen.length) + ", " + (rightLen.value + rightLen.length)
+ + " but get " + l1 + ", " + l2);
+
+ return Text.Comparator.compareBytes(b1, s1 + leftLen.length, l1 - leftLen.length, b2, s2
+ + rightLen.length, l2 - rightLen.length);
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java
new file mode 100644
index 0000000..c579711
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java
@@ -0,0 +1,39 @@
+package edu.uci.ics.hivesterix.runtime.factory.comparator;
+
+import org.apache.hadoop.io.WritableComparator;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveStringBinaryDescComparatorFactory implements IBinaryComparatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveStringBinaryDescComparatorFactory INSTANCE = new HiveStringBinaryDescComparatorFactory();
+
+ private HiveStringBinaryDescComparatorFactory() {
+ }
+
+ @Override
+ public IBinaryComparator createBinaryComparator() {
+ return new IBinaryComparator() {
+ private VInt leftLen = new VInt();
+ private VInt rightLen = new VInt();
+
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+ LazyUtils.readVInt(b1, s1, leftLen);
+ LazyUtils.readVInt(b2, s2, rightLen);
+
+ if (leftLen.value + leftLen.length != l1 || rightLen.value + rightLen.length != l2)
+ throw new IllegalStateException("parse string: length mismatch, expected "
+ + (leftLen.value + leftLen.length) + ", " + (rightLen.value + rightLen.length)
+ + " but get " + l1 + ", " + l2);
+
+ return -WritableComparator.compareBytes(b1, s1 + leftLen.length, l1 - leftLen.length, b2, s2
+ + rightLen.length, l2 - rightLen.length);
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
new file mode 100644
index 0000000..d664341
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
@@ -0,0 +1,367 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.AggregationFunctionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunctionFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class AggregationFunctionFactory implements ICopyAggregateFunctionFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * list of parameters' serialization
+ */
+ private List<String> parametersSerialization = new ArrayList<String>();
+
+ /**
+ * the name of the udf
+ */
+ private String genericUDAFName;
+
+ /**
+ * aggregation mode
+ */
+ private GenericUDAFEvaluator.Mode mode;
+
+ /**
+ * list of type info
+ */
+ private List<TypeInfo> types = new ArrayList<TypeInfo>();
+
+ /**
+ * distinct or not
+ */
+ private boolean distinct;
+
+ /**
+ * the schema of incoming rows
+ */
+ private Schema rowSchema;
+
+ /**
+ * list of parameters
+ */
+ private transient List<ExprNodeDesc> parametersOrigin;
+
+ /**
+ * row inspector
+ */
+ private transient ObjectInspector rowInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspectorPartial = null;
+
+ /**
+ * parameter inspectors
+ */
+ private transient ObjectInspector[] parameterInspectors = null;
+
+ /**
+ * expression desc
+ */
+ private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * udaf evaluators
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * udaf evaluators
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * aggregation function desc
+ */
+ private transient AggregationDesc aggregator;
+
+ /**
+ * @param aggregator
+ * Algebricks function call expression
+ * @param oi
+ * schema
+ */
+ public AggregationFunctionFactory(AggregateFunctionCallExpression expression, Schema oi,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+
+ try {
+ aggregator = (AggregationDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ init(aggregator.getParameters(), aggregator.getGenericUDAFName(), aggregator.getMode(),
+ aggregator.getDistinct(), oi);
+ }
+
+ /**
+ * constructor of aggregation function factory
+ *
+ * @param inputs
+ * @param name
+ * @param udafMode
+ * @param distinct
+ * @param oi
+ */
+ private void init(List<ExprNodeDesc> inputs, String name, GenericUDAFEvaluator.Mode udafMode, boolean distinct,
+ Schema oi) {
+ parametersOrigin = inputs;
+ genericUDAFName = name;
+ mode = udafMode;
+ this.distinct = distinct;
+ rowSchema = oi;
+
+ for (ExprNodeDesc input : inputs) {
+ TypeInfo type = input.getTypeInfo();
+ if (type instanceof StructTypeInfo) {
+ types.add(TypeInfoFactory.doubleTypeInfo);
+ } else
+ types.add(type);
+
+ String s = Utilities.serializeExpression(input);
+ parametersSerialization.add(s);
+ }
+ }
+
+ @Override
+ public synchronized ICopyAggregateFunction createAggregateFunction(IDataOutputProvider provider)
+ throws AlgebricksException {
+ if (parametersOrigin == null) {
+ Configuration config = new Configuration();
+ config.setClassLoader(this.getClass().getClassLoader());
+ /**
+ * in case of class.forname(...) call in hive code
+ */
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+
+ parametersOrigin = new ArrayList<ExprNodeDesc>();
+ for (String serialization : parametersSerialization) {
+ parametersOrigin.add(Utilities.deserializeExpression(serialization, config));
+ }
+ }
+
+ /**
+ * exprs
+ */
+ if (parameterExprs == null)
+ parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ if (evaluators == null)
+ evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ if (cachedParameters == null)
+ cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ if (cachedRowObjects == null)
+ cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ if (serDe == null)
+ serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * UDAF functions
+ */
+ if (udafsComplete == null)
+ udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * UDAF functions
+ */
+ if (udafsPartial == null)
+ udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ if (parameterInspectors == null)
+ parameterInspectors = new ObjectInspector[parametersOrigin.size()];
+
+ if (rowInspector == null)
+ rowInspector = rowSchema.toObjectInspector();
+
+ // get current thread id
+ long threadId = Thread.currentThread().getId();
+
+ /**
+ * expressions, expressions are thread local
+ */
+ List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
+ if (parameters == null) {
+ parameters = new ArrayList<ExprNodeDesc>();
+ for (ExprNodeDesc parameter : parametersOrigin)
+ parameters.add(parameter.clone());
+ parameterExprs.put(threadId, parameters);
+ }
+
+ /**
+ * cached parameter objects
+ */
+ Object[] cachedParas = cachedParameters.get(threadId);
+ if (cachedParas == null) {
+ cachedParas = new Object[parameters.size()];
+ cachedParameters.put(threadId, cachedParas);
+ }
+
+ /**
+ * cached row object: one per thread
+ */
+ LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects.get(threadId);
+ if (cachedRowObject == null) {
+ cachedRowObject = LazyFactory.createLazyObject(rowInspector);
+ cachedRowObjects.put(threadId, cachedRowObject);
+ }
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ SerDe lazySer = serDe.get(threadId);
+ if (lazySer == null) {
+ lazySer = new LazySerDe();
+ serDe.put(threadId, lazySer);
+ }
+
+ /**
+ * evaluators
+ */
+ ExprNodeEvaluator[] evals = evaluators.get(threadId);
+ if (evals == null) {
+ evals = new ExprNodeEvaluator[parameters.size()];
+ evaluators.put(threadId, evals);
+ }
+
+ GenericUDAFEvaluator udafPartial;
+ GenericUDAFEvaluator udafComplete;
+
+ // initialize object inspectors
+ try {
+ /**
+ * evaluators, udf, object inpsectors are shared in one thread
+ */
+ for (int i = 0; i < evals.length; i++) {
+ if (evals[i] == null) {
+ evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
+ if (parameterInspectors[i] == null) {
+ parameterInspectors[i] = evals[i].initialize(rowInspector);
+ } else {
+ evals[i].initialize(rowInspector);
+ }
+ }
+ }
+
+ udafComplete = udafsComplete.get(threadId);
+ if (udafComplete == null) {
+ try {
+ udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafsComplete.put(threadId, udafComplete);
+ udafComplete.init(mode, parameterInspectors);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspector == null)
+ outputInspector = udafComplete.init(mode, parameterInspectors);
+
+ // initial partial gby udaf
+ GenericUDAFEvaluator.Mode partialMode;
+ // adjust mode for external groupby
+ if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
+ else if (mode == GenericUDAFEvaluator.Mode.FINAL)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
+ else
+ partialMode = mode;
+ udafPartial = udafsPartial.get(threadId);
+ if (udafPartial == null) {
+ try {
+ udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafPartial.init(partialMode, parameterInspectors);
+ udafsPartial.put(threadId, udafPartial);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspectorPartial == null)
+ outputInspectorPartial = udafPartial.init(partialMode, parameterInspectors);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e);
+ }
+
+ return new AggregationFunctionEvaluator(parameters, types, genericUDAFName, mode, distinct, rowInspector,
+ provider.getDataOutput(), evals, parameterInspectors, cachedParas, lazySer, cachedRowObject,
+ udafPartial, udafComplete, outputInspector, outputInspectorPartial);
+ }
+
+ public String toString() {
+ return "aggregation function expression evaluator factory: " + this.genericUDAFName;
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
new file mode 100644
index 0000000..54a1155
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
@@ -0,0 +1,366 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
+import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.AggregatuibFunctionSerializableEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
+import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunctionFactory;
+
+public class AggregationFunctionSerializableFactory implements ICopySerializableAggregateFunctionFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * list of parameters' serialization
+ */
+ private List<String> parametersSerialization = new ArrayList<String>();
+
+ /**
+ * the name of the udf
+ */
+ private String genericUDAFName;
+
+ /**
+ * aggregation mode
+ */
+ private GenericUDAFEvaluator.Mode mode;
+
+ /**
+ * list of type info
+ */
+ private List<TypeInfo> types = new ArrayList<TypeInfo>();
+
+ /**
+ * distinct or not
+ */
+ private boolean distinct;
+
+ /**
+ * the schema of incoming rows
+ */
+ private Schema rowSchema;
+
+ /**
+ * list of parameters
+ */
+ private transient List<ExprNodeDesc> parametersOrigin;
+
+ /**
+ * row inspector
+ */
+ private transient ObjectInspector rowInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspector = null;
+
+ /**
+ * output object inspector
+ */
+ private transient ObjectInspector outputInspectorPartial = null;
+
+ /**
+ * parameter inspectors
+ */
+ private transient ObjectInspector[] parameterInspectors = null;
+
+ /**
+ * expression desc
+ */
+ private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * udaf evaluators
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * udaf evaluators
+ */
+ private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * aggregation function desc
+ */
+ private transient AggregationDesc aggregator;
+
+ /**
+ * @param aggregator
+ * Algebricks function call expression
+ * @param oi
+ * schema
+ */
+ public AggregationFunctionSerializableFactory(AggregateFunctionCallExpression expression, Schema oi,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+
+ try {
+ aggregator = (AggregationDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ init(aggregator.getParameters(), aggregator.getGenericUDAFName(), aggregator.getMode(),
+ aggregator.getDistinct(), oi);
+ }
+
+ /**
+ * constructor of aggregation function factory
+ *
+ * @param inputs
+ * @param name
+ * @param udafMode
+ * @param distinct
+ * @param oi
+ */
+ private void init(List<ExprNodeDesc> inputs, String name, GenericUDAFEvaluator.Mode udafMode, boolean distinct,
+ Schema oi) {
+ parametersOrigin = inputs;
+ genericUDAFName = name;
+ mode = udafMode;
+ this.distinct = distinct;
+ rowSchema = oi;
+
+ for (ExprNodeDesc input : inputs) {
+ TypeInfo type = input.getTypeInfo();
+ if (type instanceof StructTypeInfo) {
+ types.add(TypeInfoFactory.doubleTypeInfo);
+ } else
+ types.add(type);
+
+ String s = Utilities.serializeExpression(input);
+ parametersSerialization.add(s);
+ }
+ }
+
+ @Override
+ public synchronized ICopySerializableAggregateFunction createAggregateFunction() throws AlgebricksException {
+ if (parametersOrigin == null) {
+ Configuration config = new Configuration();
+ config.setClassLoader(this.getClass().getClassLoader());
+ /**
+ * in case of class.forname(...) call in hive code
+ */
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+
+ parametersOrigin = new ArrayList<ExprNodeDesc>();
+ for (String serialization : parametersSerialization) {
+ parametersOrigin.add(Utilities.deserializeExpression(serialization, config));
+ }
+ }
+
+ /**
+ * exprs
+ */
+ if (parameterExprs == null)
+ parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
+
+ /**
+ * evaluators
+ */
+ if (evaluators == null)
+ evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
+
+ /**
+ * cached parameter objects
+ */
+ if (cachedParameters == null)
+ cachedParameters = new HashMap<Long, Object[]>();
+
+ /**
+ * cached row object: one per thread
+ */
+ if (cachedRowObjects == null)
+ cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ if (serDe == null)
+ serDe = new HashMap<Long, SerDe>();
+
+ /**
+ * UDAF functions
+ */
+ if (udafsComplete == null)
+ udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
+
+ /**
+ * UDAF functions
+ */
+ if (udafsPartial == null)
+ udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
+
+ if (parameterInspectors == null)
+ parameterInspectors = new ObjectInspector[parametersOrigin.size()];
+
+ if (rowInspector == null)
+ rowInspector = rowSchema.toObjectInspector();
+
+ // get current thread id
+ long threadId = Thread.currentThread().getId();
+
+ /**
+ * expressions, expressions are thread local
+ */
+ List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
+ if (parameters == null) {
+ parameters = new ArrayList<ExprNodeDesc>();
+ for (ExprNodeDesc parameter : parametersOrigin)
+ parameters.add(parameter.clone());
+ parameterExprs.put(threadId, parameters);
+ }
+
+ /**
+ * cached parameter objects
+ */
+ Object[] cachedParas = cachedParameters.get(threadId);
+ if (cachedParas == null) {
+ cachedParas = new Object[parameters.size()];
+ cachedParameters.put(threadId, cachedParas);
+ }
+
+ /**
+ * cached row object: one per thread
+ */
+ LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects.get(threadId);
+ if (cachedRowObject == null) {
+ cachedRowObject = LazyFactory.createLazyObject(rowInspector);
+ cachedRowObjects.put(threadId, cachedRowObject);
+ }
+
+ /**
+ * we only use lazy serde to do serialization
+ */
+ SerDe lazySer = serDe.get(threadId);
+ if (lazySer == null) {
+ lazySer = new LazySerDe();
+ serDe.put(threadId, lazySer);
+ }
+
+ /**
+ * evaluators
+ */
+ ExprNodeEvaluator[] evals = evaluators.get(threadId);
+ if (evals == null) {
+ evals = new ExprNodeEvaluator[parameters.size()];
+ evaluators.put(threadId, evals);
+ }
+
+ GenericUDAFEvaluator udafPartial;
+ GenericUDAFEvaluator udafComplete;
+
+ // initialize object inspectors
+ try {
+ /**
+ * evaluators, udf, object inpsectors are shared in one thread
+ */
+ for (int i = 0; i < evals.length; i++) {
+ if (evals[i] == null) {
+ evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
+ if (parameterInspectors[i] == null) {
+ parameterInspectors[i] = evals[i].initialize(rowInspector);
+ } else {
+ evals[i].initialize(rowInspector);
+ }
+ }
+ }
+
+ udafComplete = udafsComplete.get(threadId);
+ if (udafComplete == null) {
+ try {
+ udafComplete = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafsComplete.put(threadId, udafComplete);
+ udafComplete.init(mode, parameterInspectors);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspector == null)
+ outputInspector = udafComplete.init(mode, parameterInspectors);
+
+ // initial partial gby udaf
+ GenericUDAFEvaluator.Mode partialMode;
+ // adjust mode for external groupby
+ if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
+ else if (mode == GenericUDAFEvaluator.Mode.FINAL)
+ partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
+ else
+ partialMode = mode;
+ udafPartial = udafsPartial.get(threadId);
+ if (udafPartial == null) {
+ try {
+ udafPartial = FunctionRegistry.getGenericUDAFEvaluator(genericUDAFName, types, distinct, false);
+ } catch (HiveException e) {
+ throw new AlgebricksException(e);
+ }
+ udafPartial.init(partialMode, parameterInspectors);
+ udafsPartial.put(threadId, udafPartial);
+ }
+
+ // multiple stage group by, determined by the mode parameter
+ if (outputInspectorPartial == null)
+ outputInspectorPartial = udafPartial.init(partialMode, parameterInspectors);
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e);
+ }
+
+ return new AggregatuibFunctionSerializableEvaluator(parameters, types, genericUDAFName, mode, distinct,
+ rowInspector, evals, parameterInspectors, cachedParas, lazySer, cachedRowObject, udafPartial,
+ udafComplete, outputInspector, outputInspectorPartial);
+ }
+
+ public String toString() {
+ return "aggregation function expression evaluator factory: " + this.genericUDAFName;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java
new file mode 100644
index 0000000..6f51bfe
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.ColumnExpressionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class ColumnExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private ExprNodeColumnDesc expr;
+
+ private Schema inputSchema;
+
+ public ColumnExpressionEvaluatorFactory(ILogicalExpression expression, Schema schema, IVariableTypeEnvironment env)
+ throws AlgebricksException {
+ try {
+ expr = (ExprNodeColumnDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ throw new AlgebricksException(e.getMessage());
+ }
+ inputSchema = schema;
+ }
+
+ public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
+ return new ColumnExpressionEvaluator(expr, inputSchema.toObjectInspector(), output);
+ }
+
+ public String toString() {
+ return "column expression evaluator factory: " + expr.toString();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java
new file mode 100644
index 0000000..4ecdb70
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.ConstantExpressionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class ConstantExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private ExprNodeConstantDesc expr;
+
+ private Schema schema;
+
+ public ConstantExpressionEvaluatorFactory(ILogicalExpression expression, Schema inputSchema,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+ try {
+ expr = (ExprNodeConstantDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ throw new AlgebricksException(e.getMessage());
+ }
+ schema = inputSchema;
+ }
+
+ public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
+ return new ConstantExpressionEvaluator(expr, schema.toObjectInspector(), output);
+ }
+
+ public String toString() {
+ return "constant expression evaluator factory: " + expr.toString();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java
new file mode 100644
index 0000000..ef0c104
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java
@@ -0,0 +1,40 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.FieldExpressionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class FieldExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
+ private static final long serialVersionUID = 1L;
+
+ private ExprNodeFieldDesc expr;
+
+ private Schema inputSchema;
+
+ public FieldExpressionEvaluatorFactory(ILogicalExpression expression, Schema schema, IVariableTypeEnvironment env)
+ throws AlgebricksException {
+ try {
+ expr = (ExprNodeFieldDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ throw new AlgebricksException(e.getMessage());
+ }
+ inputSchema = schema;
+ }
+
+ public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
+ return new FieldExpressionEvaluator(expr, inputSchema.toObjectInspector(), output);
+ }
+
+ public String toString() {
+ return "field access expression evaluator factory: " + expr.toString();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java
new file mode 100644
index 0000000..c3b4b17
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java
@@ -0,0 +1,167 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression.FunctionKind;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.AggregateFunctionFactoryAdapter;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.ScalarEvaluatorFactoryAdapter;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.UnnestingFunctionFactoryAdapter;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.StatefulFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
+import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IAggregateEvaluatorFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunctionFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IRunningAggregateEvaluatorFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IUnnestingEvaluatorFactory;
+
+public class HiveExpressionRuntimeProvider implements IExpressionRuntimeProvider {
+
+ public static final IExpressionRuntimeProvider INSTANCE = new HiveExpressionRuntimeProvider();
+
+ @Override
+ public IAggregateEvaluatorFactory createAggregateFunctionFactory(AggregateFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new AggregateFunctionFactoryAdapter(new AggregationFunctionFactory(expr, schema, env));
+ }
+
+ @Override
+ public ICopySerializableAggregateFunctionFactory createSerializableAggregateFunctionFactory(
+ AggregateFunctionCallExpression expr, IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas,
+ JobGenContext context) throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new AggregationFunctionSerializableFactory(expr, schema, env);
+ }
+
+ @Override
+ public IRunningAggregateEvaluatorFactory createRunningAggregateFunctionFactory(StatefulFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public IUnnestingEvaluatorFactory createUnnestingFunctionFactory(UnnestingFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new UnnestingFunctionFactoryAdapter(new UnnestingFunctionFactory(expr, schema, env));
+ }
+
+ public IScalarEvaluatorFactory createEvaluatorFactory(ILogicalExpression expr, IVariableTypeEnvironment env,
+ IOperatorSchema[] inputSchemas, JobGenContext context) throws AlgebricksException {
+ switch (expr.getExpressionTag()) {
+ case VARIABLE: {
+ VariableReferenceExpression v = (VariableReferenceExpression) expr;
+ return new ScalarEvaluatorFactoryAdapter(createVariableEvaluatorFactory(v, env, inputSchemas, context));
+ }
+ case CONSTANT: {
+ ConstantExpression c = (ConstantExpression) expr;
+ return new ScalarEvaluatorFactoryAdapter(createConstantEvaluatorFactory(c, env, inputSchemas, context));
+ }
+ case FUNCTION_CALL: {
+ AbstractFunctionCallExpression fun = (AbstractFunctionCallExpression) expr;
+ FunctionIdentifier fid = fun.getFunctionIdentifier();
+
+ if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
+ return new ScalarEvaluatorFactoryAdapter(createFieldExpressionEvaluatorFactory(fun, env,
+ inputSchemas, context));
+ }
+
+ if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
+ return new ScalarEvaluatorFactoryAdapter(createNullExpressionEvaluatorFactory(fun, env,
+ inputSchemas, context));
+ }
+
+ if (fun.getKind() == FunctionKind.SCALAR) {
+ ScalarFunctionCallExpression scalar = (ScalarFunctionCallExpression) fun;
+ return new ScalarEvaluatorFactoryAdapter(createScalarFunctionEvaluatorFactory(scalar, env,
+ inputSchemas, context));
+ } else {
+ throw new AlgebricksException("Cannot create evaluator for function " + fun + " of kind "
+ + fun.getKind());
+ }
+ }
+ default: {
+ throw new IllegalStateException();
+ }
+ }
+ }
+
+ private ICopyEvaluatorFactory createVariableEvaluatorFactory(VariableReferenceExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new ColumnExpressionEvaluatorFactory(expr, schema, env);
+ }
+
+ private ICopyEvaluatorFactory createScalarFunctionEvaluatorFactory(AbstractFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ List<String> names = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ for (IOperatorSchema inputSchema : inputSchemas) {
+ Schema schema = this.getSchema(inputSchema, env);
+ names.addAll(schema.getNames());
+ types.addAll(schema.getTypes());
+ }
+ Schema inputSchema = new Schema(names, types);
+ return new ScalarFunctionExpressionEvaluatorFactory(expr, inputSchema, env);
+ }
+
+ private ICopyEvaluatorFactory createFieldExpressionEvaluatorFactory(AbstractFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new FieldExpressionEvaluatorFactory(expr, schema, env);
+ }
+
+ private ICopyEvaluatorFactory createNullExpressionEvaluatorFactory(AbstractFunctionCallExpression expr,
+ IVariableTypeEnvironment env, IOperatorSchema[] inputSchemas, JobGenContext context)
+ throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new NullExpressionEvaluatorFactory(expr, schema, env);
+ }
+
+ private ICopyEvaluatorFactory createConstantEvaluatorFactory(ConstantExpression expr, IVariableTypeEnvironment env,
+ IOperatorSchema[] inputSchemas, JobGenContext context) throws AlgebricksException {
+ Schema schema = this.getSchema(inputSchemas[0], env);
+ return new ConstantExpressionEvaluatorFactory(expr, schema, env);
+ }
+
+ private Schema getSchema(IOperatorSchema inputSchema, IVariableTypeEnvironment env) throws AlgebricksException {
+ List<String> names = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ Iterator<LogicalVariable> variables = inputSchema.iterator();
+ while (variables.hasNext()) {
+ LogicalVariable var = variables.next();
+ names.add(var.toString());
+ types.add((TypeInfo) env.getVarType(var));
+ }
+
+ Schema schema = new Schema(names, types);
+ return schema;
+ }
+
+}
\ No newline at end of file
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java
new file mode 100644
index 0000000..f37b825
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.NullExpressionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class NullExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private ExprNodeNullDesc expr;
+
+ private Schema schema;
+
+ public NullExpressionEvaluatorFactory(ILogicalExpression expression, Schema intputSchema,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+ try {
+ expr = (ExprNodeNullDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ throw new AlgebricksException(e.getMessage());
+ }
+ schema = intputSchema;
+ }
+
+ public ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
+ return new NullExpressionEvaluator(expr, schema.toObjectInspector(), output);
+ }
+
+ public String toString() {
+ return "null expression evaluator factory: " + expr.toString();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java
new file mode 100644
index 0000000..cbac10a
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java
@@ -0,0 +1,69 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.FunctionExpressionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class ScalarFunctionExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private transient ExprNodeGenericFuncDesc expr;
+
+ private String exprSerialization;
+
+ private Schema inputSchema;
+
+ private transient Configuration config;
+
+ public ScalarFunctionExpressionEvaluatorFactory(ILogicalExpression expression, Schema schema,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+ try {
+ expr = (ExprNodeGenericFuncDesc) ExpressionTranslator.getHiveExpression(expression, env);
+
+ exprSerialization = Utilities.serializeExpression(expr);
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ inputSchema = schema;
+ }
+
+ public synchronized ICopyEvaluator createEvaluator(IDataOutputProvider output) throws AlgebricksException {
+ if (expr == null) {
+ configClassLoader();
+ expr = (ExprNodeGenericFuncDesc) Utilities.deserializeExpression(exprSerialization, config);
+ }
+
+ ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) expr.clone();
+ return new FunctionExpressionEvaluator(funcDesc, inputSchema.toObjectInspector(), output);
+ }
+
+ private void configClassLoader() {
+ config = new Configuration();
+ ClassLoader loader = this.getClass().getClassLoader();
+ config.setClassLoader(loader);
+ Thread.currentThread().setContextClassLoader(loader);
+ }
+
+ public String toString() {
+ if (expr == null) {
+ configClassLoader();
+ expr = (ExprNodeGenericFuncDesc) Utilities.deserializeExpression(exprSerialization, new Configuration());
+ }
+
+ return "function expression evaluator factory: " + expr.getExprString();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java
new file mode 100644
index 0000000..3b22513
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java
@@ -0,0 +1,40 @@
+package edu.uci.ics.hivesterix.runtime.factory.evaluator;
+
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionTranslator;
+import edu.uci.ics.hivesterix.runtime.evaluator.UDTFFunctionEvaluator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunction;
+import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunctionFactory;
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+
+public class UnnestingFunctionFactory implements ICopyUnnestingFunctionFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private UDTFDesc expr;
+
+ private Schema inputSchema;
+
+ private int[] columns;
+
+ public UnnestingFunctionFactory(ILogicalExpression expression, Schema schema, IVariableTypeEnvironment env)
+ throws AlgebricksException {
+ try {
+ expr = (UDTFDesc) ExpressionTranslator.getHiveExpression(expression, env);
+ } catch (Exception e) {
+ throw new AlgebricksException(e.getMessage());
+ }
+ inputSchema = schema;
+ }
+
+ @Override
+ public ICopyUnnestingFunction createUnnestingFunction(IDataOutputProvider provider) throws AlgebricksException {
+ return new UDTFFunctionEvaluator(expr, inputSchema, columns, provider.getDataOutput());
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java
new file mode 100644
index 0000000..b636009
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java
@@ -0,0 +1,29 @@
+package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveDoubleBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveDoubleBinaryHashFunctionFactory INSTANCE = new HiveDoubleBinaryHashFunctionFactory();
+
+ private HiveDoubleBinaryHashFunctionFactory() {
+ }
+
+ @Override
+ public IBinaryHashFunction createBinaryHashFunction() {
+ // TODO Auto-generated method stub
+ return new IBinaryHashFunction() {
+ private Double value;
+
+ @Override
+ public int hash(byte[] bytes, int offset, int length) {
+ value = Double.longBitsToDouble(LazyUtils.byteArrayToLong(bytes, offset));
+ return value.hashCode();
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java
new file mode 100644
index 0000000..90e6ce4
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java
@@ -0,0 +1,33 @@
+package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveIntegerBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static IBinaryHashFunctionFactory INSTANCE = new HiveIntegerBinaryHashFunctionFactory();
+
+ private HiveIntegerBinaryHashFunctionFactory() {
+ }
+
+ @Override
+ public IBinaryHashFunction createBinaryHashFunction() {
+
+ return new IBinaryHashFunction() {
+ private VInt value = new VInt();
+
+ @Override
+ public int hash(byte[] bytes, int offset, int length) {
+ LazyUtils.readVInt(bytes, offset, value);
+ if (value.length != length)
+ throw new IllegalArgumentException("length mismatch in int hash function actual: " + length
+ + " expected " + value.length);
+ return value.value;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java
new file mode 100644
index 0000000..1b61f67
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java
@@ -0,0 +1,30 @@
+package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveLongBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static IBinaryHashFunctionFactory INSTANCE = new HiveLongBinaryHashFunctionFactory();
+
+ private HiveLongBinaryHashFunctionFactory() {
+ }
+
+ @Override
+ public IBinaryHashFunction createBinaryHashFunction() {
+
+ return new IBinaryHashFunction() {
+ private VLong value = new VLong();
+
+ @Override
+ public int hash(byte[] bytes, int offset, int length) {
+ LazyUtils.readVLong(bytes, offset, value);
+ return (int) value.value;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java
new file mode 100644
index 0000000..f2b7b44
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java
@@ -0,0 +1,31 @@
+package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveRawBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static IBinaryHashFunctionFactory INSTANCE = new HiveRawBinaryHashFunctionFactory();
+
+ private HiveRawBinaryHashFunctionFactory() {
+
+ }
+
+ @Override
+ public IBinaryHashFunction createBinaryHashFunction() {
+
+ return new IBinaryHashFunction() {
+
+ @Override
+ public int hash(byte[] bytes, int offset, int length) {
+ int value = 1;
+ int end = offset + length;
+ for (int i = offset; i < end; i++)
+ value = value * 31 + (int) bytes[i];
+ return value;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java
new file mode 100644
index 0000000..a9cf6fd
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveStingBinaryHashFunctionFactory implements IBinaryHashFunctionFactory {
+ private static final long serialVersionUID = 1L;
+
+ public static HiveStingBinaryHashFunctionFactory INSTANCE = new HiveStingBinaryHashFunctionFactory();
+
+ private HiveStingBinaryHashFunctionFactory() {
+ }
+
+ @Override
+ public IBinaryHashFunction createBinaryHashFunction() {
+ // TODO Auto-generated method stub
+ return new IBinaryHashFunction() {
+ private VInt len = new VInt();
+
+ @Override
+ public int hash(byte[] bytes, int offset, int length) {
+ LazyUtils.readVInt(bytes, offset, len);
+ if (len.value + len.length != length)
+ throw new IllegalStateException("parse string: length mismatch, expected "
+ + (len.value + len.length) + " but get " + length);
+ return hashBytes(bytes, offset + len.length, length - len.length);
+ }
+
+ public int hashBytes(byte[] bytes, int offset, int length) {
+ int value = 1;
+ int end = offset + length;
+ for (int i = offset; i < end; i++)
+ value = value * 31 + (int) bytes[i];
+ return value;
+ }
+ };
+ }
+
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/MurmurHash3BinaryHashFunctionFamily.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/MurmurHash3BinaryHashFunctionFamily.java
similarity index 100%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/MurmurHash3BinaryHashFunctionFamily.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/MurmurHash3BinaryHashFunctionFamily.java
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..6ac012f
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java
@@ -0,0 +1,24 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveDoubleAscNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+
+ return new INormalizedKeyComputer() {
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ int header = LazyUtils.byteArrayToInt(bytes, start);
+ long unsignedValue = (long) header;
+ return (int) ((unsignedValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..3044109
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java
@@ -0,0 +1,24 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveDoubleDescNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+ private final INormalizedKeyComputerFactory ascNormalizedKeyComputerFactory = new HiveDoubleAscNormalizedKeyComputerFactory();
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+ return new INormalizedKeyComputer() {
+ private INormalizedKeyComputer nmkComputer = ascNormalizedKeyComputerFactory.createNormalizedKeyComputer();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ int nk = nmkComputer.normalize(bytes, start, length);
+ return (int) ((long) Integer.MAX_VALUE - (long) (nk - Integer.MIN_VALUE));
+ }
+
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..a1d4d48
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java
@@ -0,0 +1,29 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveIntegerAscNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+
+ return new INormalizedKeyComputer() {
+ private VInt vint = new VInt();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ LazyUtils.readVInt(bytes, start, vint);
+ if (vint.length != length)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + vint.length + " expected " + length);
+ long unsignedValue = (long) vint.value;
+ return (int) ((unsignedValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..b8a30a8
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java
@@ -0,0 +1,29 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveIntegerDescNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+
+ return new INormalizedKeyComputer() {
+ private VInt vint = new VInt();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ LazyUtils.readVInt(bytes, start, vint);
+ if (vint.length != length)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + vint.length + " expected " + length);
+ long unsignedValue = (long) vint.value;
+ return (int) ((long) 0xffffffff - unsignedValue);
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..a893d19
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java
@@ -0,0 +1,63 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveLongAscNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+
+ return new INormalizedKeyComputer() {
+ private static final int POSTIVE_LONG_MASK = (3 << 30);
+ private static final int NON_NEGATIVE_INT_MASK = (2 << 30);
+ private static final int NEGATIVE_LONG_MASK = (0 << 30);
+ private VLong vlong = new VLong();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ LazyUtils.readVLong(bytes, start, vlong);
+ if (vlong.length != length)
+ throw new IllegalArgumentException("length mismatch in int comparator function actual: "
+ + vlong.length + " expected " + length);
+ long value = (long) vlong.value;
+ int highValue = (int) (value >> 32);
+ if (highValue > 0) {
+ /**
+ * larger than Integer.MAX
+ */
+ int highNmk = getKey(highValue);
+ highNmk >>= 2;
+ highNmk |= POSTIVE_LONG_MASK;
+ return highNmk;
+ } else if (highValue == 0) {
+ /**
+ * smaller than Integer.MAX but >=0
+ */
+ int lowNmk = (int) value;
+ lowNmk >>= 2;
+ lowNmk |= NON_NEGATIVE_INT_MASK;
+ return lowNmk;
+ } else {
+ /**
+ * less than 0; TODO: have not optimized for that
+ */
+ int highNmk = getKey(highValue);
+ highNmk >>= 2;
+ highNmk |= NEGATIVE_LONG_MASK;
+ return highNmk;
+ }
+ }
+
+ private int getKey(int value) {
+ long unsignedFirstValue = (long) value;
+ int nmk = (int) ((unsignedFirstValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
+ return nmk;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..cc5661b
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java
@@ -0,0 +1,25 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveLongDescNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+ private final INormalizedKeyComputerFactory ascNormalizedKeyComputerFactory = new HiveIntegerAscNormalizedKeyComputerFactory();
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+ return new INormalizedKeyComputer() {
+ private INormalizedKeyComputer nmkComputer = ascNormalizedKeyComputerFactory.createNormalizedKeyComputer();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ int nk = nmkComputer.normalize(bytes, start, length);
+ return (int) ((long) Integer.MAX_VALUE - (long) (nk - Integer.MIN_VALUE));
+ }
+
+ };
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..d0429d6
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java
@@ -0,0 +1,40 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+
+public class HiveStringAscNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+
+ return new INormalizedKeyComputer() {
+ private VInt len = new VInt();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ LazyUtils.readVInt(bytes, start, len);
+
+ if (len.value + len.length != length)
+ throw new IllegalStateException("parse string: length mismatch, expected "
+ + (len.value + len.length) + " but get " + length);
+ int nk = 0;
+ int offset = start + len.length;
+ for (int i = 0; i < 2; ++i) {
+ nk <<= 16;
+ if (i < len.value) {
+ char character = UTF8StringPointable.charAt(bytes, offset);
+ nk += ((int) character) & 0xffff;
+ offset += UTF8StringPointable.charSize(bytes, offset);
+ }
+ }
+ return nk;
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..15b2d27
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java
@@ -0,0 +1,37 @@
+package edu.uci.ics.hivesterix.runtime.factory.normalize;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+
+public class HiveStringDescNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public INormalizedKeyComputer createNormalizedKeyComputer() {
+ return new INormalizedKeyComputer() {
+ private VInt len = new VInt();
+
+ @Override
+ public int normalize(byte[] bytes, int start, int length) {
+ LazyUtils.readVInt(bytes, start, len);
+ if (len.value + len.length != length)
+ throw new IllegalStateException("parse string: length mismatch, expected "
+ + (len.value + len.length) + " but get " + length);
+ int nk = 0;
+ int offset = start + len.length;
+ for (int i = 0; i < 2; ++i) {
+ nk <<= 16;
+ if (i < len.value) {
+ nk += ((int) UTF8StringPointable.charAt(bytes, offset)) & 0xffff;
+ offset += UTF8StringPointable.charSize(bytes, offset);
+ }
+ }
+ return (int) ((long) 0xffffffff - (long) nk);
+ }
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java
new file mode 100644
index 0000000..590bd61
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java
@@ -0,0 +1,28 @@
+package edu.uci.ics.hivesterix.runtime.factory.nullwriter;
+
+import java.io.DataOutput;
+
+import edu.uci.ics.hyracks.api.dataflow.value.INullWriter;
+import edu.uci.ics.hyracks.api.dataflow.value.INullWriterFactory;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+public class HiveNullWriterFactory implements INullWriterFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ public static HiveNullWriterFactory INSTANCE = new HiveNullWriterFactory();
+
+ @Override
+ public INullWriter createNullWriter() {
+ return new HiveNullWriter();
+ }
+}
+
+class HiveNullWriter implements INullWriter {
+
+ @Override
+ public void writeNull(DataOutput out) throws HyracksDataException {
+ // do nothing
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java
new file mode 100644
index 0000000..677e20e
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java
@@ -0,0 +1,19 @@
+package edu.uci.ics.hivesterix.runtime.inspector;
+
+import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspector;
+
+public class HiveBinaryBooleanInspector implements IBinaryBooleanInspector {
+
+ HiveBinaryBooleanInspector() {
+ }
+
+ @Override
+ public boolean getBooleanValue(byte[] bytes, int offset, int length) {
+ if (length == 0)
+ return false;
+ if (length != 1)
+ throw new IllegalStateException("boolean field error: with length " + length);
+ return bytes[0] == 1;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java
new file mode 100644
index 0000000..22a6065
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java
@@ -0,0 +1,20 @@
+package edu.uci.ics.hivesterix.runtime.inspector;
+
+import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspector;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspectorFactory;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+
+public class HiveBinaryBooleanInspectorFactory implements IBinaryBooleanInspectorFactory {
+ private static final long serialVersionUID = 1L;
+ public static HiveBinaryBooleanInspectorFactory INSTANCE = new HiveBinaryBooleanInspectorFactory();
+
+ private HiveBinaryBooleanInspectorFactory() {
+
+ }
+
+ @Override
+ public IBinaryBooleanInspector createBinaryBooleanInspector(IHyracksTaskContext arg0) {
+ return new HiveBinaryBooleanInspector();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java
new file mode 100644
index 0000000..555afee
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java
@@ -0,0 +1,22 @@
+package edu.uci.ics.hivesterix.runtime.inspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspector;
+
+public class HiveBinaryIntegerInspector implements IBinaryIntegerInspector {
+ private VInt value = new VInt();
+
+ HiveBinaryIntegerInspector() {
+ }
+
+ @Override
+ public int getIntegerValue(byte[] bytes, int offset, int length) {
+ LazyUtils.readVInt(bytes, offset, value);
+ if (value.length != length)
+ throw new IllegalArgumentException("length mismatch in int hash function actual: " + length + " expected "
+ + value.length);
+ return value.value;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java
new file mode 100644
index 0000000..bb93a60
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java
@@ -0,0 +1,20 @@
+package edu.uci.ics.hivesterix.runtime.inspector;
+
+import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspector;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspectorFactory;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+
+public class HiveBinaryIntegerInspectorFactory implements IBinaryIntegerInspectorFactory {
+ private static final long serialVersionUID = 1L;
+ public static HiveBinaryIntegerInspectorFactory INSTANCE = new HiveBinaryIntegerInspectorFactory();
+
+ private HiveBinaryIntegerInspectorFactory() {
+
+ }
+
+ @Override
+ public IBinaryIntegerInspector createBinaryIntegerInspector(IHyracksTaskContext arg0) {
+ return new HiveBinaryIntegerInspector();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java
new file mode 100644
index 0000000..cfceb26
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java
@@ -0,0 +1,68 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.PipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedBlockingConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedPipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedReceiveSideMaterializedBlockingConnectorPolicy;
+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
+
+public class HiveConnectorPolicyAssignmentPolicy implements IConnectorPolicyAssignmentPolicy {
+ public enum Policy {
+ PIPELINING,
+ SEND_SIDE_MAT_PIPELINING,
+ SEND_SIDE_MAT_BLOCKING,
+ SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING;
+ };
+
+ private static final long serialVersionUID = 1L;
+
+ private final IConnectorPolicy pipeliningPolicy = new PipeliningConnectorPolicy();
+ private final IConnectorPolicy sendSideMatPipeliningPolicy = new SendSideMaterializedPipeliningConnectorPolicy();
+ private final IConnectorPolicy sendSideMatBlockingPolicy = new SendSideMaterializedBlockingConnectorPolicy();
+ private final IConnectorPolicy sendSideMatReceiveSideMatBlockingPolicy = new SendSideMaterializedReceiveSideMaterializedBlockingConnectorPolicy();
+ private final Policy policy;
+
+ public HiveConnectorPolicyAssignmentPolicy(Policy policy) {
+ this.policy = policy;
+ }
+
+ @Override
+ public IConnectorPolicy getConnectorPolicyAssignment(IConnectorDescriptor c, int nProducers, int nConsumers,
+ int[] fanouts) {
+ if (c instanceof MToNPartitioningMergingConnectorDescriptor) {
+ // avoid deadlocks
+ switch (policy) {
+ case PIPELINING:
+ case SEND_SIDE_MAT_PIPELINING:
+ return sendSideMatPipeliningPolicy;
+ case SEND_SIDE_MAT_BLOCKING:
+ return sendSideMatBlockingPolicy;
+ case SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING:
+ return sendSideMatReceiveSideMatBlockingPolicy;
+ default:
+ return sendSideMatPipeliningPolicy;
+ }
+ } else if (c instanceof MToNPartitioningConnectorDescriptor) {
+ // support different repartitioning policies
+ switch (policy) {
+ case PIPELINING:
+ return pipeliningPolicy;
+ case SEND_SIDE_MAT_PIPELINING:
+ return sendSideMatPipeliningPolicy;
+ case SEND_SIDE_MAT_BLOCKING:
+ return sendSideMatBlockingPolicy;
+ case SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING:
+ return sendSideMatReceiveSideMatBlockingPolicy;
+ default:
+ return pipeliningPolicy;
+ }
+ } else {
+ // pipelining for other connectors
+ return pipeliningPolicy;
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java
new file mode 100644
index 0000000..ccc2e6c
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java
@@ -0,0 +1,32 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.IPartitioningProperty;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.RandomPartitioningProperty;
+
+public class HiveDataSink implements IDataSink {
+
+ private Object[] schema;
+
+ private Object fsOperator;
+
+ public HiveDataSink(Object sink, Object[] sourceSchema) {
+ schema = sourceSchema;
+ fsOperator = sink;
+ }
+
+ @Override
+ public Object getId() {
+ return fsOperator;
+ }
+
+ @Override
+ public Object[] getSchemaTypes() {
+ return schema;
+ }
+
+ public IPartitioningProperty getPartitioningProperty() {
+ return new RandomPartitioningProperty(new HiveDomain());
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java
new file mode 100644
index 0000000..67b743b
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java
@@ -0,0 +1,47 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSourcePropertiesProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.FunctionalDependency;
+
+public class HiveDataSource<P> implements IDataSource<P> {
+
+ private P source;
+
+ private Object[] schema;
+
+ public HiveDataSource(P dataSource, Object[] sourceSchema) {
+ source = dataSource;
+ schema = sourceSchema;
+ }
+
+ @Override
+ public P getId() {
+ return source;
+ }
+
+ @Override
+ public Object[] getSchemaTypes() {
+ return schema;
+ }
+
+ @Override
+ public void computeFDs(List<LogicalVariable> scanVariables, List<FunctionalDependency> fdList) {
+ }
+
+ @Override
+ public IDataSourcePropertiesProvider getPropertiesProvider() {
+ return new HiveDataSourcePartitioningProvider();
+ }
+
+ @Override
+ public String toString() {
+ PartitionDesc desc = (PartitionDesc) source;
+ return desc.getTableName();
+ }
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java
similarity index 60%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java
index 08dd684..bb9c4ce 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSourcePartitioningProvider.java
@@ -11,16 +11,13 @@
import edu.uci.ics.hyracks.algebricks.core.algebra.properties.RandomPartitioningProperty;
import edu.uci.ics.hyracks.algebricks.core.algebra.properties.StructuralPropertiesVector;
-public class HiveDataSourcePartitioningProvider implements
- IDataSourcePropertiesProvider {
+public class HiveDataSourcePartitioningProvider implements IDataSourcePropertiesProvider {
- @Override
- public IPhysicalPropertiesVector computePropertiesVector(
- List<LogicalVariable> scanVariables) {
- IPartitioningProperty property = new RandomPartitioningProperty(
- new HiveDomain());
- IPhysicalPropertiesVector vector = new StructuralPropertiesVector(
- property, new LinkedList<ILocalStructuralProperty>());
- return vector;
- }
+ @Override
+ public IPhysicalPropertiesVector computePropertiesVector(List<LogicalVariable> scanVariables) {
+ IPartitioningProperty property = new RandomPartitioningProperty(new HiveDomain());
+ IPhysicalPropertiesVector vector = new StructuralPropertiesVector(property,
+ new LinkedList<ILocalStructuralProperty>());
+ return vector;
+ }
}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java
new file mode 100644
index 0000000..8b1d3b5
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java
@@ -0,0 +1,17 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.INodeDomain;
+
+public class HiveDomain implements INodeDomain {
+
+ @Override
+ public boolean sameAs(INodeDomain domain) {
+ return true;
+ }
+
+ @Override
+ public Integer cardinality() {
+ return 0;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java
new file mode 100644
index 0000000..13526d8
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java
@@ -0,0 +1,130 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveScanRuntimeGenerator;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveWriteRuntimeGenerator;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
+import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
+import edu.uci.ics.hyracks.algebricks.data.IPrinterFactory;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+
+@SuppressWarnings("rawtypes")
+public class HiveMetaDataProvider<S, T> implements IMetadataProvider<S, T> {
+
+ private Operator fileSink;
+ private Schema outputSchema;
+ private HashMap<S, IDataSource<S>> dataSourceMap;
+
+ public HiveMetaDataProvider(Operator fsOp, Schema oi, HashMap<S, IDataSource<S>> map) {
+ fileSink = fsOp;
+ outputSchema = oi;
+ dataSourceMap = map;
+ }
+
+ @Override
+ public IDataSourceIndex<T, S> findDataSourceIndex(T indexId, S dataSourceId) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public IDataSource<S> findDataSource(S id) throws AlgebricksException {
+ return dataSourceMap.get(id);
+ }
+
+ @Override
+ public boolean scannerOperatorIsLeaf(IDataSource<S> dataSource) {
+ return true;
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getScannerRuntime(IDataSource<S> dataSource,
+ List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables, boolean projectPushed,
+ IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv, JobGenContext context, JobSpecification jobSpec)
+ throws AlgebricksException {
+
+ S desc = dataSource.getId();
+ HiveScanRuntimeGenerator generator = new HiveScanRuntimeGenerator((PartitionDesc) desc);
+ return generator.getRuntimeOperatorAndConstraint(dataSource, scanVariables, projectVariables, projectPushed,
+ context, jobSpec);
+ }
+
+ @Override
+ public Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> getWriteFileRuntime(IDataSink sink,
+ int[] printColumns, IPrinterFactory[] printerFactories, RecordDescriptor inputDesc) {
+
+ HiveWriteRuntimeGenerator generator = new HiveWriteRuntimeGenerator((FileSinkOperator) fileSink, outputSchema);
+ return generator.getWriterRuntime(inputDesc);
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getDeleteRuntime(IDataSource<S> arg0,
+ IOperatorSchema arg1, List<LogicalVariable> arg2, LogicalVariable arg3, RecordDescriptor arg4,
+ JobGenContext arg5, JobSpecification arg6) throws AlgebricksException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInsertRuntime(IDataSource<S> arg0,
+ IOperatorSchema arg1, List<LogicalVariable> arg2, LogicalVariable arg3, RecordDescriptor arg4,
+ JobGenContext arg5, JobSpecification arg6) throws AlgebricksException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getWriteResultRuntime(IDataSource<S> arg0,
+ IOperatorSchema arg1, List<LogicalVariable> arg2, LogicalVariable arg3, JobGenContext arg4,
+ JobSpecification arg5) throws AlgebricksException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public IFunctionInfo lookupFunction(FunctionIdentifier arg0) {
+ return new HiveFunctionInfo(arg0, null);
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexInsertRuntime(
+ IDataSourceIndex<T, S> dataSource, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas,
+ IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys,
+ ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec)
+ throws AlgebricksException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ @Override
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexDeleteRuntime(
+ IDataSourceIndex<T, S> dataSource, IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas,
+ IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys,
+ ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec)
+ throws AlgebricksException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java
new file mode 100644
index 0000000..cdb0e95
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java
@@ -0,0 +1,84 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
+
+public class HiveOperatorSchema implements IOperatorSchema {
+
+ private final Map<LogicalVariable, Integer> varMap;
+
+ private final List<LogicalVariable> varList;
+
+ public HiveOperatorSchema() {
+ varMap = new HashMap<LogicalVariable, Integer>();
+ varList = new ArrayList<LogicalVariable>();
+ }
+
+ @Override
+ public void addAllVariables(IOperatorSchema source) {
+ for (LogicalVariable v : source) {
+ varMap.put(v, varList.size());
+ varList.add(v);
+ }
+ }
+
+ @Override
+ public void addAllNewVariables(IOperatorSchema source) {
+ for (LogicalVariable v : source) {
+ if (varMap.get(v) == null) {
+ varMap.put(v, varList.size());
+ varList.add(v);
+ }
+ }
+ }
+
+ @Override
+ public int addVariable(LogicalVariable var) {
+ int idx = varList.size();
+ varMap.put(var, idx);
+ varList.add(var);
+ return idx;
+ }
+
+ @Override
+ public void clear() {
+ varMap.clear();
+ varList.clear();
+ }
+
+ @Override
+ public int findVariable(LogicalVariable var) {
+ Integer i = varMap.get(var);
+ if (i == null) {
+ return -1;
+ }
+ return i;
+ }
+
+ @Override
+ public int getSize() {
+ return varList.size();
+ }
+
+ @Override
+ public LogicalVariable getVariable(int index) {
+ return varList.get(index);
+ }
+
+ @Override
+ public Iterator<LogicalVariable> iterator() {
+ return varList.iterator();
+ }
+
+ @Override
+ public String toString() {
+ return varMap.toString();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java
new file mode 100644
index 0000000..0d2c78a
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java
@@ -0,0 +1,115 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.runtime.operator.filescan.HiveKeyValueParserFactory;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
+import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
+import edu.uci.ics.hyracks.algebricks.data.ISerializerDeserializerProvider;
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+import edu.uci.ics.hyracks.hdfs.dataflow.HDFSReadOperatorDescriptor;
+import edu.uci.ics.hyracks.hdfs.scheduler.Scheduler;
+
+@SuppressWarnings({ "rawtypes", "deprecation" })
+public class HiveScanRuntimeGenerator {
+
+ private PartitionDesc fileDesc;
+
+ private transient Path filePath;
+
+ private String filePathName;
+
+ private Properties properties;
+
+ public HiveScanRuntimeGenerator(PartitionDesc path) {
+ fileDesc = path;
+ properties = fileDesc.getProperties();
+
+ String inputPath = (String) properties.getProperty("location");
+
+ if (inputPath.startsWith("file:")) {
+ // Windows
+ String[] strs = inputPath.split(":");
+ filePathName = strs[strs.length - 1];
+ } else {
+ // Linux
+ filePathName = inputPath;
+ }
+
+ filePath = new Path(filePathName);
+ }
+
+ public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getRuntimeOperatorAndConstraint(
+ IDataSource dataSource, List<LogicalVariable> scanVariables, List<LogicalVariable> projectVariables,
+ boolean projectPushed, JobGenContext context, JobSpecification jobSpec) throws AlgebricksException {
+ try {
+ // get the correct delimiter from Hive metastore or other data
+ // structures
+ IOperatorSchema propagatedSchema = new HiveOperatorSchema();
+
+ List<LogicalVariable> outputVariables = projectPushed ? projectVariables : scanVariables;
+ for (LogicalVariable var : outputVariables)
+ propagatedSchema.addVariable(var);
+
+ int[] outputColumnsOffset = new int[scanVariables.size()];
+ int i = 0;
+ for (LogicalVariable var : scanVariables)
+ if (outputVariables.contains(var)) {
+ int offset = outputVariables.indexOf(var);
+ outputColumnsOffset[i++] = offset;
+ } else
+ outputColumnsOffset[i++] = -1;
+
+ Object[] schemaTypes = dataSource.getSchemaTypes();
+ // get record descriptor
+ RecordDescriptor recDescriptor = mkRecordDescriptor(propagatedSchema, schemaTypes, context);
+
+ // setup the run time operator and constraints
+ JobConf conf = ConfUtil.getJobConf(fileDesc.getInputFileFormatClass(), filePath);
+ String[] locConstraints = ConfUtil.getNCs();
+ Map<String, NodeControllerInfo> ncNameToNcInfos = ConfUtil.getNodeControllerInfo();
+ Scheduler scheduler = new Scheduler(ncNameToNcInfos);
+ InputSplit[] splits = conf.getInputFormat().getSplits(conf, locConstraints.length);
+ String[] schedule = scheduler.getLocationConstraints(splits);
+ IOperatorDescriptor scanner = new HDFSReadOperatorDescriptor(jobSpec, recDescriptor, conf, splits,
+ schedule, new HiveKeyValueParserFactory(fileDesc, conf, outputColumnsOffset));
+
+ return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(scanner,
+ new AlgebricksAbsolutePartitionConstraint(locConstraints));
+ } catch (Exception e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+ private static RecordDescriptor mkRecordDescriptor(IOperatorSchema opSchema, Object[] types, JobGenContext context)
+ throws AlgebricksException {
+ ISerializerDeserializer[] fields = new ISerializerDeserializer[opSchema.getSize()];
+ ISerializerDeserializerProvider sdp = context.getSerializerDeserializerProvider();
+ int size = opSchema.getSize();
+ for (int i = 0; i < size; i++) {
+ Object t = types[i];
+ fields[i] = sdp.getSerializerDeserializer(t);
+ i++;
+ }
+ return new RecordDescriptor(fields);
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java
new file mode 100644
index 0000000..7a577e8
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java
@@ -0,0 +1,37 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.runtime.operator.filewrite.HivePushRuntimeFactory;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+
+@SuppressWarnings("deprecation")
+public class HiveWriteRuntimeGenerator {
+ private FileSinkOperator fileSink;
+
+ private Schema inputSchema;
+
+ public HiveWriteRuntimeGenerator(FileSinkOperator fsOp, Schema oi) {
+ fileSink = fsOp;
+ inputSchema = oi;
+ }
+
+ /**
+ * get the write runtime
+ *
+ * @param inputDesc
+ * @return
+ */
+ public Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> getWriterRuntime(RecordDescriptor inputDesc) {
+ JobConf conf = ConfUtil.getJobConf();
+ IPushRuntimeFactory factory = new HivePushRuntimeFactory(inputDesc, conf, fileSink, inputSchema);
+ Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> pair = new Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint>(
+ factory, null);
+ return pair;
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/Schema.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/Schema.java
new file mode 100644
index 0000000..927c709
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/Schema.java
@@ -0,0 +1,39 @@
+package edu.uci.ics.hivesterix.runtime.jobgen;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+
+public class Schema implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ private List<String> fieldNames;
+
+ private List<TypeInfo> fieldTypes;
+
+ public Schema(List<String> fieldNames, List<TypeInfo> fieldTypes) {
+ this.fieldNames = fieldNames;
+ this.fieldTypes = fieldTypes;
+ }
+
+ public ObjectInspector toObjectInspector() {
+ return LazyUtils.getLazyObjectInspector(fieldNames, fieldTypes);
+ }
+
+ public List<String> getNames() {
+ return fieldNames;
+ }
+
+ public List<TypeInfo> getTypes() {
+ return fieldTypes;
+ }
+
+ public Object[] getSchema() {
+ return fieldTypes.toArray();
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParser.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParser.java
new file mode 100644
index 0000000..472994a
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParser.java
@@ -0,0 +1,209 @@
+package edu.uci.ics.hivesterix.runtime.operator.filescan;
+
+import java.io.DataOutput;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import edu.uci.ics.hivesterix.serde.parser.IHiveParser;
+import edu.uci.ics.hivesterix.serde.parser.TextToBinaryTupleParser;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
+import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParser;
+
+@SuppressWarnings("deprecation")
+public class HiveKeyValueParser<K, V> implements IKeyValueParser<K, V> {
+ /**
+ * the columns to output: projection is pushed into this scan
+ */
+ private int[] outputColumnsOffset;
+
+ /**
+ * serialization/de-serialization object
+ */
+ private SerDe serDe;
+
+ /**
+ * the input row object inspector
+ */
+ private StructObjectInspector structInspector;
+
+ /**
+ * the hadoop job conf
+ */
+ private JobConf job;
+
+ /**
+ * Hyrax context to control resource allocation
+ */
+ private final IHyracksTaskContext ctx;
+
+ /**
+ * lazy serde: format flow in between operators
+ */
+ private final SerDe outputSerDe;
+
+ /**
+ * the parser from hive data to binary data
+ */
+ private IHiveParser parser;
+
+ /**
+ * the buffer for buffering output data
+ */
+ private ByteBuffer buffer;
+
+ /**
+ * the frame tuple appender
+ */
+ private FrameTupleAppender appender;
+
+ /**
+ * the array tuple builder
+ */
+ private ArrayTupleBuilder tb;
+
+ /**
+ * the field references of all fields
+ */
+ private List<? extends StructField> fieldRefs;
+
+ /**
+ * output fields
+ */
+ private Object[] outputFields;
+
+ /**
+ * output field references
+ */
+ private StructField[] outputFieldRefs;
+
+ public HiveKeyValueParser(String serDeClass, String outputSerDeClass, Properties tbl, JobConf conf,
+ final IHyracksTaskContext ctx, int[] outputColumnsOffset) throws HyracksDataException {
+ try {
+ job = conf;
+ // initialize the input serde
+ serDe = (SerDe) ReflectionUtils.newInstance(Class.forName(serDeClass), job);
+ serDe.initialize(job, tbl);
+ // initialize the output serde
+ outputSerDe = (SerDe) ReflectionUtils.newInstance(Class.forName(outputSerDeClass), job);
+ outputSerDe.initialize(job, tbl);
+ // object inspector of the row
+ structInspector = (StructObjectInspector) serDe.getObjectInspector();
+ // hyracks context
+ this.ctx = ctx;
+ this.outputColumnsOffset = outputColumnsOffset;
+
+ if (structInspector instanceof LazySimpleStructObjectInspector) {
+ LazySimpleStructObjectInspector rowInspector = (LazySimpleStructObjectInspector) structInspector;
+ List<? extends StructField> fieldRefs = rowInspector.getAllStructFieldRefs();
+ boolean lightWeightParsable = true;
+ for (StructField fieldRef : fieldRefs) {
+ Category category = fieldRef.getFieldObjectInspector().getCategory();
+ if (!(category == Category.PRIMITIVE)) {
+ lightWeightParsable = false;
+ break;
+ }
+ }
+ if (lightWeightParsable) {
+ parser = new TextToBinaryTupleParser(this.outputColumnsOffset, structInspector);
+ }
+ }
+
+ fieldRefs = structInspector.getAllStructFieldRefs();
+ int size = 0;
+ for (int i = 0; i < outputColumnsOffset.length; i++) {
+ if (outputColumnsOffset[i] >= 0) {
+ size++;
+ }
+ }
+
+ tb = new ArrayTupleBuilder(size);
+ outputFieldRefs = new StructField[size];
+ outputFields = new Object[size];
+ for (int i = 0; i < outputColumnsOffset.length; i++)
+ if (outputColumnsOffset[i] >= 0)
+ outputFieldRefs[outputColumnsOffset[i]] = fieldRefs.get(i);
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void open(IFrameWriter writer) throws HyracksDataException {
+ buffer = ctx.allocateFrame();
+ appender = new FrameTupleAppender(ctx.getFrameSize());
+ appender.reset(buffer, true);
+ }
+
+ @Override
+ public void parse(K key, V value, IFrameWriter writer) throws HyracksDataException {
+ try {
+ tb.reset();
+ if (parser != null) {
+ Text text = (Text) value;
+ parser.parse(text.getBytes(), 0, text.getLength(), tb);
+ } else {
+ Object row = serDe.deserialize((Writable) value);
+ /**
+ * write fields to the tuple builder one by one
+ */
+ int i = 0;
+ for (StructField fieldRef : fieldRefs) {
+ if (outputColumnsOffset[i] >= 0)
+ outputFields[outputColumnsOffset[i]] = structInspector.getStructFieldData(row, fieldRef);
+ i++;
+ }
+ i = 0;
+ DataOutput dos = tb.getDataOutput();
+ for (Object field : outputFields) {
+ BytesWritable fieldWritable = (BytesWritable) outputSerDe.serialize(field,
+ outputFieldRefs[i].getFieldObjectInspector());
+ dos.write(fieldWritable.getBytes(), 0, fieldWritable.getSize());
+ tb.addFieldEndOffset();
+ i++;
+ }
+ }
+
+ /**
+ * append the tuple and flush it if necessary.
+ */
+ if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+ FrameUtils.flushFrame(buffer, writer);
+ appender.reset(buffer, true);
+ if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
+ throw new IllegalStateException();
+ }
+ }
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void close(IFrameWriter writer) throws HyracksDataException {
+ /**
+ * flush the residual tuples
+ */
+ if (appender.getTupleCount() > 0) {
+ FrameUtils.flushFrame(buffer, writer);
+ }
+ System.gc();
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParserFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParserFactory.java
new file mode 100644
index 0000000..05903b9
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveKeyValueParserFactory.java
@@ -0,0 +1,39 @@
+package edu.uci.ics.hivesterix.runtime.operator.filescan;
+
+import java.util.Properties;
+
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParser;
+import edu.uci.ics.hyracks.hdfs.api.IKeyValueParserFactory;
+import edu.uci.ics.hyracks.hdfs.dataflow.ConfFactory;
+
+@SuppressWarnings("deprecation")
+public class HiveKeyValueParserFactory<K, V> implements IKeyValueParserFactory<K, V> {
+ private static final long serialVersionUID = 1L;
+ private final String serDeClass;
+ private final String outputSerDeClass = LazySerDe.class.getName();;
+ private final Properties tbl;
+ private final ConfFactory confFactory;
+ private final int[] outputColumnsOffset;
+
+ public HiveKeyValueParserFactory(PartitionDesc desc, JobConf conf, int[] outputColumnsOffset)
+ throws HyracksDataException {
+ this.tbl = desc.getProperties();
+ this.serDeClass = (String) tbl.getProperty("serialization.lib");
+ this.outputColumnsOffset = outputColumnsOffset;
+ this.confFactory = new ConfFactory(conf);
+ }
+
+ @SuppressWarnings({ "rawtypes", "unchecked" })
+ @Override
+ public IKeyValueParser<K, V> createKeyValueParser(IHyracksTaskContext ctx) throws HyracksDataException {
+ return new HiveKeyValueParser(serDeClass, outputSerDeClass, tbl, confFactory.getConf(), ctx,
+ outputColumnsOffset);
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
new file mode 100644
index 0000000..3a62b2e
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
@@ -0,0 +1,147 @@
+package edu.uci.ics.hivesterix.runtime.operator.filewrite;
+
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
+
+@SuppressWarnings("deprecation")
+public class HiveFileWritePushRuntime implements IPushRuntime {
+
+ /**
+ * frame tuple accessor to access byte buffer
+ */
+ private final FrameTupleAccessor accessor;
+
+ /**
+ * input object inspector
+ */
+ private final ObjectInspector inputInspector;
+
+ /**
+ * cachedInput
+ */
+ private final LazyColumnar cachedInput;
+
+ /**
+ * File sink operator of Hive
+ */
+ private final FileSinkDesc fileSink;
+
+ /**
+ * job configuration, which contain name node and other configuration
+ * information
+ */
+ private JobConf conf;
+
+ /**
+ * input object inspector
+ */
+ private final Schema inputSchema;
+
+ /**
+ * a copy of hive schema representation
+ */
+ private RowSchema rowSchema;
+
+ /**
+ * the Hive file sink operator
+ */
+ private FileSinkOperator fsOp;
+
+ /**
+ * cached tuple object reference
+ */
+ private FrameTupleReference tuple = new FrameTupleReference();
+
+ /**
+ * @param spec
+ * @param fsProvider
+ */
+ public HiveFileWritePushRuntime(IHyracksTaskContext context, RecordDescriptor inputRecordDesc, JobConf job,
+ FileSinkDesc fs, RowSchema schema, Schema oi) {
+ fileSink = fs;
+ fileSink.setGatherStats(false);
+
+ rowSchema = schema;
+ conf = job;
+ inputSchema = oi;
+
+ accessor = new FrameTupleAccessor(context.getFrameSize(), inputRecordDesc);
+ inputInspector = inputSchema.toObjectInspector();
+ cachedInput = new LazyColumnar((LazyColumnarObjectInspector) inputInspector);
+ }
+
+ @Override
+ public void open() throws HyracksDataException {
+ fsOp = (FileSinkOperator) OperatorFactory.get(fileSink, rowSchema);
+ fsOp.setChildOperators(null);
+ fsOp.setParentOperators(null);
+ conf.setClassLoader(this.getClass().getClassLoader());
+
+ ObjectInspector[] inspectors = new ObjectInspector[1];
+ inspectors[0] = inputInspector;
+ try {
+ fsOp.initialize(conf, inspectors);
+ fsOp.setExecContext(null);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ @Override
+ public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
+ accessor.reset(buffer);
+ int n = accessor.getTupleCount();
+ try {
+ for (int i = 0; i < n; ++i) {
+ tuple.reset(accessor, i);
+ cachedInput.init(tuple);
+ fsOp.process(cachedInput, 0);
+ }
+ } catch (HiveException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ try {
+ Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+ fsOp.closeOp(false);
+ } catch (HiveException e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ @Override
+ public void setFrameWriter(int index, IFrameWriter writer, RecordDescriptor recordDesc) {
+ throw new IllegalStateException();
+ }
+
+ @Override
+ public void setInputRecordDescriptor(int index, RecordDescriptor recordDescriptor) {
+ }
+
+ @Override
+ public void fail() throws HyracksDataException {
+
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java
new file mode 100644
index 0000000..6c18231
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java
@@ -0,0 +1,105 @@
+package edu.uci.ics.hivesterix.runtime.operator.filewrite;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.util.UUID;
+
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
+import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+
+@SuppressWarnings("deprecation")
+public class HivePushRuntimeFactory implements IPushRuntimeFactory {
+
+ private static final long serialVersionUID = 1L;
+
+ private final RecordDescriptor inputRecordDesc;
+ private transient JobConf conf;
+ private final FileSinkDesc fileSink;
+ private final RowSchema outSchema;
+ private final Schema schema;
+
+ /**
+ * the content of the configuration
+ */
+ private String confContent;
+
+ public HivePushRuntimeFactory(RecordDescriptor inputRecordDesc, JobConf conf, FileSinkOperator fsp, Schema sch) {
+ this.inputRecordDesc = inputRecordDesc;
+ this.conf = conf;
+ this.fileSink = fsp.getConf();
+ outSchema = fsp.getSchema();
+ this.schema = sch;
+
+ writeConfContent();
+ }
+
+ @Override
+ public String toString() {
+ return "file write";
+ }
+
+ @Override
+ public IPushRuntime createPushRuntime(IHyracksTaskContext context) throws AlgebricksException {
+ if (conf == null)
+ readConfContent();
+
+ return new HiveFileWritePushRuntime(context, inputRecordDesc, conf, fileSink, outSchema, schema);
+ }
+
+ private void readConfContent() {
+ File dir = new File("hadoop-conf-tmp");
+ if (!dir.exists()) {
+ dir.mkdir();
+ }
+
+ String fileName = "hadoop-conf-tmp/" + UUID.randomUUID() + System.currentTimeMillis() + ".xml";
+ try {
+ PrintWriter out = new PrintWriter((new OutputStreamWriter(new FileOutputStream(new File(fileName)))));
+ out.write(confContent);
+ out.close();
+ conf = new JobConf(fileName);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ private void writeConfContent() {
+ File dir = new File("hadoop-conf-tmp");
+ if (!dir.exists()) {
+ dir.mkdir();
+ }
+
+ String fileName = "hadoop-conf-tmp/" + UUID.randomUUID() + System.currentTimeMillis() + ".xml";
+ try {
+ DataOutputStream out = new DataOutputStream(new FileOutputStream(new File(fileName)));
+ conf.writeXml(out);
+ out.close();
+
+ DataInputStream in = new DataInputStream(new FileInputStream(fileName));
+ StringBuffer buffer = new StringBuffer();
+ String line;
+ while ((line = in.readLine()) != null) {
+ buffer.append(line + "\n");
+ }
+ in.close();
+ confContent = buffer.toString();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java
new file mode 100644
index 0000000..467ec0a
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java
@@ -0,0 +1,75 @@
+package edu.uci.ics.hivesterix.runtime.provider;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveByteBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveByteBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveDoubleBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveDoubleBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveFloatBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveFloatBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveIntegerBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveIntegerBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveLongBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveLongBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveShortBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveShortBinaryDescComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveStringBinaryAscComparatorFactory;
+import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveStringBinaryDescComparatorFactory;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryComparatorFactoryProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+
+public class HiveBinaryComparatorFactoryProvider implements IBinaryComparatorFactoryProvider {
+
+ public static final HiveBinaryComparatorFactoryProvider INSTANCE = new HiveBinaryComparatorFactoryProvider();
+
+ private HiveBinaryComparatorFactoryProvider() {
+ }
+
+ @Override
+ public IBinaryComparatorFactory getBinaryComparatorFactory(Object type, boolean ascending)
+ throws AlgebricksException {
+ if (type.equals(TypeInfoFactory.intTypeInfo)) {
+ if (ascending)
+ return HiveIntegerBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveIntegerBinaryDescComparatorFactory.INSTANCE;
+
+ } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
+ if (ascending)
+ return HiveLongBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveLongBinaryDescComparatorFactory.INSTANCE;
+
+ } else if (type.equals(TypeInfoFactory.floatTypeInfo)) {
+ if (ascending)
+ return HiveFloatBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveFloatBinaryDescComparatorFactory.INSTANCE;
+
+ } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
+ if (ascending)
+ return HiveDoubleBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveDoubleBinaryDescComparatorFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.shortTypeInfo)) {
+ if (ascending)
+ return HiveShortBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveShortBinaryDescComparatorFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.stringTypeInfo)) {
+ if (ascending)
+ return HiveStringBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveStringBinaryDescComparatorFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.byteTypeInfo) || type.equals(TypeInfoFactory.booleanTypeInfo)) {
+ if (ascending)
+ return HiveByteBinaryAscComparatorFactory.INSTANCE;
+ else
+ return HiveByteBinaryDescComparatorFactory.INSTANCE;
+ } else
+ throw new NotImplementedException();
+ }
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java
new file mode 100644
index 0000000..473eee1
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.runtime.provider;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveDoubleBinaryHashFunctionFactory;
+import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveIntegerBinaryHashFunctionFactory;
+import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveLongBinaryHashFunctionFactory;
+import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveRawBinaryHashFunctionFactory;
+import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveStingBinaryHashFunctionFactory;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+
+public class HiveBinaryHashFunctionFactoryProvider implements IBinaryHashFunctionFactoryProvider {
+
+ public static final HiveBinaryHashFunctionFactoryProvider INSTANCE = new HiveBinaryHashFunctionFactoryProvider();
+
+ private HiveBinaryHashFunctionFactoryProvider() {
+ }
+
+ @Override
+ public IBinaryHashFunctionFactory getBinaryHashFunctionFactory(Object type) throws AlgebricksException {
+ if (type.equals(TypeInfoFactory.intTypeInfo)) {
+ return HiveIntegerBinaryHashFunctionFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
+ return HiveLongBinaryHashFunctionFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.stringTypeInfo)) {
+ return HiveStingBinaryHashFunctionFactory.INSTANCE;
+ } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
+ return HiveDoubleBinaryHashFunctionFactory.INSTANCE;
+ } else {
+ return HiveRawBinaryHashFunctionFactory.INSTANCE;
+ }
+ }
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFamilyProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFamilyProvider.java
similarity index 100%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFamilyProvider.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFamilyProvider.java
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java
new file mode 100644
index 0000000..91bf3e5
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java
@@ -0,0 +1,51 @@
+package edu.uci.ics.hivesterix.runtime.provider;
+
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveDoubleAscNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveDoubleDescNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveIntegerAscNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveIntegerDescNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveLongAscNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveLongDescNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveStringAscNormalizedKeyComputerFactory;
+import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveStringDescNormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+
+public class HiveNormalizedKeyComputerFactoryProvider implements INormalizedKeyComputerFactoryProvider {
+
+ public static final HiveNormalizedKeyComputerFactoryProvider INSTANCE = new HiveNormalizedKeyComputerFactoryProvider();
+
+ private HiveNormalizedKeyComputerFactoryProvider() {
+ }
+
+ @Override
+ public INormalizedKeyComputerFactory getNormalizedKeyComputerFactory(Object type, boolean ascending) {
+ if (ascending) {
+ if (type.equals(TypeInfoFactory.stringTypeInfo)) {
+ return new HiveStringAscNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.intTypeInfo)) {
+ return new HiveIntegerAscNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
+ return new HiveLongAscNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
+ return new HiveDoubleAscNormalizedKeyComputerFactory();
+ } else {
+ return null;
+ }
+ } else {
+ if (type.equals(TypeInfoFactory.stringTypeInfo)) {
+ return new HiveStringDescNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.intTypeInfo)) {
+ return new HiveIntegerDescNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
+ return new HiveLongDescNormalizedKeyComputerFactory();
+ } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
+ return new HiveDoubleDescNormalizedKeyComputerFactory();
+ } else {
+ return null;
+ }
+ }
+ }
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java
similarity index 61%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java
rename to hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java
index bebb457..10c84d2 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HivePrinterFactoryProvider.java
@@ -6,12 +6,11 @@
public class HivePrinterFactoryProvider implements IPrinterFactoryProvider {
- public static IPrinterFactoryProvider INSTANCE = new HivePrinterFactoryProvider();
+ public static IPrinterFactoryProvider INSTANCE = new HivePrinterFactoryProvider();
- @Override
- public IPrinterFactory getPrinterFactory(Object type)
- throws AlgebricksException {
- return null;
- }
+ @Override
+ public IPrinterFactory getPrinterFactory(Object type) throws AlgebricksException {
+ return null;
+ }
}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java
new file mode 100644
index 0000000..22f81e0
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java
@@ -0,0 +1,21 @@
+package edu.uci.ics.hivesterix.runtime.provider;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.data.ISerializerDeserializerProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+
+public class HiveSerializerDeserializerProvider implements ISerializerDeserializerProvider {
+
+ public static final HiveSerializerDeserializerProvider INSTANCE = new HiveSerializerDeserializerProvider();
+
+ private HiveSerializerDeserializerProvider() {
+ }
+
+ @SuppressWarnings("rawtypes")
+ @Override
+ public ISerializerDeserializer getSerializerDeserializer(Object type) throws AlgebricksException {
+ // return ARecordSerializerDeserializer.SCHEMALESS_INSTANCE;
+ return null;
+ }
+
+}
diff --git a/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java
new file mode 100644
index 0000000..be4b149
--- /dev/null
+++ b/hivesterix/hivesterix-runtime/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java
@@ -0,0 +1,33 @@
+package edu.uci.ics.hivesterix.runtime.provider;
+
+import java.io.Serializable;
+
+import edu.uci.ics.hyracks.algebricks.data.ITypeTraitProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+
+public class HiveTypeTraitProvider implements ITypeTraitProvider, Serializable {
+ private static final long serialVersionUID = 1L;
+ public static HiveTypeTraitProvider INSTANCE = new HiveTypeTraitProvider();
+
+ private HiveTypeTraitProvider() {
+
+ }
+
+ @Override
+ public ITypeTraits getTypeTrait(Object arg0) {
+ return new ITypeTraits() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public int getFixedLength() {
+ return -1;
+ }
+
+ @Override
+ public boolean isFixedLength() {
+ return false;
+ }
+
+ };
+ }
+}
diff --git a/hivesterix/hivesterix-serde/pom.xml b/hivesterix/hivesterix-serde/pom.xml
new file mode 100644
index 0000000..0ba73bd
--- /dev/null
+++ b/hivesterix/hivesterix-serde/pom.xml
@@ -0,0 +1,67 @@
+<?xml version="1.0"?>
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <artifactId>hivesterix</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>hivesterix-serde</artifactId>
+ <name>hivesterix-serde</name>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-serde</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-common</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-core</artifactId>
+ <version>0.20.2</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>3.8.1</version>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
similarity index 85%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
index 673416d..92415f9 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/ByteArrayRef.java
@@ -19,24 +19,23 @@
/**
* ByteArrayRef stores a reference to a byte array.
- *
* The LazyObject hierarchy uses a reference to a single ByteArrayRef, so that
* it's much faster to switch to the next row and release the reference to the
* old row (so that the system can do garbage collection if needed).
*/
public class ByteArrayRef {
- /**
- * Stores the actual data.
- */
- byte[] data;
+ /**
+ * Stores the actual data.
+ */
+ byte[] data;
- public byte[] getData() {
- return data;
- }
+ public byte[] getData() {
+ return data;
+ }
- public void setData(byte[] data) {
- this.data = data;
- }
+ public void setData(byte[] data) {
+ this.data = data;
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java
new file mode 100644
index 0000000..33b20bf
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java
@@ -0,0 +1,229 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyListObjectInspector;
+
+/**
+ * LazyArray is serialized as follows: start A b b b b b b end bytes[] ->
+ * |--------|---|---|---|---| ... |---|---|
+ * Section A is the null-bytes. Suppose the list has N elements, then there are
+ * (N+7)/8 bytes used as null-bytes. Each bit corresponds to an element and it
+ * indicates whether that element is null (0) or not null (1).
+ * After A, all b(s) represent the elements of the list. Each of them is again a
+ * LazyObject.
+ */
+
+public class LazyArray extends LazyNonPrimitive<LazyListObjectInspector> {
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean parsed = false;
+ /**
+ * The length of the array. Only valid when the data is parsed.
+ */
+ int arraySize = 0;
+
+ /**
+ * The start positions and lengths of array elements. Only valid when the
+ * data is parsed.
+ */
+ int[] elementStart;
+ int[] elementLength;
+
+ /**
+ * Whether an element is initialized or not.
+ */
+ boolean[] elementInited;
+
+ /**
+ * Whether an element is null or not. Because length is 0 does not means the
+ * field is null. In particular, a 0-length string is not null.
+ */
+ boolean[] elementIsNull;
+
+ /**
+ * The elements of the array. Note that we call arrayElements[i].init(bytes,
+ * begin, length) only when that element is accessed.
+ */
+ @SuppressWarnings("rawtypes")
+ LazyObject[] arrayElements;
+
+ /**
+ * Construct a LazyArray object with the ObjectInspector.
+ *
+ * @param oi
+ * the oi representing the type of this LazyArray
+ */
+ protected LazyArray(LazyListObjectInspector oi) {
+ super(oi);
+ }
+
+ /**
+ * Set the row data for this LazyArray.
+ *
+ * @see LazyObject#init(ByteArrayRef, int, int)
+ */
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false;
+ }
+
+ /**
+ * Enlarge the size of arrays storing information for the elements inside
+ * the array.
+ */
+ private void adjustArraySize(int newSize) {
+ if (elementStart == null || elementStart.length < newSize) {
+ elementStart = new int[newSize];
+ elementLength = new int[newSize];
+ elementInited = new boolean[newSize];
+ elementIsNull = new boolean[newSize];
+ arrayElements = new LazyObject[newSize];
+ }
+ }
+
+ VInt vInt = new LazyUtils.VInt();
+ RecordInfo recordInfo = new LazyUtils.RecordInfo();
+
+ /**
+ * Parse the bytes and fill elementStart, elementLength, elementInited and
+ * elementIsNull.
+ */
+ private void parse() {
+
+ // get the vlong that represents the map size
+ LazyUtils.readVInt(bytes, start, vInt);
+ arraySize = vInt.value;
+ if (0 == arraySize) {
+ parsed = true;
+ return;
+ }
+
+ // adjust arrays
+ adjustArraySize(arraySize);
+ // find out the null-bytes
+ int arryByteStart = start + vInt.length;
+ int nullByteCur = arryByteStart;
+ int nullByteEnd = arryByteStart + (arraySize + 7) / 8;
+ // the begin the real elements
+ int lastElementByteEnd = nullByteEnd;
+ // the list element object inspector
+ ObjectInspector listEleObjectInspector = ((ListObjectInspector) oi).getListElementObjectInspector();
+ // parsing elements one by one
+ for (int i = 0; i < arraySize; i++) {
+ elementIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << (i % 8))) != 0) {
+ elementIsNull[i] = false;
+ LazyUtils.checkObjectByteInfo(listEleObjectInspector, bytes, lastElementByteEnd, recordInfo);
+ elementStart[i] = lastElementByteEnd + recordInfo.elementOffset;
+ elementLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = elementStart[i] + elementLength[i];
+ }
+ // move onto the next null byte
+ if (7 == (i % 8)) {
+ nullByteCur++;
+ }
+ }
+
+ Arrays.fill(elementInited, 0, arraySize, false);
+ parsed = true;
+ }
+
+ /**
+ * Returns the actual primitive object at the index position inside the
+ * array represented by this LazyObject.
+ */
+ public Object getListElementObject(int index) {
+ if (!parsed) {
+ parse();
+ }
+ if (index < 0 || index >= arraySize) {
+ return null;
+ }
+ return uncheckedGetElement(index);
+ }
+
+ /**
+ * Get the element without checking out-of-bound index.
+ *
+ * @param index
+ * index to the array element
+ */
+ private Object uncheckedGetElement(int index) {
+
+ if (elementIsNull[index]) {
+ return null;
+ } else {
+ if (!elementInited[index]) {
+ elementInited[index] = true;
+ if (arrayElements[index] == null) {
+ arrayElements[index] = LazyFactory.createLazyObject((oi).getListElementObjectInspector());
+ }
+ arrayElements[index].init(bytes, elementStart[index], elementLength[index]);
+ }
+ }
+ return arrayElements[index].getObject();
+ }
+
+ /**
+ * Returns the array size.
+ */
+ public int getListLength() {
+ if (!parsed) {
+ parse();
+ }
+ return arraySize;
+ }
+
+ /**
+ * cachedList is reused every time getList is called. Different
+ * LazyBianryArray instances cannot share the same cachedList.
+ */
+ ArrayList<Object> cachedList;
+
+ /**
+ * Returns the List of actual primitive objects. Returns null for null
+ * array.
+ */
+ public List<Object> getList() {
+ if (!parsed) {
+ parse();
+ }
+ if (cachedList == null) {
+ cachedList = new ArrayList<Object>(arraySize);
+ } else {
+ cachedList.clear();
+ }
+ for (int index = 0; index < arraySize; index++) {
+ cachedList.add(uncheckedGetElement(index));
+ }
+ return cachedList;
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java
new file mode 100644
index 0000000..5a48525
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.BooleanWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
+
+/**
+ * LazyObject for storing a value of boolean.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyBoolean extends LazyPrimitive<LazyBooleanObjectInspector, BooleanWritable> {
+
+ public LazyBoolean(LazyBooleanObjectInspector oi) {
+ super(oi);
+ data = new BooleanWritable();
+ }
+
+ public LazyBoolean(LazyBoolean copy) {
+ super(copy);
+ data = new BooleanWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+
+ // a temporal hack
+ assert (1 == length);
+ byte val = bytes[start];
+ if (val == 0) {
+ data.set(false);
+ } else if (val == 1) {
+ data.set(true);
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java
new file mode 100644
index 0000000..bf4ff04
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.ByteWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyByteObjectInspector;
+
+/**
+ * LazyObject for storing a value of Byte.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyByte extends LazyPrimitive<LazyByteObjectInspector, ByteWritable> {
+
+ public LazyByte(LazyByteObjectInspector oi) {
+ super(oi);
+ data = new ByteWritable();
+ }
+
+ public LazyByte(LazyByte copy) {
+ super(copy);
+ data = new ByteWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+
+ assert (1 == length);
+ data.set(bytes[start]);
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java
new file mode 100644
index 0000000..d73fea7
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java
@@ -0,0 +1,207 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyObject for storing a struct. The field of a struct can be primitive or
+ * non-primitive.
+ * LazyStruct does not deal with the case of a NULL struct. That is handled by
+ * the parent LazyObject.
+ */
+@SuppressWarnings("rawtypes")
+public class LazyColumnar extends LazyNonPrimitive<LazyColumnarObjectInspector> {
+
+ /**
+ * IFrameTupleReference: the backend of the struct
+ */
+ IFrameTupleReference tuple;
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean reset;
+
+ /**
+ * The fields of the struct.
+ */
+ LazyObject[] fields;
+
+ /**
+ * Whether init() has been called on the field or not.
+ */
+ boolean[] fieldVisited;
+
+ /**
+ * whether it is the first time initialization
+ */
+ boolean start = true;
+
+ /**
+ * Construct a LazyStruct object with the ObjectInspector.
+ */
+ public LazyColumnar(LazyColumnarObjectInspector oi) {
+ super(oi);
+ }
+
+ /**
+ * Set the row data for this LazyStruct.
+ *
+ * @see LazyObject#init(ByteArrayRef, int, int)
+ */
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ super.init(bytes, start, length);
+ reset = false;
+ }
+
+ /**
+ * Parse the byte[] and fill each field.
+ */
+ private void parse() {
+
+ if (start) {
+ // initialize field array and reusable objects
+ List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
+
+ fields = new LazyObject[fieldRefs.size()];
+ for (int i = 0; i < fields.length; i++) {
+ fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector());
+ }
+ fieldVisited = new boolean[fields.length];
+ start = false;
+ }
+
+ Arrays.fill(fieldVisited, false);
+ reset = true;
+ }
+
+ /**
+ * Get one field out of the struct.
+ * If the field is a primitive field, return the actual object. Otherwise
+ * return the LazyObject. This is because PrimitiveObjectInspector does not
+ * have control over the object used by the user - the user simply directly
+ * use the Object instead of going through Object
+ * PrimitiveObjectInspector.get(Object).
+ *
+ * @param fieldID
+ * The field ID
+ * @return The field as a LazyObject
+ */
+ public Object getField(int fieldID) {
+ if (!reset) {
+ parse();
+ }
+ return uncheckedGetField(fieldID);
+ }
+
+ /**
+ * Get the field out of the row without checking parsed. This is called by
+ * both getField and getFieldsAsList.
+ *
+ * @param fieldID
+ * The id of the field starting from 0.
+ * @param nullSequence
+ * The sequence representing NULL value.
+ * @return The value of the field
+ */
+ private Object uncheckedGetField(int fieldID) {
+ // get the buffer
+ byte[] buffer = tuple.getFieldData(fieldID);
+ // get the offset of the field
+ int s1 = tuple.getFieldStart(fieldID);
+ int l1 = tuple.getFieldLength(fieldID);
+
+ if (!fieldVisited[fieldID]) {
+ fieldVisited[fieldID] = true;
+ fields[fieldID].init(buffer, s1, l1);
+ }
+ // if (fields[fieldID].getObject() == null) {
+ // throw new IllegalStateException("illegal field " + fieldID);
+ // }
+ return fields[fieldID].getObject();
+ }
+
+ ArrayList<Object> cachedList;
+
+ /**
+ * Get the values of the fields as an ArrayList.
+ *
+ * @return The values of the fields as an ArrayList.
+ */
+ public ArrayList<Object> getFieldsAsList() {
+ if (!reset) {
+ parse();
+ }
+ if (cachedList == null) {
+ cachedList = new ArrayList<Object>();
+ } else {
+ cachedList.clear();
+ }
+ for (int i = 0; i < fields.length; i++) {
+ cachedList.add(uncheckedGetField(i));
+ }
+ return cachedList;
+ }
+
+ @Override
+ public Object getObject() {
+ return this;
+ }
+
+ protected boolean getParsed() {
+ return reset;
+ }
+
+ protected void setParsed(boolean parsed) {
+ this.reset = parsed;
+ }
+
+ protected LazyObject[] getFields() {
+ return fields;
+ }
+
+ protected void setFields(LazyObject[] fields) {
+ this.fields = fields;
+ }
+
+ protected boolean[] getFieldInited() {
+ return fieldVisited;
+ }
+
+ protected void setFieldInited(boolean[] fieldInited) {
+ this.fieldVisited = fieldInited;
+ }
+
+ /**
+ * rebind a frametuplereference to the struct
+ */
+ public void init(IFrameTupleReference r) {
+ this.tuple = r;
+ reset = false;
+ }
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
similarity index 62%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
index d687aa1..1b2cc5a 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyDouble.java
@@ -23,31 +23,28 @@
/**
* LazyObject for storing a value of Double.
- *
*/
-public class LazyDouble extends
- LazyPrimitive<LazyDoubleObjectInspector, DoubleWritable> {
+public class LazyDouble extends LazyPrimitive<LazyDoubleObjectInspector, DoubleWritable> {
- public LazyDouble(LazyDoubleObjectInspector oi) {
- super(oi);
- data = new DoubleWritable();
- }
+ public LazyDouble(LazyDoubleObjectInspector oi) {
+ super(oi);
+ data = new DoubleWritable();
+ }
- public LazyDouble(LazyDouble copy) {
- super(copy);
- data = new DoubleWritable(copy.data.get());
- }
+ public LazyDouble(LazyDouble copy) {
+ super(copy);
+ data = new DoubleWritable(copy.data.get());
+ }
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
- assert (8 == length);
- data.set(Double.longBitsToDouble(LazyUtils
- .byteArrayToLong(bytes, start)));
- }
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+ assert (8 == length);
+ data.set(Double.longBitsToDouble(LazyUtils.byteArrayToLong(bytes, start)));
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java
new file mode 100644
index 0000000..7caa9ed
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java
@@ -0,0 +1,94 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyListObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyMapObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyByteObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyDoubleObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyFloatObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyIntObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyLongObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyShortObjectInspector;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyStringObjectInspector;
+
+/**
+ * LazyFactory.
+ */
+public final class LazyFactory {
+
+ /**
+ * Create a lazy binary primitive class given the type name.
+ */
+ public static LazyPrimitive<?, ?> createLazyPrimitiveClass(PrimitiveObjectInspector oi) {
+ PrimitiveCategory p = oi.getPrimitiveCategory();
+ switch (p) {
+ case BOOLEAN:
+ return new LazyBoolean((LazyBooleanObjectInspector) oi);
+ case BYTE:
+ return new LazyByte((LazyByteObjectInspector) oi);
+ case SHORT:
+ return new LazyShort((LazyShortObjectInspector) oi);
+ case INT:
+ return new LazyInteger((LazyIntObjectInspector) oi);
+ case LONG:
+ return new LazyLong((LazyLongObjectInspector) oi);
+ case FLOAT:
+ return new LazyFloat((LazyFloatObjectInspector) oi);
+ case DOUBLE:
+ return new LazyDouble((LazyDoubleObjectInspector) oi);
+ case STRING:
+ return new LazyString((LazyStringObjectInspector) oi);
+ default:
+ throw new RuntimeException("Internal error: no LazyObject for " + p);
+ }
+ }
+
+ /**
+ * Create a hierarchical LazyObject based on the given typeInfo.
+ */
+ public static LazyObject<? extends ObjectInspector> createLazyObject(ObjectInspector oi) {
+ ObjectInspector.Category c = oi.getCategory();
+ switch (c) {
+ case PRIMITIVE:
+ return createLazyPrimitiveClass((PrimitiveObjectInspector) oi);
+ case MAP:
+ return new LazyMap((LazyMapObjectInspector) oi);
+ case LIST:
+ return new LazyArray((LazyListObjectInspector) oi);
+ case STRUCT: // check whether it is a top-level struct
+ if (oi instanceof LazyStructObjectInspector)
+ return new LazyStruct((LazyStructObjectInspector) oi);
+ else
+ return new LazyColumnar((LazyColumnarObjectInspector) oi);
+ default:
+ throw new RuntimeException("Hive LazySerDe Internal error.");
+ }
+ }
+
+ private LazyFactory() {
+ // prevent instantiation
+ }
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
similarity index 62%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
index 303cc67..430ac2e 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFloat.java
@@ -23,31 +23,29 @@
/**
* LazyObject for storing a value of Double.
- *
*/
-public class LazyFloat extends
- LazyPrimitive<LazyFloatObjectInspector, FloatWritable> {
+public class LazyFloat extends LazyPrimitive<LazyFloatObjectInspector, FloatWritable> {
- public LazyFloat(LazyFloatObjectInspector oi) {
- super(oi);
- data = new FloatWritable();
- }
+ public LazyFloat(LazyFloatObjectInspector oi) {
+ super(oi);
+ data = new FloatWritable();
+ }
- public LazyFloat(LazyFloat copy) {
- super(copy);
- data = new FloatWritable(copy.data.get());
- }
+ public LazyFloat(LazyFloat copy) {
+ super(copy);
+ data = new FloatWritable(copy.data.get());
+ }
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
- assert (4 == length);
- data.set(Float.intBitsToFloat(LazyUtils.byteArrayToInt(bytes, start)));
- }
+ assert (4 == length);
+ data.set(Float.intBitsToFloat(LazyUtils.byteArrayToInt(bytes, start)));
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java
new file mode 100644
index 0000000..0765c4f
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.IntWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyIntObjectInspector;
+
+/**
+ * LazyObject for storing a value of Integer.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyInteger extends LazyPrimitive<LazyIntObjectInspector, IntWritable> {
+
+ public LazyInteger(LazyIntObjectInspector oi) {
+ super(oi);
+ data = new IntWritable();
+ }
+
+ public LazyInteger(LazyInteger copy) {
+ super(copy);
+ data = new IntWritable(copy.data.get());
+ }
+
+ /**
+ * The reusable vInt for decoding the integer.
+ */
+ VInt vInt = new LazyUtils.VInt();
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+
+ LazyUtils.readVInt(bytes, start, vInt);
+ assert (length == vInt.length);
+ if (length != vInt.length)
+ throw new IllegalStateException("parse int: length mismatch, expected " + vInt.length + " but get "
+ + length);
+ data.set(vInt.value);
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java
new file mode 100644
index 0000000..e6b56c3
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyLongObjectInspector;
+
+/**
+ * LazyObject for storing a value of Long.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyLong extends LazyPrimitive<LazyLongObjectInspector, LongWritable> {
+
+ public LazyLong(LazyLongObjectInspector oi) {
+ super(oi);
+ data = new LongWritable();
+ }
+
+ public LazyLong(LazyLong copy) {
+ super(copy);
+ data = new LongWritable(copy.data.get());
+ }
+
+ /**
+ * The reusable vLong for decoding the long.
+ */
+ VLong vLong = new LazyUtils.VLong();
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+
+ LazyUtils.readVLong(bytes, start, vLong);
+ assert (length == vLong.length);
+ if (length != vLong.length)
+ throw new IllegalStateException("parse long: length mismatch");
+ data.set(vLong.value);
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java
new file mode 100644
index 0000000..9c7af2e
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java
@@ -0,0 +1,327 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyMapObjectInspector;
+
+/**
+ * LazyMap is serialized as follows: start A b c b c b c end bytes[] ->
+ * |--------|---|---|---|---| ... |---|---|
+ * Section A is the null-bytes. Suppose the map has N key-value pairs, then
+ * there are (N*2+7)/8 bytes used as null-bytes. Each bit corresponds to a key
+ * or a value and it indicates whether that key or value is null (0) or not null
+ * (1).
+ * After A, all the bytes are actual serialized data of the map, which are
+ * key-value pairs. b represent the keys and c represent the values. Each of
+ * them is again a LazyObject.
+ */
+
+@SuppressWarnings("rawtypes")
+public class LazyMap extends LazyNonPrimitive<LazyMapObjectInspector> {
+
+ private static Log LOG = LogFactory.getLog(LazyMap.class.getName());
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean parsed;
+
+ /**
+ * The size of the map. Only valid when the data is parsed. -1 when the map
+ * is NULL.
+ */
+ int mapSize = 0;
+
+ /**
+ * The beginning position and length of key[i] and value[i]. Only valid when
+ * the data is parsed.
+ */
+ int[] keyStart;
+ int[] keyLength;
+ int[] valueStart;
+ int[] valueLength;
+ /**
+ * Whether valueObjects[i]/keyObjects[i] is initialized or not.
+ */
+ boolean[] keyInited;
+ boolean[] valueInited;
+
+ /**
+ * Whether valueObjects[i]/keyObjects[i] is null or not This could not be
+ * inferred from the length of the object. In particular, a 0-length string
+ * is not null.
+ */
+ boolean[] keyIsNull;
+ boolean[] valueIsNull;
+
+ /**
+ * The keys are stored in an array of LazyPrimitives.
+ */
+ LazyPrimitive<?, ?>[] keyObjects;
+ /**
+ * The values are stored in an array of LazyObjects. value[index] will start
+ * from KeyEnd[index] + 1, and ends before KeyStart[index+1] - 1.
+ */
+ LazyObject[] valueObjects;
+
+ protected LazyMap(LazyMapObjectInspector oi) {
+ super(oi);
+ }
+
+ /**
+ * Set the row data for this LazyMap.
+ *
+ * @see LazyObject#init(ByteArrayRef, int, int)
+ */
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false;
+ }
+
+ /**
+ * Adjust the size of arrays: keyStart, keyLength valueStart, valueLength
+ * keyInited, keyIsNull valueInited, valueIsNull.
+ */
+ protected void adjustArraySize(int newSize) {
+ if (keyStart == null || keyStart.length < newSize) {
+ keyStart = new int[newSize];
+ keyLength = new int[newSize];
+ valueStart = new int[newSize];
+ valueLength = new int[newSize];
+ keyInited = new boolean[newSize];
+ keyIsNull = new boolean[newSize];
+ valueInited = new boolean[newSize];
+ valueIsNull = new boolean[newSize];
+ keyObjects = new LazyPrimitive<?, ?>[newSize];
+ valueObjects = new LazyObject[newSize];
+ }
+ }
+
+ boolean nullMapKey = false;
+ VInt vInt = new LazyUtils.VInt();
+ RecordInfo recordInfo = new LazyUtils.RecordInfo();
+
+ /**
+ * Parse the byte[] and fill keyStart, keyLength, keyIsNull valueStart,
+ * valueLength and valueIsNull.
+ */
+ private void parse() {
+
+ // get the VInt that represents the map size
+ LazyUtils.readVInt(bytes, start, vInt);
+ mapSize = vInt.value;
+ if (0 == mapSize) {
+ parsed = true;
+ return;
+ }
+
+ // adjust arrays
+ adjustArraySize(mapSize);
+
+ // find out the null-bytes
+ int mapByteStart = start + vInt.length;
+ int nullByteCur = mapByteStart;
+ int nullByteEnd = mapByteStart + (mapSize * 2 + 7) / 8;
+ int lastElementByteEnd = nullByteEnd;
+
+ // parsing the keys and values one by one
+ for (int i = 0; i < mapSize; i++) {
+ // parse a key
+ keyIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << ((i * 2) % 8))) != 0) {
+ keyIsNull[i] = false;
+ LazyUtils.checkObjectByteInfo(((MapObjectInspector) oi).getMapKeyObjectInspector(), bytes,
+ lastElementByteEnd, recordInfo);
+ keyStart[i] = lastElementByteEnd + recordInfo.elementOffset;
+ keyLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = keyStart[i] + keyLength[i];
+ } else if (!nullMapKey) {
+ nullMapKey = true;
+ LOG.warn("Null map key encountered! Ignoring similar problems.");
+ }
+
+ // parse a value
+ valueIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << ((i * 2 + 1) % 8))) != 0) {
+ valueIsNull[i] = false;
+ LazyUtils.checkObjectByteInfo(((MapObjectInspector) oi).getMapValueObjectInspector(), bytes,
+ lastElementByteEnd, recordInfo);
+ valueStart[i] = lastElementByteEnd + recordInfo.elementOffset;
+ valueLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = valueStart[i] + valueLength[i];
+ }
+
+ // move onto the next null byte
+ if (3 == (i % 4)) {
+ nullByteCur++;
+ }
+ }
+
+ Arrays.fill(keyInited, 0, mapSize, false);
+ Arrays.fill(valueInited, 0, mapSize, false);
+ parsed = true;
+ }
+
+ /**
+ * Get the value object with the index without checking parsed.
+ *
+ * @param index
+ * The index into the array starting from 0
+ */
+ private LazyObject uncheckedGetValue(int index) {
+ if (valueIsNull[index]) {
+ return null;
+ }
+ if (!valueInited[index]) {
+ valueInited[index] = true;
+ if (valueObjects[index] == null) {
+ valueObjects[index] = LazyFactory.createLazyObject(((MapObjectInspector) oi)
+ .getMapValueObjectInspector());
+ }
+ valueObjects[index].init(bytes, valueStart[index], valueLength[index]);
+ }
+ return valueObjects[index];
+ }
+
+ /**
+ * Get the value in the map for the key.
+ * If there are multiple matches (which is possible in the serialized
+ * format), only the first one is returned.
+ * The most efficient way to get the value for the key is to serialize the
+ * key and then try to find it in the array. We do linear search because in
+ * most cases, user only wants to get one or two values out of the map, and
+ * the cost of building up a HashMap is substantially higher.
+ *
+ * @param key
+ * The key object that we are looking for.
+ * @return The corresponding value object, or NULL if not found
+ */
+ public Object getMapValueElement(Object key) {
+ if (!parsed) {
+ parse();
+ }
+ // search for the key
+ for (int i = 0; i < mapSize; i++) {
+ LazyPrimitive<?, ?> lazyKeyI = uncheckedGetKey(i);
+ if (lazyKeyI == null) {
+ continue;
+ }
+ // getWritableObject() will convert LazyPrimitive to actual
+ // primitive
+ // writable objects.
+ Object keyI = lazyKeyI.getWritableObject();
+ if (keyI == null) {
+ continue;
+ }
+ if (keyI.equals(key)) {
+ // Got a match, return the value
+ LazyObject v = uncheckedGetValue(i);
+ return v == null ? v : v.getObject();
+ }
+ }
+ return null;
+ }
+
+ /**
+ * Get the key object with the index without checking parsed.
+ *
+ * @param index
+ * The index into the array starting from 0
+ */
+ private LazyPrimitive<?, ?> uncheckedGetKey(int index) {
+ if (keyIsNull[index]) {
+ return null;
+ }
+ if (!keyInited[index]) {
+ keyInited[index] = true;
+ if (keyObjects[index] == null) {
+ // Keys are always primitive
+ keyObjects[index] = LazyFactory
+ .createLazyPrimitiveClass((PrimitiveObjectInspector) ((MapObjectInspector) oi)
+ .getMapKeyObjectInspector());
+ }
+ keyObjects[index].init(bytes, keyStart[index], keyLength[index]);
+ }
+ return keyObjects[index];
+ }
+
+ /**
+ * cachedMap is reused for different calls to getMap(). But each LazyMap has
+ * a separate cachedMap so we won't overwrite the data by accident.
+ */
+ LinkedHashMap<Object, Object> cachedMap;
+
+ /**
+ * Return the map object representing this LazyMap. Note that the keyObjects
+ * will be Writable primitive objects.
+ *
+ * @return the map object
+ */
+ public Map<Object, Object> getMap() {
+ if (!parsed) {
+ parse();
+ }
+ if (cachedMap == null) {
+ // Use LinkedHashMap to provide deterministic order
+ cachedMap = new LinkedHashMap<Object, Object>();
+ } else {
+ cachedMap.clear();
+ }
+
+ // go through each element of the map
+ for (int i = 0; i < mapSize; i++) {
+ LazyPrimitive<?, ?> lazyKey = uncheckedGetKey(i);
+ if (lazyKey == null) {
+ continue;
+ }
+ Object key = lazyKey.getObject();
+ // do not overwrite if there are duplicate keys
+ if (key != null && !cachedMap.containsKey(key)) {
+ LazyObject lazyValue = uncheckedGetValue(i);
+ Object value = (lazyValue == null ? null : lazyValue.getObject());
+ cachedMap.put(key, value);
+ }
+ }
+ return cachedMap;
+ }
+
+ /**
+ * Get the size of the map represented by this LazyMap.
+ *
+ * @return The size of the map
+ */
+ public int getMapSize() {
+ if (!parsed) {
+ parse();
+ }
+ return mapSize;
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java
new file mode 100644
index 0000000..f7ae1e3
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyPrimitive stores a primitive Object in a LazyObject.
+ */
+public abstract class LazyNonPrimitive<OI extends ObjectInspector> extends LazyObject<OI> {
+
+ protected byte[] bytes;
+ protected int start;
+ protected int length;
+
+ /**
+ * Create a LazyNonPrimitive object with the specified ObjectInspector.
+ *
+ * @param oi
+ * The ObjectInspector would have to have a hierarchy of
+ * LazyObjectInspectors with the leaf nodes being
+ * WritableObjectInspectors. It's used both for accessing the
+ * type hierarchy of the complex object, as well as getting meta
+ * information (separator, nullSequence, etc) when parsing the
+ * lazy object.
+ */
+ protected LazyNonPrimitive(OI oi) {
+ super(oi);
+ bytes = null;
+ start = 0;
+ length = 0;
+ }
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (bytes == null) {
+ throw new RuntimeException("bytes cannot be null!");
+ }
+ this.bytes = bytes;
+ this.start = start;
+ this.length = length;
+ assert start >= 0;
+ assert start + length <= bytes.length;
+ }
+
+ @Override
+ public Object getObject() {
+ return this;
+ }
+
+ @Override
+ public int hashCode() {
+ return LazyUtils.hashBytes(bytes, start, length);
+ }
+
+ @Override
+ public void init(IFrameTupleReference tuple) {
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java
new file mode 100644
index 0000000..dc1dc60
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyObject stores an object in a range of bytes in a byte[].
+ * A LazyObject can represent any primitive object or hierarchical object like
+ * array, map or struct.
+ */
+public abstract class LazyObject<OI extends ObjectInspector> {
+
+ OI oi;
+
+ /**
+ * Create a LazyObject.
+ *
+ * @param oi
+ * Derived classes can access meta information about this Lazy
+ * Object (e.g, separator, nullSequence, escaper) from it.
+ */
+ protected LazyObject(OI oi) {
+ this.oi = oi;
+ }
+
+ /**
+ * Set the data for this LazyObject. We take ByteArrayRef instead of byte[]
+ * so that we will be able to drop the reference to byte[] by a single
+ * assignment. The ByteArrayRef object can be reused across multiple rows.
+ *
+ * @param bytes
+ * The wrapper of the byte[].
+ * @param start
+ * The start position inside the bytes.
+ * @param length
+ * The length of the data, starting from "start"
+ * @see ByteArrayRef
+ */
+ public abstract void init(byte[] bytes, int start, int length);
+
+ public abstract void init(IFrameTupleReference tuple);
+
+ /**
+ * If the LazyObject is a primitive Object, then deserialize it and return
+ * the actual primitive Object. Otherwise (array, map, struct), return this.
+ */
+ public abstract Object getObject();
+
+ @Override
+ public abstract int hashCode();
+
+ protected OI getInspector() {
+ return oi;
+ }
+
+ protected void setInspector(OI oi) {
+ this.oi = oi;
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java
new file mode 100644
index 0000000..8139c65
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+
+/**
+ * LazyPrimitive stores a primitive Object in a LazyObject.
+ */
+public abstract class LazyPrimitive<OI extends ObjectInspector, T extends Writable> extends LazyObject<OI> {
+
+ LazyPrimitive(OI oi) {
+ super(oi);
+ }
+
+ LazyPrimitive(LazyPrimitive<OI, T> copy) {
+ super(copy.oi);
+ isNull = copy.isNull;
+ }
+
+ T data;
+ boolean isNull = false;
+
+ /**
+ * Returns the primitive object represented by this LazyObject. This is
+ * useful because it can make sure we have "null" for null objects.
+ */
+ @Override
+ public Object getObject() {
+ return isNull ? null : this;
+ }
+
+ public T getWritableObject() {
+ return isNull ? null : data;
+ }
+
+ @Override
+ public String toString() {
+ return isNull ? "null" : data.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ return isNull ? 0 : data.hashCode();
+ }
+
+ @Override
+ public void init(IFrameTupleReference tuple) {
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
new file mode 100644
index 0000000..05b82ba
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
@@ -0,0 +1,460 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * The LazySerDe class combines the lazy property of LazySimpleSerDe class and
+ * the binary property of BinarySortable class. Lazy means a field is not
+ * deserialized until required. Binary means a field is serialized in binary
+ * compact format.
+ */
+public class LazySerDe implements SerDe {
+
+ public static final Log LOG = LogFactory.getLog(LazySerDe.class.getName());
+
+ public LazySerDe() {
+ }
+
+ List<String> columnNames;
+ List<TypeInfo> columnTypes;
+
+ TypeInfo rowTypeInfo;
+ ObjectInspector cachedObjectInspector;
+
+ // The object for storing row data
+ LazyColumnar cachedLazyStruct;
+
+ /**
+ * Initialize the SerDe with configuration and table information.
+ */
+ @Override
+ public void initialize(Configuration conf, Properties tbl) throws SerDeException {
+ // Get column names and types
+ String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+ String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+ if (columnNameProperty.length() == 0) {
+ columnNames = new ArrayList<String>();
+ } else {
+ columnNames = Arrays.asList(columnNameProperty.split(","));
+ }
+ if (columnTypeProperty.length() == 0) {
+ columnTypes = new ArrayList<TypeInfo>();
+ } else {
+ columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ }
+ assert (columnNames.size() == columnTypes.size());
+ // Create row related objects
+ rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+ // Create the object inspector and the lazy binary struct object
+ cachedObjectInspector = LazyUtils.getLazyObjectInspectorFromTypeInfo(rowTypeInfo, true);
+ cachedLazyStruct = (LazyColumnar) LazyFactory.createLazyObject(cachedObjectInspector);
+ // output debug info
+ LOG.debug("LazySerDe initialized with: columnNames=" + columnNames + " columnTypes=" + columnTypes);
+ }
+
+ /**
+ * Returns the ObjectInspector for the row.
+ */
+ @Override
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return cachedObjectInspector;
+ }
+
+ /**
+ * Returns the Writable Class after serialization.
+ */
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return BytesWritable.class;
+ }
+
+ // The wrapper for byte array
+ ByteArrayRef byteArrayRef;
+
+ /**
+ * Deserialize a table record to a Lazy struct.
+ */
+ @SuppressWarnings("deprecation")
+ @Override
+ public Object deserialize(Writable field) throws SerDeException {
+ if (byteArrayRef == null) {
+ byteArrayRef = new ByteArrayRef();
+ }
+ if (field instanceof BytesWritable) {
+ BytesWritable b = (BytesWritable) field;
+ if (b.getSize() == 0) {
+ return null;
+ }
+ // For backward-compatibility with hadoop 0.17
+ byteArrayRef.setData(b.get());
+ cachedLazyStruct.init(byteArrayRef.getData(), 0, b.getSize());
+ } else if (field instanceof Text) {
+ Text t = (Text) field;
+ if (t.getLength() == 0) {
+ return null;
+ }
+ byteArrayRef.setData(t.getBytes());
+ cachedLazyStruct.init(byteArrayRef.getData(), 0, t.getLength());
+ } else {
+ throw new SerDeException(getClass().toString() + ": expects either BytesWritable or Text object!");
+ }
+ return cachedLazyStruct;
+ }
+
+ /**
+ * The reusable output buffer and serialize byte buffer.
+ */
+ BytesWritable serializeBytesWritable = new BytesWritable();
+ ByteStream.Output serializeByteStream = new ByteStream.Output();
+
+ /**
+ * Serialize an object to a byte buffer in a binary compact way.
+ */
+ @Override
+ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
+ // make sure it is a struct record or not
+ serializeByteStream.reset();
+
+ if (objInspector.getCategory() != Category.STRUCT) {
+ // serialize the primitive object
+ serialize(serializeByteStream, obj, objInspector);
+ } else {
+ // serialize the row as a struct
+ serializeStruct(serializeByteStream, obj, (StructObjectInspector) objInspector);
+ }
+ // return the serialized bytes
+ serializeBytesWritable.set(serializeByteStream.getData(), 0, serializeByteStream.getCount());
+ return serializeBytesWritable;
+ }
+
+ boolean nullMapKey = false;
+
+ /**
+ * Serialize a struct object without writing the byte size. This function is
+ * shared by both row serialization and struct serialization.
+ *
+ * @param byteStream
+ * the byte stream storing the serialization data
+ * @param obj
+ * the struct object to serialize
+ * @param objInspector
+ * the struct object inspector
+ */
+ private void serializeStruct(Output byteStream, Object obj, StructObjectInspector soi) {
+ // do nothing for null struct
+ if (null == obj) {
+ return;
+ }
+ /*
+ * Interleave serializing one null byte and 8 struct fields in each
+ * round, in order to support data deserialization with different table
+ * schemas
+ */
+ List<? extends StructField> fields = soi.getAllStructFieldRefs();
+ int size = fields.size();
+ int lasti = 0;
+ byte nullByte = 0;
+ for (int i = 0; i < size; i++) {
+ // set bit to 1 if a field is not null
+ if (null != soi.getStructFieldData(obj, fields.get(i))) {
+ nullByte |= 1 << (i % 8);
+ }
+ // write the null byte every eight elements or
+ // if this is the last element and serialize the
+ // corresponding 8 struct fields at the same time
+ if (7 == i % 8 || i == size - 1) {
+ serializeByteStream.write(nullByte);
+ for (int j = lasti; j <= i; j++) {
+ serialize(serializeByteStream, soi.getStructFieldData(obj, fields.get(j)), fields.get(j)
+ .getFieldObjectInspector());
+ }
+ lasti = i + 1;
+ nullByte = 0;
+ }
+ }
+ }
+
+ /**
+ * A recursive function that serialize an object to a byte buffer based on
+ * its object inspector.
+ *
+ * @param byteStream
+ * the byte stream storing the serialization data
+ * @param obj
+ * the object to serialize
+ * @param objInspector
+ * the object inspector
+ */
+ private void serialize(Output byteStream, Object obj, ObjectInspector objInspector) {
+
+ // do nothing for null object
+ if (null == obj) {
+ return;
+ }
+
+ switch (objInspector.getCategory()) {
+ case PRIMITIVE: {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
+ switch (poi.getPrimitiveCategory()) {
+ case VOID: {
+ return;
+ }
+ case BOOLEAN: {
+ boolean v = ((BooleanObjectInspector) poi).get(obj);
+ byteStream.write((byte) (v ? 1 : 0));
+ return;
+ }
+ case BYTE: {
+ ByteObjectInspector boi = (ByteObjectInspector) poi;
+ byte v = boi.get(obj);
+ byteStream.write(v);
+ return;
+ }
+ case SHORT: {
+ ShortObjectInspector spoi = (ShortObjectInspector) poi;
+ short v = spoi.get(obj);
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case INT: {
+ IntObjectInspector ioi = (IntObjectInspector) poi;
+ int v = ioi.get(obj);
+ LazyUtils.writeVInt(byteStream, v);
+ return;
+ }
+ case LONG: {
+ LongObjectInspector loi = (LongObjectInspector) poi;
+ long v = loi.get(obj);
+ LazyUtils.writeVLong(byteStream, v);
+ return;
+ }
+ case FLOAT: {
+ FloatObjectInspector foi = (FloatObjectInspector) poi;
+ int v = Float.floatToIntBits(foi.get(obj));
+ byteStream.write((byte) (v >> 24));
+ byteStream.write((byte) (v >> 16));
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case DOUBLE: {
+ DoubleObjectInspector doi = (DoubleObjectInspector) poi;
+ long v = Double.doubleToLongBits(doi.get(obj));
+ byteStream.write((byte) (v >> 56));
+ byteStream.write((byte) (v >> 48));
+ byteStream.write((byte) (v >> 40));
+ byteStream.write((byte) (v >> 32));
+ byteStream.write((byte) (v >> 24));
+ byteStream.write((byte) (v >> 16));
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case STRING: {
+ StringObjectInspector soi = (StringObjectInspector) poi;
+ Text t = soi.getPrimitiveWritableObject(obj);
+ /* write byte size of the string which is a vint */
+ int length = t.getLength();
+ LazyUtils.writeVInt(byteStream, length);
+ /* write string itself */
+ byte[] data = t.getBytes();
+ byteStream.write(data, 0, length);
+ return;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
+ }
+ }
+ }
+ case LIST: {
+ ListObjectInspector loi = (ListObjectInspector) objInspector;
+ ObjectInspector eoi = loi.getListElementObjectInspector();
+
+ // 1/ reserve spaces for the byte size of the list
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int listStart = byteStream.getCount();
+
+ // 2/ write the size of the list as a VInt
+ int size = loi.getListLength(obj);
+ LazyUtils.writeVInt(byteStream, size);
+
+ // 3/ write the null bytes
+ byte nullByte = 0;
+ for (int eid = 0; eid < size; eid++) {
+ // set the bit to 1 if an element is not null
+ if (null != loi.getListElement(obj, eid)) {
+ nullByte |= 1 << (eid % 8);
+ }
+ // store the byte every eight elements or
+ // if this is the last element
+ if (7 == eid % 8 || eid == size - 1) {
+ byteStream.write(nullByte);
+ nullByte = 0;
+ }
+ }
+
+ // 4/ write element by element from the list
+ for (int eid = 0; eid < size; eid++) {
+ serialize(byteStream, loi.getListElement(obj, eid), eoi);
+ }
+
+ // 5/ update the list byte size
+ int listEnd = byteStream.getCount();
+ int listSize = listEnd - listStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (listSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (listSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (listSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (listSize);
+
+ return;
+ }
+ case MAP: {
+ MapObjectInspector moi = (MapObjectInspector) objInspector;
+ ObjectInspector koi = moi.getMapKeyObjectInspector();
+ ObjectInspector voi = moi.getMapValueObjectInspector();
+ Map<?, ?> map = moi.getMap(obj);
+
+ // 1/ reserve spaces for the byte size of the map
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int mapStart = byteStream.getCount();
+
+ // 2/ write the size of the map which is a VInt
+ int size = map.size();
+ LazyUtils.writeVInt(byteStream, size);
+
+ // 3/ write the null bytes
+ int b = 0;
+ byte nullByte = 0;
+ for (Map.Entry<?, ?> entry : map.entrySet()) {
+ // set the bit to 1 if a key is not null
+ if (null != entry.getKey()) {
+ nullByte |= 1 << (b % 8);
+ } else if (!nullMapKey) {
+ nullMapKey = true;
+ LOG.warn("Null map key encountered! Ignoring similar problems.");
+ }
+ b++;
+ // set the bit to 1 if a value is not null
+ if (null != entry.getValue()) {
+ nullByte |= 1 << (b % 8);
+ }
+ b++;
+ // write the byte to stream every 4 key-value pairs
+ // or if this is the last key-value pair
+ if (0 == b % 8 || b == size * 2) {
+ byteStream.write(nullByte);
+ nullByte = 0;
+ }
+ }
+
+ // 4/ write key-value pairs one by one
+ for (Map.Entry<?, ?> entry : map.entrySet()) {
+ serialize(byteStream, entry.getKey(), koi);
+ serialize(byteStream, entry.getValue(), voi);
+ }
+
+ // 5/ update the byte size of the map
+ int mapEnd = byteStream.getCount();
+ int mapSize = mapEnd - mapStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (mapSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (mapSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (mapSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (mapSize);
+
+ return;
+ }
+ case STRUCT: {
+ // 1/ reserve spaces for the byte size of the struct
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int structStart = byteStream.getCount();
+
+ // 2/ serialize the struct
+ serializeStruct(byteStream, obj, (StructObjectInspector) objInspector);
+
+ // 3/ update the byte size of the struct
+ int structEnd = byteStream.getCount();
+ int structSize = structEnd - structStart;
+ byte[] bytes = byteStream.getData();
+ bytes[byteSizeStart] = (byte) (structSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (structSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (structSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (structSize);
+
+ return;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + objInspector.getCategory());
+ }
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java
new file mode 100644
index 0000000..f493b37
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyShortObjectInspector;
+
+/**
+ * LazyObject for storing a value of Short.
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project. As with the specification, this implementation relied on code laid out in <a href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's Delight, (Addison Wesley, 2002)</a> as well as <a href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ */
+public class LazyShort extends LazyPrimitive<LazyShortObjectInspector, ShortWritable> {
+
+ public LazyShort(LazyShortObjectInspector oi) {
+ super(oi);
+ data = new ShortWritable();
+ }
+
+ public LazyShort(LazyShort copy) {
+ super(copy);
+ data = new ShortWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+
+ assert (2 == length);
+ data.set(LazyUtils.byteArrayToShort(bytes, start));
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java
new file mode 100644
index 0000000..0293af8
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyStringObjectInspector;
+
+/**
+ * LazyObject for storing a value of String.
+ */
+public class LazyString extends LazyPrimitive<LazyStringObjectInspector, Text> {
+
+ public LazyString(LazyStringObjectInspector oi) {
+ super(oi);
+ data = new Text();
+ }
+
+ public LazyString(LazyString copy) {
+ super(copy);
+ data = new Text(copy.data);
+ }
+
+ VInt vInt = new LazyUtils.VInt();
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ if (length == 0) {
+ isNull = true;
+ return;
+ } else
+ isNull = false;
+
+ // get the byte length of the string
+ LazyUtils.readVInt(bytes, start, vInt);
+ if (vInt.value + vInt.length != length)
+ throw new IllegalStateException("parse string: length mismatch, expected " + (vInt.value + vInt.length)
+ + " but get " + length);
+ assert (length - vInt.length > -1);
+ data.set(bytes, start + vInt.length, length - vInt.length);
+ }
+
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java
new file mode 100644
index 0000000..47e95e4
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java
@@ -0,0 +1,234 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector;
+
+/**
+ * LazyStruct is serialized as follows: start A B A B A B end bytes[] ->
+ * |-----|---------|--- ... ---|-----|---------|
+ * Section A is one null-byte, corresponding to eight struct fields in Section
+ * B. Each bit indicates whether the corresponding field is null (0) or not null
+ * (1). Each field is a LazyObject.
+ * Following B, there is another section A and B. This pattern repeats until the
+ * all struct fields are serialized.
+ */
+public class LazyStruct extends LazyNonPrimitive<LazyStructObjectInspector> {
+
+ private static Log LOG = LogFactory.getLog(LazyStruct.class.getName());
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean parsed;
+
+ /**
+ * The fields of the struct.
+ */
+ @SuppressWarnings("rawtypes")
+ LazyObject[] fields;
+
+ /**
+ * Whether a field is initialized or not.
+ */
+ boolean[] fieldInited;
+
+ /**
+ * Whether a field is null or not. Because length is 0 does not means the
+ * field is null. In particular, a 0-length string is not null.
+ */
+ boolean[] fieldIsNull;
+
+ /**
+ * The start positions and lengths of struct fields. Only valid when the
+ * data is parsed.
+ */
+ int[] fieldStart;
+ int[] fieldLength;
+
+ /**
+ * Construct a LazyStruct object with an ObjectInspector.
+ */
+ protected LazyStruct(LazyStructObjectInspector oi) {
+ super(oi);
+ }
+
+ @Override
+ public void init(byte[] bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false;
+ }
+
+ RecordInfo recordInfo = new LazyUtils.RecordInfo();
+ boolean missingFieldWarned = false;
+ boolean extraFieldWarned = false;
+
+ /**
+ * Parse the byte[] and fill fieldStart, fieldLength, fieldInited and
+ * fieldIsNull.
+ */
+ private void parse() {
+
+ List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
+
+ if (fields == null) {
+ fields = new LazyObject[fieldRefs.size()];
+ for (int i = 0; i < fields.length; i++) {
+ ObjectInspector insp = fieldRefs.get(i).getFieldObjectInspector();
+ fields[i] = insp == null ? null : LazyFactory.createLazyObject(insp);
+ }
+ fieldInited = new boolean[fields.length];
+ fieldIsNull = new boolean[fields.length];
+ fieldStart = new int[fields.length];
+ fieldLength = new int[fields.length];
+ }
+
+ /**
+ * Please note that one null byte is followed by eight fields, then more
+ * null byte and fields.
+ */
+
+ int fieldId = 0;
+ int structByteEnd = start + length;
+
+ byte nullByte = bytes[start];
+ int lastFieldByteEnd = start + 1;
+ // Go through all bytes in the byte[]
+ for (int i = 0; i < fields.length; i++) {
+ fieldIsNull[i] = true;
+ if ((nullByte & (1 << (i % 8))) != 0) {
+ fieldIsNull[i] = false;
+ LazyUtils.checkObjectByteInfo(fieldRefs.get(i).getFieldObjectInspector(), bytes, lastFieldByteEnd,
+ recordInfo);
+ fieldStart[i] = lastFieldByteEnd + recordInfo.elementOffset;
+ fieldLength[i] = recordInfo.elementSize;
+ lastFieldByteEnd = fieldStart[i] + fieldLength[i];
+ }
+
+ // count how many fields are there
+ if (lastFieldByteEnd <= structByteEnd) {
+ fieldId++;
+ }
+ // next byte is a null byte if there are more bytes to go
+ if (7 == (i % 8)) {
+ if (lastFieldByteEnd < structByteEnd) {
+ nullByte = bytes[lastFieldByteEnd];
+ lastFieldByteEnd++;
+ } else {
+ // otherwise all null afterwards
+ nullByte = 0;
+ lastFieldByteEnd++;
+ }
+ }
+ }
+
+ // Extra bytes at the end?
+ if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) {
+ extraFieldWarned = true;
+ LOG.warn("Extra bytes detected at the end of the row! Ignoring similar " + "problems.");
+ }
+
+ // Missing fields?
+ if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) {
+ missingFieldWarned = true;
+ LOG.warn("Missing fields! Expected " + fields.length + " fields but " + "only got " + fieldId
+ + "! Ignoring similar problems.");
+ }
+
+ Arrays.fill(fieldInited, false);
+ parsed = true;
+ }
+
+ /**
+ * Get one field out of the struct.
+ * If the field is a primitive field, return the actual object. Otherwise
+ * return the LazyObject. This is because PrimitiveObjectInspector does not
+ * have control over the object used by the user - the user simply directly
+ * use the Object instead of going through Object
+ * PrimitiveObjectInspector.get(Object).
+ *
+ * @param fieldID
+ * The field ID
+ * @return The field as a LazyObject
+ */
+ public Object getField(int fieldID) {
+ if (!parsed) {
+ parse();
+ }
+ return uncheckedGetField(fieldID);
+ }
+
+ /**
+ * Get the field out of the row without checking parsed. This is called by
+ * both getField and getFieldsAsList.
+ *
+ * @param fieldID
+ * The id of the field starting from 0.
+ * @return The value of the field
+ */
+ private Object uncheckedGetField(int fieldID) {
+ // Test the length first so in most cases we avoid doing a byte[]
+ // comparison.
+ if (fieldIsNull[fieldID]) {
+ return null;
+ }
+ if (!fieldInited[fieldID]) {
+ fieldInited[fieldID] = true;
+ fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]);
+ }
+ return fields[fieldID].getObject();
+ }
+
+ ArrayList<Object> cachedList;
+
+ /**
+ * Get the values of the fields as an ArrayList.
+ *
+ * @return The values of the fields as an ArrayList.
+ */
+ public ArrayList<Object> getFieldsAsList() {
+ if (!parsed) {
+ parse();
+ }
+ if (cachedList == null) {
+ cachedList = new ArrayList<Object>();
+ } else {
+ cachedList.clear();
+ }
+ for (int i = 0; i < fields.length; i++) {
+ cachedList.add(uncheckedGetField(i));
+ }
+ return cachedList;
+ }
+
+ @Override
+ public Object getObject() {
+ return this;
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java
new file mode 100644
index 0000000..6554ccc
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java
@@ -0,0 +1,503 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.WritableUtils;
+
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyObjectInspectorFactory;
+import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+/**
+ * LazyUtils.
+ */
+public final class LazyUtils {
+
+ /**
+ * Convert the byte array to an int starting from the given offset. Refer to
+ * code by aeden on DZone Snippets:
+ *
+ * @param b
+ * the byte array
+ * @param offset
+ * the array offset
+ * @return the integer
+ */
+ public static int byteArrayToInt(byte[] b, int offset) {
+ int value = 0;
+ for (int i = 0; i < 4; i++) {
+ int shift = (4 - 1 - i) * 8;
+ value += (b[i + offset] & 0x000000FF) << shift;
+ }
+ return value;
+ }
+
+ /**
+ * Convert the byte array to a long starting from the given offset.
+ *
+ * @param b
+ * the byte array
+ * @param offset
+ * the array offset
+ * @return the long
+ */
+ public static long byteArrayToLong(byte[] b, int offset) {
+ long value = 0;
+ for (int i = 0; i < 8; i++) {
+ int shift = (8 - 1 - i) * 8;
+ value += ((long) (b[i + offset] & 0x00000000000000FF)) << shift;
+ }
+ return value;
+ }
+
+ /**
+ * Convert the byte array to a short starting from the given offset.
+ *
+ * @param b
+ * the byte array
+ * @param offset
+ * the array offset
+ * @return the short
+ */
+ public static short byteArrayToShort(byte[] b, int offset) {
+ short value = 0;
+ value += (b[offset] & 0x000000FF) << 8;
+ value += (b[offset + 1] & 0x000000FF);
+ return value;
+ }
+
+ /**
+ * Record is the unit that data is serialized in. A record includes two
+ * parts. The first part stores the size of the element and the second part
+ * stores the real element. size element record ->
+ * |----|-------------------------|
+ * A RecordInfo stores two information of a record, the size of the "size"
+ * part which is the element offset and the size of the element part which
+ * is element size.
+ */
+ public static class RecordInfo {
+ public RecordInfo() {
+ elementOffset = 0;
+ elementSize = 0;
+ }
+
+ public byte elementOffset;
+ public int elementSize;
+
+ @Override
+ public String toString() {
+ return "(" + elementOffset + ", " + elementSize + ")";
+ }
+ }
+
+ static VInt vInt = new LazyUtils.VInt();
+
+ /**
+ * Check a particular field and set its size and offset in bytes based on
+ * the field type and the bytes arrays.
+ * For void, boolean, byte, short, int, long, float and double, there is no
+ * offset and the size is fixed. For string, map, list, struct, the first
+ * four bytes are used to store the size. So the offset is 4 and the size is
+ * computed by concating the first four bytes together. The first four bytes
+ * are defined with respect to the offset in the bytes arrays.
+ *
+ * @param objectInspector
+ * object inspector of the field
+ * @param bytes
+ * bytes arrays store the table row
+ * @param offset
+ * offset of this field
+ * @param recordInfo
+ * modify this byteinfo object and return it
+ */
+ public static void checkObjectByteInfo(ObjectInspector objectInspector, byte[] bytes, int offset,
+ RecordInfo recordInfo) {
+ Category category = objectInspector.getCategory();
+ switch (category) {
+ case PRIMITIVE:
+ PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector)
+ .getPrimitiveCategory();
+ switch (primitiveCategory) {
+ case VOID:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = 0;
+ break;
+ case BOOLEAN:
+ case BYTE:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = 1;
+ break;
+ case SHORT:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = 2;
+ break;
+ case FLOAT:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = 4;
+ break;
+ case DOUBLE:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = 8;
+ break;
+ case INT:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
+ break;
+ case LONG:
+ recordInfo.elementOffset = 0;
+ recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
+ break;
+ case STRING:
+ // using vint instead of 4 bytes
+ LazyUtils.readVInt(bytes, offset, vInt);
+ recordInfo.elementOffset = vInt.length;
+ recordInfo.elementSize = vInt.value;
+ break;
+ default: {
+ throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory);
+ }
+ }
+ break;
+ case LIST:
+ case MAP:
+ case STRUCT:
+ recordInfo.elementOffset = 4;
+ recordInfo.elementSize = LazyUtils.byteArrayToInt(bytes, offset);
+ break;
+ default: {
+ throw new RuntimeException("Unrecognized non-primitive type: " + category);
+ }
+ }
+ }
+
+ /**
+ * A zero-compressed encoded long.
+ */
+ public static class VLong {
+ public VLong() {
+ value = 0;
+ length = 0;
+ }
+
+ public long value;
+ public byte length;
+ };
+
+ /**
+ * Reads a zero-compressed encoded long from a byte array and returns it.
+ *
+ * @param bytes
+ * the byte array
+ * @param offset
+ * offset of the array to read from
+ * @param vlong
+ * storing the deserialized long and its size in byte
+ */
+ public static void readVLong(byte[] bytes, int offset, VLong vlong) {
+ byte firstByte = bytes[offset];
+ vlong.length = (byte) WritableUtils.decodeVIntSize(firstByte);
+ if (vlong.length == 1) {
+ vlong.value = firstByte;
+ return;
+ }
+ long i = 0;
+ for (int idx = 0; idx < vlong.length - 1; idx++) {
+ byte b = bytes[offset + 1 + idx];
+ i = i << 8;
+ i = i | (b & 0xFF);
+ }
+ vlong.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i);
+ }
+
+ /**
+ * A zero-compressed encoded integer.
+ */
+ public static class VInt implements Serializable {
+ private static final long serialVersionUID = 1L;
+
+ public VInt() {
+ value = 0;
+ length = 0;
+ }
+
+ public int value;
+ public byte length;
+ };
+
+ /**
+ * Reads a zero-compressed encoded int from a byte array and returns it.
+ *
+ * @param bytes
+ * the byte array
+ * @param offset
+ * offset of the array to read from
+ * @param vInt
+ * storing the deserialized int and its size in byte
+ */
+ public static void readVInt(byte[] bytes, int offset, VInt vInt) {
+ byte firstByte = bytes[offset];
+ vInt.length = (byte) WritableUtils.decodeVIntSize(firstByte);
+ if (vInt.length == 1) {
+ vInt.value = firstByte;
+ return;
+ }
+ int i = 0;
+ for (int idx = 0; idx < vInt.length - 1; idx++) {
+ byte b = bytes[offset + 1 + idx];
+ i = i << 8;
+ i = i | (b & 0xFF);
+ }
+ vInt.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1) : i);
+ }
+
+ /**
+ * Writes a zero-compressed encoded int to a byte array.
+ *
+ * @param byteStream
+ * the byte array/stream
+ * @param i
+ * the int
+ */
+ public static void writeVInt(Output byteStream, int i) {
+ writeVLong(byteStream, i);
+ }
+
+ /**
+ * Write a zero-compressed encoded long to a byte array.
+ *
+ * @param byteStream
+ * the byte array/stream
+ * @param l
+ * the long
+ */
+ public static void writeVLong(Output byteStream, long l) {
+ if (l >= -112 && l <= 127) {
+ byteStream.write((byte) l);
+ return;
+ }
+
+ int len = -112;
+ if (l < 0) {
+ l ^= -1L; // take one's complement'
+ len = -120;
+ }
+
+ long tmp = l;
+ while (tmp != 0) {
+ tmp = tmp >> 8;
+ len--;
+ }
+
+ byteStream.write((byte) len);
+
+ len = (len < -120) ? -(len + 120) : -(len + 112);
+
+ for (int idx = len; idx != 0; idx--) {
+ int shiftbits = (idx - 1) * 8;
+ long mask = 0xFFL << shiftbits;
+ byteStream.write((byte) ((l & mask) >> shiftbits));
+ }
+ }
+
+ static Map<TypeInfo, ObjectInspector> cachedLazyObjectInspector = new ConcurrentHashMap<TypeInfo, ObjectInspector>();
+
+ /**
+ * Returns the lazy binary object inspector that can be used to inspect an
+ * lazy binary object of that typeInfo
+ * For primitive types, we use the standard writable object inspector.
+ */
+ public static ObjectInspector getLazyObjectInspectorFromTypeInfo(TypeInfo typeInfo, boolean topLevel) {
+ if (typeInfo == null)
+ throw new IllegalStateException("illegal type null ");
+ ObjectInspector result = cachedLazyObjectInspector.get(typeInfo);
+ if (result == null) {
+ switch (typeInfo.getCategory()) {
+ case PRIMITIVE: {
+ result = PrimitiveObjectInspectorFactory
+ .getPrimitiveLazyObjectInspector(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
+ break;
+ }
+ case LIST: {
+ ObjectInspector elementObjectInspector = getLazyObjectInspectorFromTypeInfo(
+ ((ListTypeInfo) typeInfo).getListElementTypeInfo(), false);
+ result = LazyObjectInspectorFactory.getLazyListObjectInspector(elementObjectInspector);
+ break;
+ }
+ case MAP: {
+ MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
+ ObjectInspector keyObjectInspector = getLazyObjectInspectorFromTypeInfo(
+ mapTypeInfo.getMapKeyTypeInfo(), false);
+ ObjectInspector valueObjectInspector = getLazyObjectInspectorFromTypeInfo(
+ mapTypeInfo.getMapValueTypeInfo(), false);
+ result = LazyObjectInspectorFactory.getLazyMapObjectInspector(keyObjectInspector,
+ valueObjectInspector);
+ break;
+ }
+ case STRUCT: {
+ StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+ List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+ List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+ List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
+
+ for (int i = 0; i < fieldTypeInfos.size(); i++) {
+ fieldObjectInspectors.add(getLazyObjectInspectorFromTypeInfo(fieldTypeInfos.get(i), false));
+ }
+
+ // if it is top level then create columnar
+ if (topLevel)
+ result = LazyObjectInspectorFactory.getLazyColumnarObjectInspector(fieldNames,
+ fieldObjectInspectors);
+ // if it is not top level then create struct
+ else
+ result = LazyObjectInspectorFactory.getLazyStructObjectInspector(fieldNames,
+ fieldObjectInspectors);
+
+ break;
+ }
+ default: {
+ result = null;
+ }
+ }
+ cachedLazyObjectInspector.put(typeInfo, result);
+ }
+ return result;
+ }
+
+ /**
+ * get top-level lazy object inspector
+ *
+ * @param fieldNames
+ * @param fieldTypeInfos
+ * @return
+ */
+ public static ObjectInspector getLazyObjectInspector(List<String> fieldNames, List<TypeInfo> fieldTypeInfos) {
+ List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
+ for (int i = 0; i < fieldTypeInfos.size(); i++) {
+ fieldObjectInspectors.add(getLazyObjectInspectorFromTypeInfo(fieldTypeInfos.get(i), false));
+ }
+
+ return LazyObjectInspectorFactory.getLazyColumnarObjectInspector(fieldNames, fieldObjectInspectors);
+ }
+
+ private LazyUtils() {
+ // prevent instantiation
+ }
+
+ /**
+ * Returns -1 if the first byte sequence is lexicographically less than the
+ * second; returns +1 if the second byte sequence is lexicographically less
+ * than the first; otherwise return 0.
+ */
+ public static int compare(byte[] b1, int start1, int length1, byte[] b2, int start2, int length2) {
+
+ int min = Math.min(length1, length2);
+
+ for (int i = 0; i < min; i++) {
+ if (b1[start1 + i] == b2[start2 + i]) {
+ continue;
+ }
+ if (b1[start1 + i] < b2[start2 + i]) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+
+ if (length1 < length2) {
+ return -1;
+ }
+ if (length1 > length2) {
+ return 1;
+ }
+ return 0;
+ }
+
+ public static int hashBytes(byte[] data, int start, int len) {
+ int hash = 1;
+ for (int i = start; i < len; i++) {
+ hash = (31 * hash) + data[i];
+ }
+ return hash;
+ }
+
+ /**
+ * Writes a zero-compressed encoded int to a byte array.
+ *
+ * @param byteStream
+ * the byte array/stream
+ * @param i
+ * the int
+ */
+ public static void writeVInt(DataOutput byteStream, int i) throws IOException {
+ writeVLong(byteStream, i);
+ }
+
+ /**
+ * Write a zero-compressed encoded long to a byte array.
+ *
+ * @param byteStream
+ * the byte array/stream
+ * @param l
+ * the long
+ */
+ public static void writeVLong(DataOutput byteStream, long l) throws IOException {
+ if (l >= -112 && l <= 127) {
+ byteStream.write((byte) l);
+ return;
+ }
+
+ int len = -112;
+ if (l < 0) {
+ l ^= -1L; // take one's complement'
+ len = -120;
+ }
+
+ long tmp = l;
+ while (tmp != 0) {
+ tmp = tmp >> 8;
+ len--;
+ }
+
+ byteStream.write((byte) len);
+
+ len = (len < -120) ? -(len + 120) : -(len + 112);
+
+ for (int idx = len; idx != 0; idx--) {
+ int shiftbits = (idx - 1) * 8;
+ long mask = 0xFFL << shiftbits;
+ byteStream.write((byte) ((l & mask) >> shiftbits));
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java
new file mode 100644
index 0000000..b1ca622
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
+
+/**
+ * ObjectInspector for LazyColumnar.
+ *
+ * @see LazyColumnar
+ */
+public class LazyColumnarObjectInspector extends StandardStructObjectInspector implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ public LazyColumnarObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+ super(structFieldNames, structFieldObjectInspectors);
+ }
+
+ public LazyColumnarObjectInspector(List<StructField> fields) {
+ super(fields);
+ }
+
+ @Override
+ public Object getStructFieldData(Object data, StructField fieldRef) {
+ if (data == null) {
+ return null;
+ }
+ LazyColumnar struct = (LazyColumnar) data;
+ MyField f = (MyField) fieldRef;
+
+ int fieldID = f.getFieldID();
+ assert (fieldID >= 0 && fieldID < fields.size());
+
+ Object column = struct.getField(fieldID);
+ return column;
+ }
+
+ @Override
+ public List<Object> getStructFieldsDataAsList(Object data) {
+ if (data == null) {
+ return null;
+ }
+ LazyColumnar struct = (LazyColumnar) data;
+ return struct.getFieldsAsList();
+ }
+
+ public String toString() {
+ String str = "";
+ for (MyField f : fields) {
+ str += f.getFieldName() + ":" + f.getFieldObjectInspector().getTypeName() + " ";
+ }
+ return str;
+ }
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
similarity index 61%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
index dc4e85b..aaa5d66 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyListObjectInspector.java
@@ -29,34 +29,34 @@
*/
public class LazyListObjectInspector extends StandardListObjectInspector {
- protected LazyListObjectInspector(ObjectInspector listElementObjectInspector) {
- super(listElementObjectInspector);
- }
+ protected LazyListObjectInspector(ObjectInspector listElementObjectInspector) {
+ super(listElementObjectInspector);
+ }
- @Override
- public List<?> getList(Object data) {
- if (data == null) {
- return null;
- }
- LazyArray array = (LazyArray) data;
- return array.getList();
- }
+ @Override
+ public List<?> getList(Object data) {
+ if (data == null) {
+ return null;
+ }
+ LazyArray array = (LazyArray) data;
+ return array.getList();
+ }
- @Override
- public Object getListElement(Object data, int index) {
- if (data == null) {
- return null;
- }
- LazyArray array = (LazyArray) data;
- return array.getListElementObject(index);
- }
+ @Override
+ public Object getListElement(Object data, int index) {
+ if (data == null) {
+ return null;
+ }
+ LazyArray array = (LazyArray) data;
+ return array.getListElementObject(index);
+ }
- @Override
- public int getListLength(Object data) {
- if (data == null) {
- return -1;
- }
- LazyArray array = (LazyArray) data;
- return array.getListLength();
- }
+ @Override
+ public int getListLength(Object data) {
+ if (data == null) {
+ return -1;
+ }
+ LazyArray array = (LazyArray) data;
+ return array.getListLength();
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
similarity index 62%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
index a3be142..1b0c412 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyMapObjectInspector.java
@@ -31,32 +31,31 @@
*/
public class LazyMapObjectInspector extends StandardMapObjectInspector {
- protected LazyMapObjectInspector(ObjectInspector mapKeyObjectInspector,
- ObjectInspector mapValueObjectInspector) {
- super(mapKeyObjectInspector, mapValueObjectInspector);
- }
+ protected LazyMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
+ super(mapKeyObjectInspector, mapValueObjectInspector);
+ }
- @Override
- public Map<?, ?> getMap(Object data) {
- if (data == null) {
- return null;
- }
- return ((LazyMap) data).getMap();
- }
+ @Override
+ public Map<?, ?> getMap(Object data) {
+ if (data == null) {
+ return null;
+ }
+ return ((LazyMap) data).getMap();
+ }
- @Override
- public int getMapSize(Object data) {
- if (data == null) {
- return -1;
- }
- return ((LazyMap) data).getMapSize();
- }
+ @Override
+ public int getMapSize(Object data) {
+ if (data == null) {
+ return -1;
+ }
+ return ((LazyMap) data).getMapSize();
+ }
- @Override
- public Object getMapValueElement(Object data, Object key) {
- if (data == null) {
- return -1;
- }
- return ((LazyMap) data).getMapValueElement(key);
- }
+ @Override
+ public Object getMapValueElement(Object data, Object key) {
+ if (data == null) {
+ return -1;
+ }
+ return ((LazyMap) data).getMapValueElement(key);
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java
new file mode 100644
index 0000000..8093c94
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java
@@ -0,0 +1,82 @@
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * ObjectInspectorFactory is the primary way to create new ObjectInspector
+ * instances.
+ * SerDe classes should call the static functions in this library to create an
+ * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
+ * The reason of having caches here is that ObjectInspectors do not have an
+ * internal state - so ObjectInspectors with the same construction parameters
+ * should result in exactly the same ObjectInspector.
+ */
+
+public final class LazyObjectInspectorFactory {
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyColumnarObjectInspector> cachedLazyColumnarObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyColumnarObjectInspector>();
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyStructObjectInspector> cachedLazyStructObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyStructObjectInspector>();
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector> cachedLazyListObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector>();
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector> cachedLazyMapObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector>();
+
+ public static LazyColumnarObjectInspector getLazyColumnarObjectInspector(List<String> structFieldNames,
+ List<ObjectInspector> structFieldObjectInspectors) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(structFieldNames);
+ signature.add(structFieldObjectInspectors);
+ LazyColumnarObjectInspector result = cachedLazyColumnarObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyColumnarObjectInspector(structFieldNames, structFieldObjectInspectors);
+ cachedLazyColumnarObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ public static LazyStructObjectInspector getLazyStructObjectInspector(List<String> structFieldNames,
+ List<ObjectInspector> structFieldObjectInspectors) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(structFieldNames);
+ signature.add(structFieldObjectInspectors);
+ LazyStructObjectInspector result = cachedLazyStructObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyStructObjectInspector(structFieldNames, structFieldObjectInspectors);
+ cachedLazyStructObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ public static LazyListObjectInspector getLazyListObjectInspector(ObjectInspector listElementInspector) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(listElementInspector);
+ LazyListObjectInspector result = cachedLazyListObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyListObjectInspector(listElementInspector);
+ cachedLazyListObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ public static LazyMapObjectInspector getLazyMapObjectInspector(ObjectInspector keyInspector,
+ ObjectInspector valueInspector) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(keyInspector);
+ signature.add(valueInspector);
+ LazyMapObjectInspector result = cachedLazyMapObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyMapObjectInspector(keyInspector, valueInspector);
+ cachedLazyMapObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ private LazyObjectInspectorFactory() {
+ // prevent instantiation
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java
new file mode 100644
index 0000000..ad70d4c
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyStruct;
+
+/**
+ * ObjectInspector for LazyStruct.
+ *
+ * @see LazyStruct
+ */
+public class LazyStructObjectInspector extends StandardStructObjectInspector {
+
+ protected LazyStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+ super(structFieldNames, structFieldObjectInspectors);
+ }
+
+ protected LazyStructObjectInspector(List<StructField> fields) {
+ super(fields);
+ }
+
+ @Override
+ public Object getStructFieldData(Object data, StructField fieldRef) {
+ if (data == null) {
+ return null;
+ }
+ LazyStruct struct = (LazyStruct) data;
+ MyField f = (MyField) fieldRef;
+
+ int fieldID = f.getFieldID();
+ assert (fieldID >= 0 && fieldID < fields.size());
+
+ return struct.getField(fieldID);
+ }
+
+ @Override
+ public List<Object> getStructFieldsDataAsList(Object data) {
+ if (data == null) {
+ return null;
+ }
+ LazyStruct struct = (LazyStruct) data;
+ return struct.getFieldsAsList();
+ }
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
similarity index 72%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
index 7ef8bdd..eaa2bbc 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/AbstractPrimitiveLazyObjectInspector.java
@@ -26,24 +26,23 @@
/**
* An AbstractPrimitiveLazyObjectInspector for a LazyPrimitive object.
*/
-public abstract class AbstractPrimitiveLazyObjectInspector<T extends Writable>
- extends AbstractPrimitiveObjectInspector {
+public abstract class AbstractPrimitiveLazyObjectInspector<T extends Writable> extends AbstractPrimitiveObjectInspector {
- protected AbstractPrimitiveLazyObjectInspector(PrimitiveTypeEntry typeEntry) {
- super(typeEntry);
- }
+ protected AbstractPrimitiveLazyObjectInspector(PrimitiveTypeEntry typeEntry) {
+ super(typeEntry);
+ }
- @SuppressWarnings("unchecked")
- @Override
- public T getPrimitiveWritableObject(Object o) {
- if (o == null)
- System.out.println("sth. wrong");
- return o == null ? null : ((LazyPrimitive<?, T>) o).getWritableObject();
- }
+ @SuppressWarnings("unchecked")
+ @Override
+ public T getPrimitiveWritableObject(Object o) {
+ if (o == null)
+ System.out.println("sth. wrong");
+ return o == null ? null : ((LazyPrimitive<?, T>) o).getWritableObject();
+ }
- @Override
- public boolean preferWritable() {
- return true;
- }
+ @Override
+ public boolean preferWritable() {
+ return true;
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
similarity index 66%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
index 472dce0..7927c1e 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyBooleanObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableBooleanObjectInspector inspects a BooleanWritable Object.
*/
-public class LazyBooleanObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<BooleanWritable> implements
- BooleanObjectInspector {
+public class LazyBooleanObjectInspector extends AbstractPrimitiveLazyObjectInspector<BooleanWritable> implements
+ BooleanObjectInspector {
- LazyBooleanObjectInspector() {
- super(PrimitiveObjectInspectorUtils.booleanTypeEntry);
- }
+ LazyBooleanObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.booleanTypeEntry);
+ }
- @Override
- public boolean get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public boolean get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyBoolean((LazyBoolean) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyBoolean((LazyBoolean) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Boolean.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Boolean.valueOf(get(o));
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
index e631fc7..10a881c 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyByteObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableByteObjectInspector inspects a ByteWritable Object.
*/
-public class LazyByteObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<ByteWritable> implements
- ByteObjectInspector {
+public class LazyByteObjectInspector extends AbstractPrimitiveLazyObjectInspector<ByteWritable> implements
+ ByteObjectInspector {
- LazyByteObjectInspector() {
- super(PrimitiveObjectInspectorUtils.byteTypeEntry);
- }
+ LazyByteObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.byteTypeEntry);
+ }
- @Override
- public byte get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public byte get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyByte((LazyByte) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyByte((LazyByte) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Byte.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Byte.valueOf(get(o));
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
index 1257f11..9f98b56 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyDoubleObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableDoubleObjectInspector inspects a DoubleWritable Object.
*/
-public class LazyDoubleObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<DoubleWritable> implements
- DoubleObjectInspector {
+public class LazyDoubleObjectInspector extends AbstractPrimitiveLazyObjectInspector<DoubleWritable> implements
+ DoubleObjectInspector {
- LazyDoubleObjectInspector() {
- super(PrimitiveObjectInspectorUtils.doubleTypeEntry);
- }
+ LazyDoubleObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.doubleTypeEntry);
+ }
- @Override
- public double get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public double get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyDouble((LazyDouble) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyDouble((LazyDouble) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Double.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Double.valueOf(get(o));
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
index c66a06f..bf3e9a2 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyFloatObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A FloatObjectInspector inspects a FloatWritable Object.
*/
-public class LazyFloatObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<FloatWritable> implements
- FloatObjectInspector {
+public class LazyFloatObjectInspector extends AbstractPrimitiveLazyObjectInspector<FloatWritable> implements
+ FloatObjectInspector {
- LazyFloatObjectInspector() {
- super(PrimitiveObjectInspectorUtils.floatTypeEntry);
- }
+ LazyFloatObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.floatTypeEntry);
+ }
- @Override
- public float get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public float get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyFloat((LazyFloat) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyFloat((LazyFloat) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Float.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Float.valueOf(get(o));
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
index b2159e0..87bcb0d 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyIntObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableIntObjectInspector inspects a IntWritable Object.
*/
-public class LazyIntObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<IntWritable> implements
- IntObjectInspector {
+public class LazyIntObjectInspector extends AbstractPrimitiveLazyObjectInspector<IntWritable> implements
+ IntObjectInspector {
- LazyIntObjectInspector() {
- super(PrimitiveObjectInspectorUtils.intTypeEntry);
- }
+ LazyIntObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.intTypeEntry);
+ }
- @Override
- public int get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public int get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyInteger((LazyInteger) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyInteger((LazyInteger) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Integer.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Integer.valueOf(get(o));
+ }
}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
index 1fc2d53..06b5d3c 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyLongObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableLongObjectInspector inspects a LongWritable Object.
*/
-public class LazyLongObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<LongWritable> implements
- LongObjectInspector {
+public class LazyLongObjectInspector extends AbstractPrimitiveLazyObjectInspector<LongWritable> implements
+ LongObjectInspector {
- LazyLongObjectInspector() {
- super(PrimitiveObjectInspectorUtils.longTypeEntry);
- }
+ LazyLongObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.longTypeEntry);
+ }
- @Override
- public long get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public long get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyLong((LazyLong) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyLong((LazyLong) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Long.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Long.valueOf(get(o));
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
new file mode 100644
index 0000000..5d7ef48
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import java.util.ArrayList;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+/**
+ * LazyPrimitiveObjectInspectorFactory is the primary way to create new
+ * ObjectInspector instances.
+ * SerDe classes should call the static functions in this library to create an
+ * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
+ * The reason of having caches here is that ObjectInspector is because
+ * ObjectInspectors do not have an internal state - so ObjectInspectors with the
+ * same construction parameters should result in exactly the same
+ * ObjectInspector.
+ */
+public final class LazyPrimitiveObjectInspectorFactory {
+
+ public static final LazyBooleanObjectInspector LAZY_BOOLEAN_OBJECT_INSPECTOR = new LazyBooleanObjectInspector();
+ public static final LazyByteObjectInspector LAZY_BYTE_OBJECT_INSPECTOR = new LazyByteObjectInspector();
+ public static final LazyShortObjectInspector LAZY_SHORT_OBJECT_INSPECTOR = new LazyShortObjectInspector();
+ public static final LazyIntObjectInspector LAZY_INT_OBJECT_INSPECTOR = new LazyIntObjectInspector();
+ public static final LazyLongObjectInspector LAZY_LONG_OBJECT_INSPECTOR = new LazyLongObjectInspector();
+ public static final LazyFloatObjectInspector LAZY_FLOAT_OBJECT_INSPECTOR = new LazyFloatObjectInspector();
+ public static final LazyDoubleObjectInspector LAZY_DOUBLE_OBJECT_INSPECTOR = new LazyDoubleObjectInspector();
+ public static final LazyVoidObjectInspector LAZY_VOID_OBJECT_INSPECTOR = new LazyVoidObjectInspector();
+
+ static ConcurrentHashMap<ArrayList<Object>, LazyStringObjectInspector> cachedLazyStringObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyStringObjectInspector>();
+
+ public static LazyStringObjectInspector getLazyStringObjectInspector(boolean escaped, byte escapeChar) {
+ ArrayList<Object> signature = new ArrayList<Object>();
+ signature.add(Boolean.valueOf(escaped));
+ signature.add(Byte.valueOf(escapeChar));
+ LazyStringObjectInspector result = cachedLazyStringObjectInspector.get(signature);
+ if (result == null) {
+ result = new LazyStringObjectInspector(escaped, escapeChar);
+ cachedLazyStringObjectInspector.put(signature, result);
+ }
+ return result;
+ }
+
+ public static AbstractPrimitiveLazyObjectInspector<?> getLazyObjectInspector(PrimitiveCategory primitiveCategory,
+ boolean escaped, byte escapeChar) {
+
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ return LAZY_BOOLEAN_OBJECT_INSPECTOR;
+ case BYTE:
+ return LAZY_BYTE_OBJECT_INSPECTOR;
+ case SHORT:
+ return LAZY_SHORT_OBJECT_INSPECTOR;
+ case INT:
+ return LAZY_INT_OBJECT_INSPECTOR;
+ case LONG:
+ return LAZY_LONG_OBJECT_INSPECTOR;
+ case FLOAT:
+ return LAZY_FLOAT_OBJECT_INSPECTOR;
+ case DOUBLE:
+ return LAZY_DOUBLE_OBJECT_INSPECTOR;
+ case STRING:
+ return getLazyStringObjectInspector(escaped, escapeChar);
+ case VOID:
+ return LAZY_VOID_OBJECT_INSPECTOR;
+ default:
+ throw new RuntimeException("Internal error: Cannot find ObjectInspector " + " for " + primitiveCategory);
+ }
+ }
+
+ private LazyPrimitiveObjectInspectorFactory() {
+ // prevent instantiation
+ }
+
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
similarity index 67%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
index cb06dfd..b02d9bc 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyShortObjectInspector.java
@@ -26,26 +26,25 @@
/**
* A WritableShortObjectInspector inspects a ShortWritable Object.
*/
-public class LazyShortObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<ShortWritable> implements
- ShortObjectInspector {
+public class LazyShortObjectInspector extends AbstractPrimitiveLazyObjectInspector<ShortWritable> implements
+ ShortObjectInspector {
- LazyShortObjectInspector() {
- super(PrimitiveObjectInspectorUtils.shortTypeEntry);
- }
+ LazyShortObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.shortTypeEntry);
+ }
- @Override
- public short get(Object o) {
- return getPrimitiveWritableObject(o).get();
- }
+ @Override
+ public short get(Object o) {
+ return getPrimitiveWritableObject(o).get();
+ }
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyShort((LazyShort) o);
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyShort((LazyShort) o);
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- return o == null ? null : Short.valueOf(get(o));
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ return o == null ? null : Short.valueOf(get(o));
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java
new file mode 100644
index 0000000..4d649dc
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyString;
+
+/**
+ * A WritableStringObjectInspector inspects a Text Object.
+ */
+public class LazyStringObjectInspector extends AbstractPrimitiveLazyObjectInspector<Text> implements
+ StringObjectInspector {
+
+ boolean escaped;
+ byte escapeChar;
+
+ LazyStringObjectInspector(boolean escaped, byte escapeChar) {
+ super(PrimitiveObjectInspectorUtils.stringTypeEntry);
+ this.escaped = escaped;
+ this.escapeChar = escapeChar;
+ }
+
+ @Override
+ public Object copyObject(Object o) {
+ return o == null ? null : new LazyString((LazyString) o);
+ }
+
+ @Override
+ public Text getPrimitiveWritableObject(Object o) {
+ return o == null ? null : ((LazyString) o).getWritableObject();
+ }
+
+ @Override
+ public String getPrimitiveJavaObject(Object o) {
+ return o == null ? null : ((LazyString) o).getWritableObject().toString();
+ }
+
+ public boolean isEscaped() {
+ return escaped;
+ }
+
+ public byte getEscapeChar() {
+ return escapeChar;
+ }
+
+}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
similarity index 71%
rename from hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
rename to hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
index a30f1af..c916191 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyVoidObjectInspector.java
@@ -24,21 +24,20 @@
/**
* A WritableVoidObjectInspector inspects a NullWritable Object.
*/
-public class LazyVoidObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<NullWritable> implements
- VoidObjectInspector {
+public class LazyVoidObjectInspector extends AbstractPrimitiveLazyObjectInspector<NullWritable> implements
+ VoidObjectInspector {
- LazyVoidObjectInspector() {
- super(PrimitiveObjectInspectorUtils.voidTypeEntry);
- }
+ LazyVoidObjectInspector() {
+ super(PrimitiveObjectInspectorUtils.voidTypeEntry);
+ }
- @Override
- public Object copyObject(Object o) {
- return o;
- }
+ @Override
+ public Object copyObject(Object o) {
+ return o;
+ }
- @Override
- public Object getPrimitiveJavaObject(Object o) {
- throw new RuntimeException("Internal error: cannot create Void object.");
- }
+ @Override
+ public Object getPrimitiveJavaObject(Object o) {
+ throw new RuntimeException("Internal error: cannot create Void object.");
+ }
}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
new file mode 100644
index 0000000..33f0e51
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
+
+import java.util.HashMap;
+
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+/**
+ * PrimitiveObjectInspectorFactory is the primary way to create new
+ * PrimitiveObjectInspector instances.
+ * The reason of having caches here is that ObjectInspector is because
+ * ObjectInspectors do not have an internal state - so ObjectInspectors with the
+ * same construction parameters should result in exactly the same
+ * ObjectInspector.
+ */
+public final class PrimitiveObjectInspectorFactory {
+
+ public static final LazyBooleanObjectInspector LazyBooleanObjectInspector = new LazyBooleanObjectInspector();
+ public static final LazyByteObjectInspector LazyByteObjectInspector = new LazyByteObjectInspector();
+ public static final LazyShortObjectInspector LazyShortObjectInspector = new LazyShortObjectInspector();
+ public static final LazyIntObjectInspector LazyIntObjectInspector = new LazyIntObjectInspector();
+ public static final LazyLongObjectInspector LazyLongObjectInspector = new LazyLongObjectInspector();
+ public static final LazyFloatObjectInspector LazyFloatObjectInspector = new LazyFloatObjectInspector();
+ public static final LazyDoubleObjectInspector LazyDoubleObjectInspector = new LazyDoubleObjectInspector();
+ public static final LazyStringObjectInspector LazyStringObjectInspector = new LazyStringObjectInspector(false,
+ (byte) '\\');
+ public static final LazyVoidObjectInspector LazyVoidObjectInspector = new LazyVoidObjectInspector();
+
+ private static HashMap<PrimitiveCategory, AbstractPrimitiveLazyObjectInspector<?>> cachedPrimitiveLazyInspectorCache = new HashMap<PrimitiveCategory, AbstractPrimitiveLazyObjectInspector<?>>();
+
+ static {
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.BOOLEAN, LazyBooleanObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.BYTE, LazyByteObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.SHORT, LazyShortObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.INT, LazyIntObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.LONG, LazyLongObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.FLOAT, LazyFloatObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.DOUBLE, LazyDoubleObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.STRING, LazyStringObjectInspector);
+ cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.VOID, LazyVoidObjectInspector);
+ }
+
+ /**
+ * Returns the PrimitiveWritableObjectInspector for the PrimitiveCategory.
+ *
+ * @param primitiveCategory
+ */
+ public static AbstractPrimitiveLazyObjectInspector<?> getPrimitiveLazyObjectInspector(
+ PrimitiveCategory primitiveCategory) {
+ AbstractPrimitiveLazyObjectInspector<?> result = cachedPrimitiveLazyInspectorCache.get(primitiveCategory);
+ if (result == null) {
+ throw new RuntimeException("Internal error: Cannot find ObjectInspector " + " for " + primitiveCategory);
+ }
+ return result;
+ }
+
+ private PrimitiveObjectInspectorFactory() {
+ // prevent instantiation
+ }
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java
new file mode 100644
index 0000000..7830c52
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java
@@ -0,0 +1,16 @@
+package edu.uci.ics.hivesterix.serde.parser;
+
+import java.io.IOException;
+
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+public interface IHiveParser {
+ /**
+ * parse one hive rwo into
+ *
+ * @param row
+ * @param objectInspector
+ * @param tb
+ */
+ public void parse(byte[] data, int start, int length, ArrayTupleBuilder tb) throws IOException;
+}
diff --git a/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java
new file mode 100644
index 0000000..38e1b36
--- /dev/null
+++ b/hivesterix/hivesterix-serde/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java
@@ -0,0 +1,174 @@
+package edu.uci.ics.hivesterix.serde.parser;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
+import org.apache.hadoop.hive.serde2.lazy.LazyLong;
+import org.apache.hadoop.hive.serde2.lazy.LazyShort;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.io.Text;
+
+import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+
+public class TextToBinaryTupleParser implements IHiveParser {
+ private int[] invertedIndex;
+ private int[] fieldEnds;
+ private int lastNecessaryFieldIndex;
+ private LazySimpleStructObjectInspector inputObjectInspector;
+ private List<? extends StructField> fieldRefs;
+
+ public TextToBinaryTupleParser(int[] outputColumnsOffset, ObjectInspector structInspector) {
+ int size = 0;
+ for (int i = 0; i < outputColumnsOffset.length; i++)
+ if (outputColumnsOffset[i] >= 0)
+ size++;
+ invertedIndex = new int[size];
+ for (int i = 0; i < outputColumnsOffset.length; i++)
+ if (outputColumnsOffset[i] >= 0) {
+ invertedIndex[outputColumnsOffset[i]] = i;
+ lastNecessaryFieldIndex = i;
+ }
+ fieldEnds = new int[outputColumnsOffset.length];
+ for (int i = 0; i < fieldEnds.length; i++)
+ fieldEnds[i] = 0;
+ inputObjectInspector = (LazySimpleStructObjectInspector) structInspector;
+ fieldRefs = inputObjectInspector.getAllStructFieldRefs();
+ }
+
+ @Override
+ public void parse(byte[] bytes, int start, int length, ArrayTupleBuilder tb) throws IOException {
+ byte separator = inputObjectInspector.getSeparator();
+ boolean lastColumnTakesRest = inputObjectInspector.getLastColumnTakesRest();
+ boolean isEscaped = inputObjectInspector.isEscaped();
+ byte escapeChar = inputObjectInspector.getEscapeChar();
+ DataOutput output = tb.getDataOutput();
+
+ int structByteEnd = start + length - 1;
+ int fieldId = 0;
+ int fieldByteEnd = start;
+
+ // Go through all bytes in the byte[]
+ while (fieldByteEnd <= structByteEnd && fieldId <= lastNecessaryFieldIndex) {
+ if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) {
+ // Reached the end of a field?
+ if (lastColumnTakesRest && fieldId == fieldEnds.length - 1) {
+ fieldByteEnd = structByteEnd;
+ }
+ fieldEnds[fieldId] = fieldByteEnd;
+ if (fieldId == fieldEnds.length - 1 || fieldByteEnd == structByteEnd) {
+ // for the case of null fields
+ for (int i = fieldId; i < fieldEnds.length; i++) {
+ fieldEnds[i] = fieldByteEnd;
+ }
+ break;
+ }
+ fieldByteEnd++;
+ fieldId++;
+ } else {
+ if (isEscaped && bytes[fieldByteEnd] == escapeChar && fieldByteEnd + 1 < structByteEnd) {
+ // ignore the char after escape_char
+ fieldByteEnd += 2;
+ } else {
+ fieldByteEnd++;
+ }
+ }
+ }
+
+ for (int i = 0; i < invertedIndex.length; i++) {
+ int index = invertedIndex[i];
+ StructField fieldRef = fieldRefs.get(index);
+ ObjectInspector inspector = fieldRef.getFieldObjectInspector();
+ Category category = inspector.getCategory();
+ int fieldStart = index == 0 ? 0 : fieldEnds[index - 1] + 1;
+ int fieldEnd = fieldEnds[index];
+ if (bytes[fieldEnd] == separator)
+ fieldEnd--;
+ int fieldLen = fieldEnd - fieldStart + 1;
+ switch (category) {
+ case PRIMITIVE:
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
+ switch (poi.getPrimitiveCategory()) {
+ case VOID: {
+ break;
+ }
+ case BOOLEAN: {
+ output.write(bytes[fieldStart]);
+ break;
+ }
+ case BYTE: {
+ output.write(bytes[fieldStart]);
+ break;
+ }
+ case SHORT: {
+ short v = LazyShort.parseShort(bytes, fieldStart, fieldLen);
+ output.write((byte) (v >> 8));
+ output.write((byte) (v));
+ break;
+ }
+ case INT: {
+ int v = LazyInteger.parseInt(bytes, fieldStart, fieldLen);
+ LazyUtils.writeVInt(output, v);
+ break;
+ }
+ case LONG: {
+ long v = LazyLong.parseLong(bytes, fieldStart, fieldLen);
+ LazyUtils.writeVLong(output, v);
+ break;
+ }
+ case FLOAT: {
+ float value = Float.parseFloat(Text.decode(bytes, fieldStart, fieldLen));
+ int v = Float.floatToIntBits(value);
+ output.write((byte) (v >> 24));
+ output.write((byte) (v >> 16));
+ output.write((byte) (v >> 8));
+ output.write((byte) (v));
+ break;
+ }
+ case DOUBLE: {
+ try {
+ double value = Double.parseDouble(Text.decode(bytes, fieldStart, fieldLen));
+ long v = Double.doubleToLongBits(value);
+ output.write((byte) (v >> 56));
+ output.write((byte) (v >> 48));
+ output.write((byte) (v >> 40));
+ output.write((byte) (v >> 32));
+ output.write((byte) (v >> 24));
+ output.write((byte) (v >> 16));
+ output.write((byte) (v >> 8));
+ output.write((byte) (v));
+ } catch (NumberFormatException e) {
+ throw e;
+ }
+ break;
+ }
+ case STRING: {
+ LazyUtils.writeVInt(output, fieldLen);
+ output.write(bytes, fieldStart, fieldLen);
+ break;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
+ }
+ }
+ break;
+ case STRUCT:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ case LIST:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ case MAP:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ case UNION:
+ throw new NotImplementedException("Unrecognized type: struct ");
+ }
+ tb.addFieldEndOffset();
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-translator/pom.xml b/hivesterix/hivesterix-translator/pom.xml
new file mode 100644
index 0000000..b99d652
--- /dev/null
+++ b/hivesterix/hivesterix-translator/pom.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hivesterix-translator</artifactId>
+ <name>hivesterix-translator</name>
+
+ <parent>
+ <artifactId>hivesterix</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-common</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix-runtime</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
new file mode 100644
index 0000000..80b3fef
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
@@ -0,0 +1,807 @@
+package edu.uci.ics.hivesterix.logical.plan;
+
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
+import edu.uci.ics.hivesterix.logical.expression.HiveAlgebricksBuiltInFunctionMap;
+import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
+import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
+import edu.uci.ics.hivesterix.logical.plan.visitor.ExtractVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.FilterVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.GroupByVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.JoinVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.LateralViewJoinVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.LimitVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.MapJoinVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.ProjectVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.SortVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.TableScanWriteVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.UnionVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Visitor;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ReplicateOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
+import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor;
+import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.PlanPrettyPrinter;
+
+@SuppressWarnings("rawtypes")
+public class HiveAlgebricksTranslator implements Translator {
+
+ private int currentVariable = 0;
+
+ private List<Mutable<ILogicalOperator>> logicalOp = new ArrayList<Mutable<ILogicalOperator>>();
+
+ private boolean continueTraverse = true;
+
+ private IMetadataProvider<PartitionDesc, Object> metaData;
+
+ /**
+ * map variable name to the logical variable
+ */
+ private HashMap<String, LogicalVariable> nameToLogicalVariableMap = new HashMap<String, LogicalVariable>();
+
+ /**
+ * map field name to LogicalVariable
+ */
+ private HashMap<String, LogicalVariable> fieldToLogicalVariableMap = new HashMap<String, LogicalVariable>();
+
+ /**
+ * map logical variable to name
+ */
+ private HashMap<LogicalVariable, String> logicalVariableToFieldMap = new HashMap<LogicalVariable, String>();
+
+ /**
+ * asterix root operators
+ */
+ private List<Mutable<ILogicalOperator>> rootOperators = new ArrayList<Mutable<ILogicalOperator>>();
+
+ /**
+ * a list of visitors
+ */
+ private List<Visitor> visitors = new ArrayList<Visitor>();
+
+ /**
+ * output writer to print things out
+ */
+ private static PrintWriter outputWriter = new PrintWriter(new OutputStreamWriter(System.out));
+
+ /**
+ * map a logical variable to type info
+ */
+ private HashMap<LogicalVariable, TypeInfo> variableToType = new HashMap<LogicalVariable, TypeInfo>();
+
+ @Override
+ public LogicalVariable getVariable(String fieldName, TypeInfo type) {
+ LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
+ if (var == null) {
+ currentVariable++;
+ var = new LogicalVariable(currentVariable);
+ fieldToLogicalVariableMap.put(fieldName, var);
+ nameToLogicalVariableMap.put(var.toString(), var);
+ variableToType.put(var, type);
+ logicalVariableToFieldMap.put(var, fieldName);
+ }
+ return var;
+ }
+
+ @Override
+ public LogicalVariable getNewVariable(String fieldName, TypeInfo type) {
+ currentVariable++;
+ LogicalVariable var = new LogicalVariable(currentVariable);
+ fieldToLogicalVariableMap.put(fieldName, var);
+ nameToLogicalVariableMap.put(var.toString(), var);
+ variableToType.put(var, type);
+ logicalVariableToFieldMap.put(var, fieldName);
+ return var;
+ }
+
+ @Override
+ public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar) {
+ String name = this.logicalVariableToFieldMap.get(oldVar);
+ if (name != null) {
+ fieldToLogicalVariableMap.put(name, newVar);
+ nameToLogicalVariableMap.put(newVar.toString(), newVar);
+ nameToLogicalVariableMap.put(oldVar.toString(), newVar);
+ logicalVariableToFieldMap.put(newVar, name);
+ }
+ }
+
+ @Override
+ public IMetadataProvider<PartitionDesc, Object> getMetadataProvider() {
+ return metaData;
+ }
+
+ /**
+ * only get an variable, without rewriting it
+ *
+ * @param fieldName
+ * @return
+ */
+ private LogicalVariable getVariableOnly(String fieldName) {
+ return fieldToLogicalVariableMap.get(fieldName);
+ }
+
+ private void updateVariable(String fieldName, LogicalVariable variable) {
+ LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
+ if (var == null) {
+ fieldToLogicalVariableMap.put(fieldName, variable);
+ nameToLogicalVariableMap.put(fieldName, variable);
+ } else if (!var.equals(variable)) {
+ fieldToLogicalVariableMap.put(fieldName, variable);
+ nameToLogicalVariableMap.put(fieldName, variable);
+ }
+ }
+
+ /**
+ * get a list of logical variables from the schema
+ *
+ * @param schema
+ * @return
+ */
+ @Override
+ public List<LogicalVariable> getVariablesFromSchema(Schema schema) {
+ List<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ List<String> names = schema.getNames();
+
+ for (String name : names)
+ variables.add(nameToLogicalVariableMap.get(name));
+ return variables;
+ }
+
+ /**
+ * get variable to typeinfo map
+ *
+ * @return
+ */
+ public HashMap<LogicalVariable, TypeInfo> getVariableContext() {
+ return this.variableToType;
+ }
+
+ /**
+ * get the number of variables s
+ *
+ * @return
+ */
+ public int getVariableCounter() {
+ return currentVariable + 1;
+ }
+
+ /**
+ * translate from hive operator tree to asterix operator tree
+ *
+ * @param hive
+ * roots
+ * @return Algebricks roots
+ */
+ public void translate(List<Operator> hiveRoot, ILogicalOperator parentOperator,
+ HashMap<String, PartitionDesc> aliasToPathMap) throws AlgebricksException {
+ /**
+ * register visitors
+ */
+ visitors.add(new FilterVisitor());
+ visitors.add(new GroupByVisitor());
+ visitors.add(new JoinVisitor());
+ visitors.add(new LateralViewJoinVisitor());
+ visitors.add(new UnionVisitor());
+ visitors.add(new LimitVisitor());
+ visitors.add(new MapJoinVisitor());
+ visitors.add(new ProjectVisitor());
+ visitors.add(new SortVisitor());
+ visitors.add(new ExtractVisitor());
+ visitors.add(new TableScanWriteVisitor(aliasToPathMap));
+
+ List<Mutable<ILogicalOperator>> refList = translate(hiveRoot, new MutableObject<ILogicalOperator>(
+ parentOperator));
+ insertReplicateOperator(refList);
+ if (refList != null)
+ rootOperators.addAll(refList);
+ }
+
+ /**
+ * translate operator DAG
+ *
+ * @param hiveRoot
+ * @param AlgebricksParentOperator
+ * @return
+ */
+ private List<Mutable<ILogicalOperator>> translate(List<Operator> hiveRoot,
+ Mutable<ILogicalOperator> AlgebricksParentOperator) throws AlgebricksException {
+
+ for (Operator hiveOperator : hiveRoot) {
+ continueTraverse = true;
+ Mutable<ILogicalOperator> currentOperatorRef = null;
+ if (hiveOperator.getType() == OperatorType.FILTER) {
+ FilterOperator fop = (FilterOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.REDUCESINK) {
+ ReduceSinkOperator fop = (ReduceSinkOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.JOIN) {
+ JoinOperator fop = (JoinOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null) {
+ continueTraverse = true;
+ break;
+ } else
+ continueTraverse = false;
+ }
+ if (currentOperatorRef == null)
+ return null;
+ } else if (hiveOperator.getType() == OperatorType.LATERALVIEWJOIN) {
+ LateralViewJoinOperator fop = (LateralViewJoinOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ if (currentOperatorRef == null)
+ return null;
+ } else if (hiveOperator.getType() == OperatorType.MAPJOIN) {
+ MapJoinOperator fop = (MapJoinOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null) {
+ continueTraverse = true;
+ break;
+ } else
+ continueTraverse = false;
+ }
+ if (currentOperatorRef == null)
+ return null;
+ } else if (hiveOperator.getType() == OperatorType.SELECT) {
+ SelectOperator fop = (SelectOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.EXTRACT) {
+ ExtractOperator fop = (ExtractOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.GROUPBY) {
+ GroupByOperator fop = (GroupByOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.TABLESCAN) {
+ TableScanOperator fop = (TableScanOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.FILESINK) {
+ FileSinkOperator fop = (FileSinkOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.LIMIT) {
+ LimitOperator lop = (LimitOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.UDTF) {
+ UDTFOperator lop = (UDTFOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null)
+ break;
+ }
+ } else if (hiveOperator.getType() == OperatorType.UNION) {
+ UnionOperator lop = (UnionOperator) hiveOperator;
+ for (Visitor visitor : visitors) {
+ currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);
+ if (currentOperatorRef != null) {
+ continueTraverse = true;
+ break;
+ } else
+ continueTraverse = false;
+ }
+ } else
+ ;
+ if (hiveOperator.getChildOperators() != null && hiveOperator.getChildOperators().size() > 0
+ && continueTraverse) {
+ @SuppressWarnings("unchecked")
+ List<Operator> children = hiveOperator.getChildOperators();
+ if (currentOperatorRef == null)
+ currentOperatorRef = AlgebricksParentOperator;
+ translate(children, currentOperatorRef);
+ }
+ if (hiveOperator.getChildOperators() == null || hiveOperator.getChildOperators().size() == 0)
+ logicalOp.add(currentOperatorRef);
+ }
+ return logicalOp;
+ }
+
+ /**
+ * used in select, group by to get no-column-expression columns
+ *
+ * @param cols
+ * @return
+ */
+ public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,
+ ArrayList<LogicalVariable> variables) {
+
+ ArrayList<Mutable<ILogicalExpression>> expressions = new ArrayList<Mutable<ILogicalExpression>>();
+
+ /**
+ * variables to be appended in the assign operator
+ */
+ ArrayList<LogicalVariable> appendedVariables = new ArrayList<LogicalVariable>();
+
+ // one variable can only be assigned once
+ for (ExprNodeDesc hiveExpr : cols) {
+ rewriteExpression(hiveExpr);
+
+ if (hiveExpr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc desc2 = (ExprNodeColumnDesc) hiveExpr;
+ String fieldName = desc2.getTabAlias() + "." + desc2.getColumn();
+
+ // System.out.println("project expr: " + fieldName);
+
+ if (fieldName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariable(fieldName, hiveExpr.getTypeInfo());
+ desc2.setColumn(var.toString());
+ desc2.setTabAlias("");
+ variables.add(var);
+ } else {
+ LogicalVariable var = nameToLogicalVariableMap.get(desc2.getColumn());
+ String name = this.logicalVariableToFieldMap.get(var);
+ var = this.getVariableOnly(name);
+ variables.add(var);
+ }
+ } else {
+ Mutable<ILogicalExpression> asterixExpr = translateScalarFucntion(hiveExpr);
+ expressions.add(asterixExpr);
+ LogicalVariable var = getVariable(hiveExpr.getExprString() + asterixExpr.hashCode(),
+ hiveExpr.getTypeInfo());
+ variables.add(var);
+ appendedVariables.add(var);
+ }
+ }
+
+ /**
+ * create an assign operator to deal with appending
+ */
+ ILogicalOperator assignOp = null;
+ if (appendedVariables.size() > 0) {
+ assignOp = new AssignOperator(appendedVariables, expressions);
+ assignOp.getInputs().add(parent);
+ }
+ return assignOp;
+ }
+
+ private ILogicalPlan plan;
+
+ public ILogicalPlan genLogicalPlan() {
+ plan = new ALogicalPlanImpl(rootOperators);
+ return plan;
+ }
+
+ public void printOperators() throws AlgebricksException {
+ LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
+ StringBuilder buffer = new StringBuilder();
+ PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
+ outputWriter.println(buffer);
+ outputWriter.println("rewritten variables: ");
+ outputWriter.flush();
+ printVariables();
+
+ }
+
+ public static void setOutputPrinter(PrintWriter writer) {
+ outputWriter = writer;
+ }
+
+ private void printVariables() {
+ Set<Entry<String, LogicalVariable>> entries = fieldToLogicalVariableMap.entrySet();
+
+ for (Entry<String, LogicalVariable> entry : entries) {
+ outputWriter.println(entry.getKey() + " -> " + entry.getValue());
+ }
+ outputWriter.flush();
+ }
+
+ /**
+ * generate the object inspector for the output of an operator
+ *
+ * @param operator
+ * The Hive operator
+ * @return an ObjectInspector object
+ */
+ public Schema generateInputSchema(Operator operator) {
+ List<String> variableNames = new ArrayList<String>();
+ List<TypeInfo> typeList = new ArrayList<TypeInfo>();
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+
+ for (ColumnInfo col : columns) {
+ // typeList.add();
+ TypeInfo type = col.getType();
+ typeList.add(type);
+
+ String fieldName = col.getInternalName();
+ variableNames.add(fieldName);
+ }
+
+ return new Schema(variableNames, typeList);
+ }
+
+ /**
+ * rewrite the names of output columns for feature expression evaluators to
+ * use
+ *
+ * @param operator
+ */
+ public void rewriteOperatorOutputSchema(Operator operator) {
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+
+ for (ColumnInfo column : columns) {
+ String columnName = column.getTabAlias() + "." + column.getInternalName();
+ if (columnName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariable(columnName, column.getType());
+ column.setInternalName(var.toString());
+ }
+ }
+ }
+
+ @Override
+ public void rewriteOperatorOutputSchema(List<LogicalVariable> variables, Operator operator) {
+
+ //printOperatorSchema(operator);
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+ if (variables.size() != columns.size()) {
+ throw new IllegalStateException("output cardinality error " + operator.getName() + " variable size: "
+ + variables.size() + " expected " + columns.size());
+ }
+
+ for (int i = 0; i < variables.size(); i++) {
+ LogicalVariable var = variables.get(i);
+ ColumnInfo column = columns.get(i);
+ String fieldName = column.getTabAlias() + "." + column.getInternalName();
+ if (fieldName.indexOf("$$") < 0) {
+ updateVariable(fieldName, var);
+ column.setInternalName(var.toString());
+ }
+ }
+ //printOperatorSchema(operator);
+ }
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpression(ExprNodeDesc expr) {
+ if (expr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
+ String fieldName = desc.getTabAlias() + "." + desc.getColumn();
+ if (fieldName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariableOnly(fieldName);
+ if (var == null) {
+ fieldName = "." + desc.getColumn();
+ var = getVariableOnly(fieldName);
+ if (var == null) {
+ fieldName = "null." + desc.getColumn();
+ var = getVariableOnly(fieldName);
+ if (var == null) {
+ throw new IllegalStateException(fieldName + " is wrong!!! ");
+ }
+ }
+ }
+ String name = this.logicalVariableToFieldMap.get(var);
+ var = getVariableOnly(name);
+ desc.setColumn(var.toString());
+ }
+ } else {
+ if (expr.getChildren() != null && expr.getChildren().size() > 0) {
+ List<ExprNodeDesc> children = expr.getChildren();
+ for (ExprNodeDesc desc : children)
+ rewriteExpression(desc);
+ }
+ }
+ }
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpressionPartial(ExprNodeDesc expr) {
+ if (expr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
+ String fieldName = desc.getTabAlias() + "." + desc.getColumn();
+ if (fieldName.indexOf("$$") < 0) {
+ LogicalVariable var = getVariableOnly(fieldName);
+ desc.setColumn(var.toString());
+ }
+ } else {
+ if (expr.getChildren() != null && expr.getChildren().size() > 0) {
+ List<ExprNodeDesc> children = expr.getChildren();
+ for (ExprNodeDesc desc : children)
+ rewriteExpressionPartial(desc);
+ }
+ }
+ }
+
+ // private void printOperatorSchema(Operator operator) {
+ // // System.out.println(operator.getName());
+ // // List<ColumnInfo> columns = operator.getSchema().getSignature();
+ // // for (ColumnInfo column : columns) {
+ // // System.out.print(column.getTabAlias() + "." +
+ // // column.getInternalName() + " ");
+ // // }
+ // // System.out.println();
+ // }
+
+ /**
+ * translate scalar function expression
+ *
+ * @param hiveExpr
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc hiveExpr) {
+ ILogicalExpression AlgebricksExpr;
+
+ if (hiveExpr instanceof ExprNodeGenericFuncDesc) {
+ List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
+ List<ExprNodeDesc> children = hiveExpr.getChildren();
+
+ for (ExprNodeDesc child : children)
+ arguments.add(translateScalarFucntion(child));
+
+ ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) hiveExpr;
+ GenericUDF genericUdf = funcExpr.getGenericUDF();
+ UDF udf = null;
+ if (genericUdf instanceof GenericUDFBridge) {
+ GenericUDFBridge bridge = (GenericUDFBridge) genericUdf;
+ try {
+ udf = bridge.getUdfClass().newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * set up the hive function
+ */
+ Object hiveFunction = genericUdf;
+ if (udf != null)
+ hiveFunction = udf;
+
+ FunctionIdentifier funcId = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getAlgebricksFunctionId(hiveFunction
+ .getClass());
+ if (funcId == null) {
+ funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, hiveFunction.getClass().getName());
+ }
+
+ Object functionInfo = null;
+ if (genericUdf instanceof GenericUDFBridge) {
+ functionInfo = funcExpr;
+ }
+
+ /**
+ * generate the function call expression
+ */
+ ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
+ funcId, functionInfo), arguments);
+ AlgebricksExpr = AlgebricksFuncExpr;
+
+ } else if (hiveExpr instanceof ExprNodeColumnDesc) {
+ ExprNodeColumnDesc column = (ExprNodeColumnDesc) hiveExpr;
+ LogicalVariable var = this.getVariable(column.getColumn());
+ AlgebricksExpr = new VariableReferenceExpression(var);
+
+ } else if (hiveExpr instanceof ExprNodeFieldDesc) {
+ FunctionIdentifier funcId;
+ funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.FIELDACCESS);
+
+ ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
+ funcId, hiveExpr));
+ AlgebricksExpr = AlgebricksFuncExpr;
+ } else if (hiveExpr instanceof ExprNodeConstantDesc) {
+ ExprNodeConstantDesc hiveConst = (ExprNodeConstantDesc) hiveExpr;
+ Object value = hiveConst.getValue();
+ AlgebricksExpr = new ConstantExpression(new HivesterixConstantValue(value));
+ } else if (hiveExpr instanceof ExprNodeNullDesc) {
+ FunctionIdentifier funcId;
+ funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.NULL);
+
+ ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(
+ funcId, hiveExpr));
+
+ AlgebricksExpr = AlgebricksFuncExpr;
+ } else {
+ throw new IllegalStateException("unknown hive expression");
+ }
+ return new MutableObject<ILogicalExpression>(AlgebricksExpr);
+ }
+
+ /**
+ * translate aggregation function expression
+ *
+ * @param aggregateDesc
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc) {
+
+ String UDAFName = aggregateDesc.getGenericUDAFName();
+
+ List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
+ List<ExprNodeDesc> children = aggregateDesc.getParameters();
+
+ for (ExprNodeDesc child : children)
+ arguments.add(translateScalarFucntion(child));
+
+ FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDAFName + "("
+ + aggregateDesc.getMode() + ")");
+ HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, aggregateDesc);
+ AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(funcInfo, false,
+ arguments);
+ return new MutableObject<ILogicalExpression>(aggregationExpression);
+ }
+
+ /**
+ * translate aggregation function expression
+ *
+ * @param aggregator
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument) {
+
+ String UDTFName = udtfDesc.getUDTFName();
+
+ FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDTFName);
+ UnnestingFunctionCallExpression unnestingExpression = new UnnestingFunctionCallExpression(new HiveFunctionInfo(
+ funcId, udtfDesc));
+ unnestingExpression.getArguments().add(argument);
+ return new MutableObject<ILogicalExpression>(unnestingExpression);
+ }
+
+ /**
+ * get typeinfo
+ */
+ @Override
+ public TypeInfo getType(LogicalVariable var) {
+ return variableToType.get(var);
+ }
+
+ /**
+ * get variable from variable name
+ */
+ @Override
+ public LogicalVariable getVariable(String name) {
+ return nameToLogicalVariableMap.get(name);
+ }
+
+ @Override
+ public LogicalVariable getVariableFromFieldName(String fieldName) {
+ return this.getVariableOnly(fieldName);
+ }
+
+ /**
+ * set the metadata provider
+ */
+ @Override
+ public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata) {
+ this.metaData = metadata;
+ }
+
+ /**
+ * insert ReplicateOperator when necessary
+ */
+ private void insertReplicateOperator(List<Mutable<ILogicalOperator>> roots) {
+ Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> childToParentsMap = new HashMap<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>>();
+ buildChildToParentsMapping(roots, childToParentsMap);
+ for (Entry<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> entry : childToParentsMap.entrySet()) {
+ List<Mutable<ILogicalOperator>> pList = entry.getValue();
+ if (pList.size() > 1) {
+ ILogicalOperator rop = new ReplicateOperator(pList.size());
+ Mutable<ILogicalOperator> ropRef = new MutableObject<ILogicalOperator>(rop);
+ Mutable<ILogicalOperator> childRef = entry.getKey();
+ rop.getInputs().add(childRef);
+ for (Mutable<ILogicalOperator> parentRef : pList) {
+ ILogicalOperator parentOp = parentRef.getValue();
+ int index = parentOp.getInputs().indexOf(childRef);
+ parentOp.getInputs().set(index, ropRef);
+ }
+ }
+ }
+ }
+
+ /**
+ * build the mapping from child to Parents
+ *
+ * @param roots
+ * @param childToParentsMap
+ */
+ private void buildChildToParentsMapping(List<Mutable<ILogicalOperator>> roots,
+ Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> map) {
+ for (Mutable<ILogicalOperator> opRef : roots) {
+ List<Mutable<ILogicalOperator>> childRefs = opRef.getValue().getInputs();
+ for (Mutable<ILogicalOperator> childRef : childRefs) {
+ List<Mutable<ILogicalOperator>> parentList = map.get(childRef);
+ if (parentList == null) {
+ parentList = new ArrayList<Mutable<ILogicalOperator>>();
+ map.put(childRef, parentList);
+ }
+ if (!parentList.contains(opRef))
+ parentList.add(opRef);
+ }
+ buildChildToParentsMapping(childRefs, map);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java
new file mode 100644
index 0000000..d5801a3
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java
@@ -0,0 +1,35 @@
+package edu.uci.ics.hivesterix.logical.plan;
+
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlanAndMetadata;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class HiveLogicalPlanAndMetaData implements ILogicalPlanAndMetadata {
+
+ IMetadataProvider metadata;
+ ILogicalPlan plan;
+
+ public HiveLogicalPlanAndMetaData(ILogicalPlan plan, IMetadataProvider metadata) {
+ this.plan = plan;
+ this.metadata = metadata;
+ }
+
+ @Override
+ public IMetadataProvider getMetadataProvider() {
+ return metadata;
+ }
+
+ @Override
+ public ILogicalPlan getPlan() {
+ return plan;
+ }
+
+ @Override
+ public AlgebricksPartitionConstraint getClusterLocations() {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java
new file mode 100644
index 0000000..0ea4e01
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java
@@ -0,0 +1,8 @@
+package edu.uci.ics.hivesterix.logical.plan;
+
+public class HiveOperatorAnnotations {
+
+ // hints
+ public static final String LOCAL_GROUP_BY = "LOCAL_GROUP_BY";
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java
new file mode 100644
index 0000000..1c67bae
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java
@@ -0,0 +1,26 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+
+public class ExtractVisitor extends DefaultVisitor {
+
+ @Override
+ public Mutable<ILogicalOperator> visit(ExtractOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
+ operator.setSchema(operator.getParentOperators().get(0).getSchema());
+ List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
+ t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
+ return null;
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java
new file mode 100644
index 0000000..9279144
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.FilterDesc;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+
+public class FilterVisitor extends DefaultVisitor {
+
+ @Override
+ public Mutable<ILogicalOperator> visit(FilterOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
+
+ FilterDesc desc = (FilterDesc) operator.getConf();
+ ExprNodeDesc predicate = desc.getPredicate();
+ t.rewriteExpression(predicate);
+
+ Mutable<ILogicalExpression> exprs = t.translateScalarFucntion(desc.getPredicate());
+ ILogicalOperator currentOperator = new SelectOperator(exprs);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+
+ // populate the schema from upstream operator
+ operator.setSchema(operator.getParentOperators().get(0).getSchema());
+ List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
+ t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java
new file mode 100644
index 0000000..b7d5779
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java
@@ -0,0 +1,264 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.GroupByDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.logical.plan.HiveOperatorAnnotations;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.OperatorAnnotations;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.NestedTupleSourceOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
+
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class GroupByVisitor extends DefaultVisitor {
+
+ private List<Mutable<ILogicalExpression>> AlgebricksAggs = new ArrayList<Mutable<ILogicalExpression>>();
+ private List<IFunctionInfo> localAggs = new ArrayList<IFunctionInfo>();
+ private boolean isDistinct = false;
+ private boolean gbyKeyNotRedKey = false;
+
+ @Override
+ public Mutable<ILogicalOperator> visit(GroupByOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException {
+
+ // get descriptors
+ GroupByDesc desc = (GroupByDesc) operator.getConf();
+ GroupByDesc.Mode mode = desc.getMode();
+
+ List<ExprNodeDesc> keys = desc.getKeys();
+ List<AggregationDesc> aggregators = desc.getAggregators();
+
+ Operator child = operator.getChildOperators().get(0);
+
+ if (child.getType() == OperatorType.REDUCESINK) {
+ List<ExprNodeDesc> partKeys = ((ReduceSinkDesc) child.getConf()).getPartitionCols();
+ if (keys.size() != partKeys.size())
+ gbyKeyNotRedKey = true;
+ }
+
+ if (mode == GroupByDesc.Mode.PARTIAL1 || mode == GroupByDesc.Mode.HASH || mode == GroupByDesc.Mode.COMPLETE
+ || (aggregators.size() == 0 && isDistinct == false) || gbyKeyNotRedKey) {
+ AlgebricksAggs.clear();
+ // add an assign operator if the key is not a column expression
+ ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
+ ILogicalOperator currentOperator = null;
+ ILogicalOperator assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef, keys, keyVariables);
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ // get key variable expression list
+ List<Mutable<ILogicalExpression>> keyExprs = new ArrayList<Mutable<ILogicalExpression>>();
+ for (LogicalVariable var : keyVariables) {
+ keyExprs.add(t.translateScalarFucntion(new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, var
+ .toString(), "", false)));
+ }
+
+ if (aggregators.size() == 0) {
+ List<Mutable<ILogicalExpression>> distinctExprs = new ArrayList<Mutable<ILogicalExpression>>();
+ for (LogicalVariable var : keyVariables) {
+ Mutable<ILogicalExpression> varExpr = new MutableObject<ILogicalExpression>(
+ new VariableReferenceExpression(var));
+ distinctExprs.add(varExpr);
+ }
+ t.rewriteOperatorOutputSchema(keyVariables, operator);
+ isDistinct = true;
+ ILogicalOperator lop = new DistinctOperator(distinctExprs);
+ lop.getInputs().add(AlgebricksParentOperatorRef);
+ return new MutableObject<ILogicalOperator>(lop);
+ }
+
+ // get the pair<LogicalVariable, ILogicalExpression> list
+ List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> keyParameters = new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>();
+ keyVariables.clear();
+ for (Mutable<ILogicalExpression> expr : keyExprs) {
+ LogicalVariable keyVar = t.getVariable(expr.getValue().toString(), TypeInfoFactory.unknownTypeInfo);
+ keyParameters.add(new Pair(keyVar, expr));
+ keyVariables.add(keyVar);
+ }
+
+ // get the parameters for the aggregator operator
+ ArrayList<LogicalVariable> aggVariables = new ArrayList<LogicalVariable>();
+ ArrayList<Mutable<ILogicalExpression>> aggExprs = new ArrayList<Mutable<ILogicalExpression>>();
+
+ // get the type of each aggregation function
+ HashMap<AggregationDesc, TypeInfo> aggToType = new HashMap<AggregationDesc, TypeInfo>();
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+ int offset = keys.size();
+ for (int i = offset; i < columns.size(); i++) {
+ aggToType.put(aggregators.get(i - offset), columns.get(i).getType());
+ }
+
+ localAggs.clear();
+ // rewrite parameter expressions for all aggregators
+ for (AggregationDesc aggregator : aggregators) {
+ for (ExprNodeDesc parameter : aggregator.getParameters()) {
+ t.rewriteExpression(parameter);
+ }
+ Mutable<ILogicalExpression> aggExpr = t.translateAggregation(aggregator);
+ AbstractFunctionCallExpression localAggExpr = (AbstractFunctionCallExpression) aggExpr.getValue();
+ localAggs.add(localAggExpr.getFunctionInfo());
+
+ AggregationDesc logicalAgg = new AggregationDesc(aggregator.getGenericUDAFName(),
+ aggregator.getGenericUDAFEvaluator(), aggregator.getParameters(), aggregator.getDistinct(),
+ Mode.COMPLETE);
+ Mutable<ILogicalExpression> logicalAggExpr = t.translateAggregation(logicalAgg);
+
+ AlgebricksAggs.add(logicalAggExpr);
+ if (!gbyKeyNotRedKey)
+ aggExprs.add(logicalAggExpr);
+ else
+ aggExprs.add(aggExpr);
+
+ aggVariables.add(t.getVariable(aggregator.getExprString() + aggregator.getMode(),
+ aggToType.get(aggregator)));
+ }
+
+ if (child.getType() != OperatorType.REDUCESINK)
+ gbyKeyNotRedKey = false;
+
+ // get the sub plan list
+ AggregateOperator aggOperator = new AggregateOperator(aggVariables, aggExprs);
+ NestedTupleSourceOperator nestedTupleSource = new NestedTupleSourceOperator(
+ new MutableObject<ILogicalOperator>());
+ aggOperator.getInputs().add(new MutableObject<ILogicalOperator>(nestedTupleSource));
+
+ List<Mutable<ILogicalOperator>> subRoots = new ArrayList<Mutable<ILogicalOperator>>();
+ subRoots.add(new MutableObject<ILogicalOperator>(aggOperator));
+ ILogicalPlan subPlan = new ALogicalPlanImpl(subRoots);
+ List<ILogicalPlan> subPlans = new ArrayList<ILogicalPlan>();
+ subPlans.add(subPlan);
+
+ // create the group by operator
+ currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator(
+ keyParameters, new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>(), subPlans);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+ nestedTupleSource.getDataSourceReference().setValue(currentOperator);
+
+ List<LogicalVariable> outputVariables = new ArrayList<LogicalVariable>();
+ outputVariables.addAll(keyVariables);
+ outputVariables.addAll(aggVariables);
+ t.rewriteOperatorOutputSchema(outputVariables, operator);
+
+ if (gbyKeyNotRedKey) {
+ currentOperator.getAnnotations().put(HiveOperatorAnnotations.LOCAL_GROUP_BY, Boolean.TRUE);
+ }
+
+ HiveConf conf = ConfUtil.getHiveConf();
+ Boolean extGby = conf.getBoolean("hive.algebricks.groupby.external", false);
+
+ if (extGby && isSerializable(aggregators)) {
+ currentOperator.getAnnotations().put(OperatorAnnotations.USE_EXTERNAL_GROUP_BY, Boolean.TRUE);
+ }
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ } else {
+ isDistinct = false;
+ // rewrite parameter expressions for all aggregators
+ int i = 0;
+ for (AggregationDesc aggregator : aggregators) {
+ for (ExprNodeDesc parameter : aggregator.getParameters()) {
+ t.rewriteExpression(parameter);
+ }
+ Mutable<ILogicalExpression> agg = t.translateAggregation(aggregator);
+ AggregateFunctionCallExpression originalAgg = (AggregateFunctionCallExpression) AlgebricksAggs.get(i)
+ .getValue();
+ originalAgg.setStepOneAggregate(localAggs.get(i));
+ AggregateFunctionCallExpression currentAgg = (AggregateFunctionCallExpression) agg.getValue();
+ if (currentAgg.getFunctionInfo() != null) {
+ originalAgg.setTwoStep(true);
+ originalAgg.setStepTwoAggregate(currentAgg.getFunctionInfo());
+ }
+ i++;
+ }
+ return null;
+ }
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Operator downStream = (Operator) operator.getChildOperators().get(0);
+ if (!(downStream instanceof GroupByOperator)) {
+ return null;
+ }
+
+ ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
+ List<ExprNodeDesc> keys = desc.getKeyCols();
+ List<ExprNodeDesc> values = desc.getValueCols();
+
+ // insert assign for keys
+ ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
+ t.getAssignOperator(AlgebricksParentOperatorRef, keys, keyVariables);
+
+ // insert assign for values
+ ArrayList<LogicalVariable> valueVariables = new ArrayList<LogicalVariable>();
+ t.getAssignOperator(AlgebricksParentOperatorRef, values, valueVariables);
+
+ ArrayList<LogicalVariable> columns = new ArrayList<LogicalVariable>();
+ columns.addAll(keyVariables);
+ columns.addAll(valueVariables);
+
+ t.rewriteOperatorOutputSchema(columns, operator);
+ return null;
+ }
+
+ private boolean isSerializable(List<AggregationDesc> descs) throws AlgebricksException {
+ try {
+ for (AggregationDesc desc : descs) {
+ GenericUDAFEvaluator udaf = desc.getGenericUDAFEvaluator();
+ AggregationBuffer buf = udaf.getNewAggregationBuffer();
+ Class<?> bufferClass = buf.getClass();
+ Field[] fields = bufferClass.getDeclaredFields();
+ for (Field field : fields) {
+ field.setAccessible(true);
+ String type = field.getType().toString();
+ if (!(type.equals("int") || type.equals("long") || type.equals("float") || type.equals("double") || type
+ .equals("boolean"))) {
+ return false;
+ }
+ }
+
+ }
+ return true;
+ } catch (Exception e) {
+ throw new AlgebricksException(e);
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java
new file mode 100644
index 0000000..ef346bc
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java
@@ -0,0 +1,417 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
+import org.apache.hadoop.hive.ql.plan.JoinDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+
+@SuppressWarnings("rawtypes")
+public class JoinVisitor extends DefaultVisitor {
+
+ /**
+ * reduce sink operator to variables
+ */
+ private HashMap<Operator, List<LogicalVariable>> reduceSinkToKeyVariables = new HashMap<Operator, List<LogicalVariable>>();
+
+ /**
+ * reduce sink operator to variables
+ */
+ private HashMap<Operator, List<String>> reduceSinkToFieldNames = new HashMap<Operator, List<String>>();
+
+ /**
+ * reduce sink operator to variables
+ */
+ private HashMap<Operator, List<TypeInfo>> reduceSinkToTypes = new HashMap<Operator, List<TypeInfo>>();
+
+ /**
+ * map a join operator (in hive) to its parent operators (in hive)
+ */
+ private HashMap<Operator, List<Operator>> operatorToHiveParents = new HashMap<Operator, List<Operator>>();
+
+ /**
+ * map a join operator (in hive) to its parent operators (in asterix)
+ */
+ private HashMap<Operator, List<ILogicalOperator>> operatorToAsterixParents = new HashMap<Operator, List<ILogicalOperator>>();
+
+ /**
+ * the latest traversed reduce sink operator
+ */
+ private Operator latestReduceSink = null;
+
+ /**
+ * the latest generated parent for join
+ */
+ private ILogicalOperator latestAlgebricksOperator = null;
+
+ /**
+ * process a join operator
+ */
+ @Override
+ public Mutable<ILogicalOperator> visit(JoinOperator operator, Mutable<ILogicalOperator> AlgebricksParentOperator,
+ Translator t) {
+ latestAlgebricksOperator = AlgebricksParentOperator.getValue();
+ translateJoinOperatorPreprocess(operator, t);
+ List<Operator> parents = operatorToHiveParents.get(operator);
+ if (parents.size() < operator.getParentOperators().size()) {
+ return null;
+ } else {
+ ILogicalOperator joinOp = translateJoinOperator(operator, AlgebricksParentOperator, t);
+ // clearStatus();
+ return new MutableObject<ILogicalOperator>(joinOp);
+ }
+ }
+
+ private void reorder(Byte[] order, List<ILogicalOperator> parents, List<Operator> hiveParents) {
+ ILogicalOperator[] lops = new ILogicalOperator[parents.size()];
+ Operator[] ops = new Operator[hiveParents.size()];
+
+ for (Operator op : hiveParents) {
+ ReduceSinkOperator rop = (ReduceSinkOperator) op;
+ ReduceSinkDesc rdesc = rop.getConf();
+ int tag = rdesc.getTag();
+
+ int index = -1;
+ for (int i = 0; i < order.length; i++)
+ if (order[i] == tag) {
+ index = i;
+ break;
+ }
+ lops[index] = parents.get(hiveParents.indexOf(op));
+ ops[index] = op;
+ }
+
+ parents.clear();
+ hiveParents.clear();
+
+ for (int i = 0; i < lops.length; i++) {
+ parents.add(lops[i]);
+ hiveParents.add(ops[i]);
+ }
+ }
+
+ /**
+ * translate a hive join operator to asterix join operator->assign
+ * operator->project operator
+ *
+ * @param parentOperator
+ * @param operator
+ * @return
+ */
+ private ILogicalOperator translateJoinOperator(Operator operator, Mutable<ILogicalOperator> parentOperator,
+ Translator t) {
+
+ JoinDesc joinDesc = (JoinDesc) operator.getConf();
+
+ // get the projection expression (already re-written) from each source
+ // table
+ Map<Byte, List<ExprNodeDesc>> exprMap = joinDesc.getExprs();
+ reorder(joinDesc.getTagOrder(), operatorToAsterixParents.get(operator), operatorToHiveParents.get(operator));
+
+ // make an reduce join operator
+ ILogicalOperator currentOperator = generateJoinTree(joinDesc.getCondsList(),
+ operatorToAsterixParents.get(operator), operatorToHiveParents.get(operator), 0, t);
+ parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+
+ // add assign and project operator on top of a join
+ // output variables
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ Set<Entry<Byte, List<ExprNodeDesc>>> entries = exprMap.entrySet();
+ Iterator<Entry<Byte, List<ExprNodeDesc>>> iterator = entries.iterator();
+ while (iterator.hasNext()) {
+ List<ExprNodeDesc> outputExprs = iterator.next().getValue();
+ ILogicalOperator assignOperator = t.getAssignOperator(parentOperator, outputExprs, variables);
+
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+ }
+
+ ILogicalOperator po = new ProjectOperator(variables);
+ po.getInputs().add(parentOperator);
+ t.rewriteOperatorOutputSchema(variables, operator);
+ return po;
+ }
+
+ /**
+ * deal with reduce sink operator for the case of join
+ */
+ @Override
+ public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator, Mutable<ILogicalOperator> parentOperator,
+ Translator t) {
+
+ Operator downStream = (Operator) operator.getChildOperators().get(0);
+ if (!(downStream instanceof JoinOperator))
+ return null;
+
+ ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
+ List<ExprNodeDesc> keys = desc.getKeyCols();
+ List<ExprNodeDesc> values = desc.getValueCols();
+ List<ExprNodeDesc> partitionCols = desc.getPartitionCols();
+
+ /**
+ * rewrite key, value, paritioncol expressions
+ */
+ for (ExprNodeDesc key : keys)
+ t.rewriteExpression(key);
+ for (ExprNodeDesc value : values)
+ t.rewriteExpression(value);
+ for (ExprNodeDesc col : partitionCols)
+ t.rewriteExpression(col);
+
+ ILogicalOperator currentOperator = null;
+
+ // add assign operator for keys if necessary
+ ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
+ ILogicalOperator assignOperator = t.getAssignOperator(parentOperator, keys, keyVariables);
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ // add assign operator for values if necessary
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ assignOperator = t.getAssignOperator(parentOperator, values, variables);
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ // unified schema: key, value
+ ArrayList<LogicalVariable> unifiedKeyValues = new ArrayList<LogicalVariable>();
+ unifiedKeyValues.addAll(keyVariables);
+ for (LogicalVariable value : variables)
+ if (keyVariables.indexOf(value) < 0)
+ unifiedKeyValues.add(value);
+
+ // insert projection operator, it is a *must*,
+ // in hive, reduce sink sometimes also do the projection operator's
+ // task
+ currentOperator = new ProjectOperator(unifiedKeyValues);
+ currentOperator.getInputs().add(parentOperator);
+ parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+
+ reduceSinkToKeyVariables.put(operator, keyVariables);
+ List<String> fieldNames = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ for (LogicalVariable var : unifiedKeyValues) {
+ fieldNames.add(var.toString());
+ types.add(t.getType(var));
+ }
+ reduceSinkToFieldNames.put(operator, fieldNames);
+ reduceSinkToTypes.put(operator, types);
+ t.rewriteOperatorOutputSchema(variables, operator);
+
+ latestAlgebricksOperator = currentOperator;
+ latestReduceSink = operator;
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ /**
+ * partial rewrite a join operator
+ *
+ * @param operator
+ * @param t
+ */
+ private void translateJoinOperatorPreprocess(Operator operator, Translator t) {
+ JoinDesc desc = (JoinDesc) operator.getConf();
+ ReduceSinkDesc reduceSinkDesc = (ReduceSinkDesc) latestReduceSink.getConf();
+ int tag = reduceSinkDesc.getTag();
+
+ Map<Byte, List<ExprNodeDesc>> exprMap = desc.getExprs();
+ List<ExprNodeDesc> exprs = exprMap.get(Byte.valueOf((byte) tag));
+
+ for (ExprNodeDesc expr : exprs)
+ t.rewriteExpression(expr);
+
+ List<Operator> parents = operatorToHiveParents.get(operator);
+ if (parents == null) {
+ parents = new ArrayList<Operator>();
+ operatorToHiveParents.put(operator, parents);
+ }
+ parents.add(latestReduceSink);
+
+ List<ILogicalOperator> asterixParents = operatorToAsterixParents.get(operator);
+ if (asterixParents == null) {
+ asterixParents = new ArrayList<ILogicalOperator>();
+ operatorToAsterixParents.put(operator, asterixParents);
+ }
+ asterixParents.add(latestAlgebricksOperator);
+ }
+
+ // generate a join tree from a list of exchange/reducesink operator
+ // both exchanges and reduce sinks have the same order
+ private ILogicalOperator generateJoinTree(List<JoinCondDesc> conds, List<ILogicalOperator> exchanges,
+ List<Operator> reduceSinks, int offset, Translator t) {
+ // get a list of reduce sink descs (input descs)
+ int inputSize = reduceSinks.size() - offset;
+
+ if (inputSize == 2) {
+ ILogicalOperator currentRoot;
+
+ List<ReduceSinkDesc> reduceSinkDescs = new ArrayList<ReduceSinkDesc>();
+ for (int i = reduceSinks.size() - 1; i >= offset; i--)
+ reduceSinkDescs.add((ReduceSinkDesc) reduceSinks.get(i).getConf());
+
+ // get the object inspector for the join
+ List<String> fieldNames = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ for (int i = reduceSinks.size() - 1; i >= offset; i--) {
+ fieldNames.addAll(reduceSinkToFieldNames.get(reduceSinks.get(i)));
+ types.addAll(reduceSinkToTypes.get(reduceSinks.get(i)));
+ }
+
+ // get number of equality conjunctions in the final join condition
+ int size = reduceSinkDescs.get(0).getKeyCols().size();
+
+ // make up the join conditon expression
+ List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
+ for (int i = 0; i < size; i++) {
+ // create a join key pair
+ List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
+ for (ReduceSinkDesc sink : reduceSinkDescs) {
+ keyPair.add(sink.getKeyCols().get(i));
+ }
+ // create a hive equal condition
+ ExprNodeDesc equality = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPEqual(), keyPair);
+ // add the equal condition to the conjunction list
+ joinConditionChildren.add(equality);
+ }
+ // get final conjunction expression
+ ExprNodeDesc conjunct = null;
+
+ if (joinConditionChildren.size() > 1)
+ conjunct = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
+ joinConditionChildren);
+ else if (joinConditionChildren.size() == 1)
+ conjunct = joinConditionChildren.get(0);
+ else {
+ // there is no join equality condition, equal-join
+ conjunct = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, new Boolean(true));
+ }
+ // get an ILogicalExpression from hive's expression
+ Mutable<ILogicalExpression> expression = t.translateScalarFucntion(conjunct);
+
+ Mutable<ILogicalOperator> leftBranch = new MutableObject<ILogicalOperator>(
+ exchanges.get(exchanges.size() - 1));
+ Mutable<ILogicalOperator> rightBranch = new MutableObject<ILogicalOperator>(
+ exchanges.get(exchanges.size() - 2));
+ // get the join operator
+ if (conds.get(offset).getType() == JoinDesc.LEFT_OUTER_JOIN) {
+ currentRoot = new LeftOuterJoinOperator(expression);
+ Mutable<ILogicalOperator> temp = leftBranch;
+ leftBranch = rightBranch;
+ rightBranch = temp;
+ } else if (conds.get(offset).getType() == JoinDesc.RIGHT_OUTER_JOIN) {
+ currentRoot = new LeftOuterJoinOperator(expression);
+ } else
+ currentRoot = new InnerJoinOperator(expression);
+
+ currentRoot.getInputs().add(leftBranch);
+ currentRoot.getInputs().add(rightBranch);
+
+ // rewriteOperatorOutputSchema(variables, operator);
+ return currentRoot;
+ } else {
+ // get the child join operator and insert and one-to-one exchange
+ ILogicalOperator joinSrcOne = generateJoinTree(conds, exchanges, reduceSinks, offset + 1, t);
+ // joinSrcOne.addInput(childJoin);
+
+ ILogicalOperator currentRoot;
+
+ List<ReduceSinkDesc> reduceSinkDescs = new ArrayList<ReduceSinkDesc>();
+ for (int i = offset; i < offset + 2; i++)
+ reduceSinkDescs.add((ReduceSinkDesc) reduceSinks.get(i).getConf());
+
+ // get the object inspector for the join
+ List<String> fieldNames = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ for (int i = offset; i < reduceSinks.size(); i++) {
+ fieldNames.addAll(reduceSinkToFieldNames.get(reduceSinks.get(i)));
+ types.addAll(reduceSinkToTypes.get(reduceSinks.get(i)));
+ }
+
+ // get number of equality conjunctions in the final join condition
+ int size = reduceSinkDescs.get(0).getKeyCols().size();
+
+ // make up the join condition expression
+ List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
+ for (int i = 0; i < size; i++) {
+ // create a join key pair
+ List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
+ for (ReduceSinkDesc sink : reduceSinkDescs) {
+ keyPair.add(sink.getKeyCols().get(i));
+ }
+ // create a hive equal condition
+ ExprNodeDesc equality = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPEqual(), keyPair);
+ // add the equal condition to the conjunction list
+ joinConditionChildren.add(equality);
+ }
+ // get final conjunction expression
+ ExprNodeDesc conjunct = null;
+
+ if (joinConditionChildren.size() > 1)
+ conjunct = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
+ joinConditionChildren);
+ else if (joinConditionChildren.size() == 1)
+ conjunct = joinConditionChildren.get(0);
+ else {
+ // there is no join equality condition, full outer join
+ conjunct = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, new Boolean(true));
+ }
+ // get an ILogicalExpression from hive's expression
+ Mutable<ILogicalExpression> expression = t.translateScalarFucntion(conjunct);
+
+ Mutable<ILogicalOperator> leftBranch = new MutableObject<ILogicalOperator>(joinSrcOne);
+ Mutable<ILogicalOperator> rightBranch = new MutableObject<ILogicalOperator>(exchanges.get(offset));
+
+ // get the join operator
+ if (conds.get(offset).getType() == JoinDesc.LEFT_OUTER_JOIN) {
+ currentRoot = new LeftOuterJoinOperator(expression);
+ Mutable<ILogicalOperator> temp = leftBranch;
+ leftBranch = rightBranch;
+ rightBranch = temp;
+ } else if (conds.get(offset).getType() == JoinDesc.RIGHT_OUTER_JOIN) {
+ currentRoot = new LeftOuterJoinOperator(expression);
+ } else
+ currentRoot = new InnerJoinOperator(expression);
+
+ // set the inputs from Algebricks join operator
+ // add the current table
+ currentRoot.getInputs().add(leftBranch);
+ currentRoot.getInputs().add(rightBranch);
+
+ return currentRoot;
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
new file mode 100644
index 0000000..cc19364
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
@@ -0,0 +1,110 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+
+/**
+ * The lateral view join operator is used for FROM src LATERAL VIEW udtf()...
+ * This operator was implemented with the following operator DAG in mind.
+ * For a query such as
+ * SELECT pageid, adid.* FROM example_table LATERAL VIEW explode(adid_list) AS
+ * adid
+ * The top of the operator DAG will look similar to
+ * [Table Scan] | [Lateral View Forward] / \ [Select](*) [Select](adid_list) | |
+ * | [UDTF] (explode) \ / [Lateral View Join] | | [Select] (pageid, adid.*) |
+ * ....
+ * Rows from the table scan operator are first to a lateral view forward
+ * operator that just forwards the row and marks the start of a LV. The select
+ * operator on the left picks all the columns while the select operator on the
+ * right picks only the columns needed by the UDTF.
+ * The output of select in the left branch and output of the UDTF in the right
+ * branch are then sent to the lateral view join (LVJ). In most cases, the UDTF
+ * will generate > 1 row for every row received from the TS, while the left
+ * select operator will generate only one. For each row output from the TS, the
+ * LVJ outputs all possible rows that can be created by joining the row from the
+ * left select and one of the rows output from the UDTF.
+ * Additional lateral views can be supported by adding a similar DAG after the
+ * previous LVJ operator.
+ */
+
+@SuppressWarnings("rawtypes")
+public class LateralViewJoinVisitor extends DefaultVisitor {
+
+ private UDTFDesc udtf;
+
+ private List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LateralViewJoinOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException {
+
+ parents.add(AlgebricksParentOperatorRef);
+ if (operator.getParentOperators().size() > parents.size()) {
+ return null;
+ }
+
+ Operator parent0 = operator.getParentOperators().get(0);
+ ILogicalOperator parentOperator;
+ ILogicalExpression unnestArg;
+ if (parent0 instanceof UDTFOperator) {
+ List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(parents.get(1).getValue(), unnestVars);
+ unnestArg = new VariableReferenceExpression(unnestVars.get(0));
+ parentOperator = parents.get(1).getValue();
+ } else {
+ List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(parents.get(0).getValue(), unnestVars);
+ unnestArg = new VariableReferenceExpression(unnestVars.get(0));
+ parentOperator = parents.get(0).getValue();
+ }
+
+ LogicalVariable var = t.getVariable(udtf.toString(), TypeInfoFactory.unknownTypeInfo);
+
+ Mutable<ILogicalExpression> unnestExpr = t.translateUnnestFunction(udtf, new MutableObject<ILogicalExpression>(
+ unnestArg));
+ ILogicalOperator currentOperator = new UnnestOperator(var, unnestExpr);
+
+ List<LogicalVariable> outputVars = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(parentOperator, outputVars);
+ outputVars.add(var);
+ currentOperator.getInputs().add(new MutableObject<ILogicalOperator>(parentOperator));
+
+ parents.clear();
+ udtf = null;
+ t.rewriteOperatorOutputSchema(outputVars, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(UDTFOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
+ udtf = (UDTFDesc) operator.getConf();
+
+ // populate the schema from upstream operator
+ operator.setSchema(operator.getParentOperators().get(0).getSchema());
+ List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
+ t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
+ return null;
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java
new file mode 100644
index 0000000..cc10f8f
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java
@@ -0,0 +1,41 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.plan.LimitDesc;
+
+import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+
+public class LimitVisitor extends DefaultVisitor {
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LimitOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ Schema currentSchema = t.generateInputSchema(operator.getParentOperators().get(0));
+
+ LimitDesc desc = (LimitDesc) operator.getConf();
+ int limit = desc.getLimit();
+ Integer limitValue = new Integer(limit);
+
+ ILogicalExpression expr = new ConstantExpression(new HivesterixConstantValue(limitValue));
+ ILogicalOperator currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.LimitOperator(
+ expr, true);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+
+ operator.setSchema(operator.getParentOperators().get(0).getSchema());
+ List<LogicalVariable> latestOutputSchema = t.getVariablesFromSchema(currentSchema);
+ t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
new file mode 100644
index 0000000..4aba6a4
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
@@ -0,0 +1,171 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+
+@SuppressWarnings("rawtypes")
+public class MapJoinVisitor extends DefaultVisitor {
+
+ /**
+ * map a join operator (in hive) to its parent operators (in asterix)
+ */
+ private HashMap<Operator, List<Mutable<ILogicalOperator>>> opMap = new HashMap<Operator, List<Mutable<ILogicalOperator>>>();
+
+ @Override
+ public Mutable<ILogicalOperator> visit(MapJoinOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
+ List<Operator<? extends Serializable>> joinSrc = operator.getParentOperators();
+ List<Mutable<ILogicalOperator>> parents = opMap.get(operator);
+ if (parents == null) {
+ parents = new ArrayList<Mutable<ILogicalOperator>>();
+ opMap.put(operator, parents);
+ }
+ parents.add(AlgebricksParentOperatorRef);
+ if (joinSrc.size() != parents.size())
+ return null;
+
+ ILogicalOperator currentOperator;
+ // make an map join operator
+ // TODO: will have trouble for n-way joins
+ MapJoinDesc joinDesc = (MapJoinDesc) operator.getConf();
+
+ Map<Byte, List<ExprNodeDesc>> keyMap = joinDesc.getKeys();
+ // get the projection expression (already re-written) from each source
+ // table
+ Map<Byte, List<ExprNodeDesc>> exprMap = joinDesc.getExprs();
+
+ int inputSize = operator.getParentOperators().size();
+ // get a list of reduce sink descs (input descs)
+
+ // get the parent operator
+ List<Mutable<ILogicalOperator>> parentOps = parents;
+
+ List<String> fieldNames = new ArrayList<String>();
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ for (Operator ts : joinSrc) {
+ List<ColumnInfo> columns = ts.getSchema().getSignature();
+ for (ColumnInfo col : columns) {
+ fieldNames.add(col.getInternalName());
+ types.add(col.getType());
+ }
+ }
+
+ // get number of equality conjunctions in the final join condition
+ Set<Entry<Byte, List<ExprNodeDesc>>> keyEntries = keyMap.entrySet();
+ Iterator<Entry<Byte, List<ExprNodeDesc>>> entry = keyEntries.iterator();
+
+ int size = 0;
+ if (entry.hasNext())
+ size = entry.next().getValue().size();
+
+ // make up the join conditon expression
+ List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
+ for (int i = 0; i < size; i++) {
+ // create a join key pair
+ List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
+ for (int j = 0; j < inputSize; j++) {
+ keyPair.add(keyMap.get(Byte.valueOf((byte) j)).get(i));
+ }
+ // create a hive equal condition
+ ExprNodeDesc equality = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo,
+ new GenericUDFOPEqual(), keyPair);
+ // add the equal condition to the conjunction list
+ joinConditionChildren.add(equality);
+ }
+ // get final conjunction expression
+ ExprNodeDesc conjunct = null;
+
+ if (joinConditionChildren.size() > 1)
+ conjunct = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
+ joinConditionChildren);
+ else if (joinConditionChildren.size() == 1)
+ conjunct = joinConditionChildren.get(0);
+ else {
+ // there is no join equality condition, full outer join
+ conjunct = new ExprNodeConstantDesc(TypeInfoFactory.booleanTypeInfo, new Boolean(true));
+ }
+ // get an ILogicalExpression from hive's expression
+ Mutable<ILogicalExpression> expression = t.translateScalarFucntion(conjunct);
+
+ ArrayList<LogicalVariable> left = new ArrayList<LogicalVariable>();
+ ArrayList<LogicalVariable> right = new ArrayList<LogicalVariable>();
+
+ Set<Entry<Byte, List<ExprNodeDesc>>> kentries = keyMap.entrySet();
+ Iterator<Entry<Byte, List<ExprNodeDesc>>> kiterator = kentries.iterator();
+ int iteration = 0;
+ ILogicalOperator assignOperator = null;
+ while (kiterator.hasNext()) {
+ List<ExprNodeDesc> outputExprs = kiterator.next().getValue();
+
+ if (iteration == 0)
+ assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef, outputExprs, left);
+ else
+ assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef, outputExprs, right);
+
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+ iteration++;
+ }
+
+ List<Mutable<ILogicalOperator>> inputs = parentOps;
+
+ // get the join operator
+ currentOperator = new InnerJoinOperator(expression);
+
+ // set the inputs from asterix join operator
+ for (Mutable<ILogicalOperator> input : inputs)
+ currentOperator.getInputs().add(input);
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+
+ // add assign and project operator
+ // output variables
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ Set<Entry<Byte, List<ExprNodeDesc>>> entries = exprMap.entrySet();
+ Iterator<Entry<Byte, List<ExprNodeDesc>>> iterator = entries.iterator();
+ while (iterator.hasNext()) {
+ List<ExprNodeDesc> outputExprs = iterator.next().getValue();
+ assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef, outputExprs, variables);
+
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+ }
+
+ currentOperator = new ProjectOperator(variables);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+ t.rewriteOperatorOutputSchema(variables, operator);
+ // opMap.clear();
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java
new file mode 100644
index 0000000..eb0922f
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java
@@ -0,0 +1,56 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+
+public class ProjectVisitor extends DefaultVisitor {
+
+ /**
+ * translate project operator
+ */
+ @Override
+ public Mutable<ILogicalOperator> visit(SelectOperator operator, Mutable<ILogicalOperator> AlgebricksParentOperator,
+ Translator t) {
+
+ SelectDesc desc = (SelectDesc) operator.getConf();
+
+ if (desc == null)
+ return null;
+
+ List<ExprNodeDesc> cols = desc.getColList();
+
+ if (cols == null)
+ return null;
+
+ // insert assign operator if necessary
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+
+ for (ExprNodeDesc expr : cols)
+ t.rewriteExpression(expr);
+
+ ILogicalOperator assignOp = t.getAssignOperator(AlgebricksParentOperator, cols, variables);
+ ILogicalOperator currentOperator = null;
+ if (assignOp != null) {
+ currentOperator = assignOp;
+ AlgebricksParentOperator = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ currentOperator = new ProjectOperator(variables);
+ currentOperator.getInputs().add(AlgebricksParentOperator);
+ t.rewriteOperatorOutputSchema(variables, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java
new file mode 100644
index 0000000..325b632
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java
@@ -0,0 +1,113 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.OrderOperator.IOrder;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.OrderColumn;
+
+public class SortVisitor extends DefaultVisitor {
+
+ @SuppressWarnings("rawtypes")
+ @Override
+ public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException {
+ ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
+ Operator downStream = (Operator) operator.getChildOperators().get(0);
+ List<ExprNodeDesc> keys = desc.getKeyCols();
+ if (!(downStream instanceof ExtractOperator && desc.getNumReducers() == 1 && keys.size() > 0)) {
+ return null;
+ }
+
+ List<ExprNodeDesc> schema = new ArrayList<ExprNodeDesc>();
+ List<ExprNodeDesc> values = desc.getValueCols();
+ List<ExprNodeDesc> partitionCols = desc.getPartitionCols();
+ for (ExprNodeDesc key : keys) {
+ t.rewriteExpression(key);
+ }
+ for (ExprNodeDesc value : values) {
+ t.rewriteExpression(value);
+ }
+ for (ExprNodeDesc col : partitionCols) {
+ t.rewriteExpression(col);
+ }
+
+ // add a order-by operator and limit if any
+ List<Pair<IOrder, Mutable<ILogicalExpression>>> pairs = new ArrayList<Pair<IOrder, Mutable<ILogicalExpression>>>();
+ char[] orders = desc.getOrder().toCharArray();
+ int i = 0;
+ for (ExprNodeDesc key : keys) {
+ Mutable<ILogicalExpression> expr = t.translateScalarFucntion(key);
+ IOrder order = orders[i] == '+' ? OrderOperator.ASC_ORDER : OrderOperator.DESC_ORDER;
+
+ Pair<IOrder, Mutable<ILogicalExpression>> pair = new Pair<IOrder, Mutable<ILogicalExpression>>(order, expr);
+ pairs.add(pair);
+ i++;
+ }
+
+ // get input variables
+ ArrayList<LogicalVariable> inputVariables = new ArrayList<LogicalVariable>();
+ VariableUtilities.getProducedVariables(AlgebricksParentOperatorRef.getValue(), inputVariables);
+
+ ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
+ ILogicalOperator currentOperator;
+ ILogicalOperator assignOp = t.getAssignOperator(AlgebricksParentOperatorRef, keys, keyVariables);
+ if (assignOp != null) {
+ currentOperator = assignOp;
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ OrderColumn[] keyColumns = new OrderColumn[keyVariables.size()];
+
+ for (int j = 0; j < keyColumns.length; j++)
+ keyColumns[j] = new OrderColumn(keyVariables.get(j), pairs.get(j).first.getKind());
+
+ // handle order operator
+ currentOperator = new OrderOperator(pairs);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+
+ // project back, remove generated sort-key columns if any
+ if (assignOp != null) {
+ currentOperator = new ProjectOperator(inputVariables);
+ currentOperator.getInputs().add(AlgebricksParentOperatorRef);
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ /**
+ * a special rule for hive's order by output schema of reduce sink
+ * operator only contains the columns
+ */
+ for (ExprNodeDesc value : values) {
+ schema.add(value);
+ }
+
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ ILogicalOperator assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef, schema, variables);
+ t.rewriteOperatorOutputSchema(variables, operator);
+
+ if (assignOperator != null) {
+ currentOperator = assignOperator;
+ AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(currentOperator);
+ }
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
new file mode 100644
index 0000000..3af1832
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
@@ -0,0 +1,135 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveDataSink;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveDataSource;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveMetaDataProvider;
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.WriteOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+
+public class TableScanWriteVisitor extends DefaultVisitor {
+
+ /**
+ * map from alias to partition desc
+ */
+ private HashMap<String, PartitionDesc> aliasToPathMap;
+
+ /**
+ * map from partition desc to data source
+ */
+ private HashMap<PartitionDesc, IDataSource<PartitionDesc>> dataSourceMap = new HashMap<PartitionDesc, IDataSource<PartitionDesc>>();
+
+ /**
+ * constructor
+ *
+ * @param aliasToPathMap
+ */
+ public TableScanWriteVisitor(HashMap<String, PartitionDesc> aliasToPathMap) {
+ this.aliasToPathMap = aliasToPathMap;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(TableScanOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ TableScanDesc desc = (TableScanDesc) operator.getConf();
+ if (desc == null) {
+ List<LogicalVariable> schema = new ArrayList<LogicalVariable>();
+ VariableUtilities.getLiveVariables(AlgebricksParentOperator.getValue(), schema);
+ t.rewriteOperatorOutputSchema(schema, operator);
+ return null;
+ }
+
+ List<ColumnInfo> columns = operator.getSchema().getSignature();
+ for (int i = columns.size() - 1; i >= 0; i--)
+ if (columns.get(i).getIsVirtualCol() == true)
+ columns.remove(i);
+
+ // start with empty tuple operator
+ List<TypeInfo> types = new ArrayList<TypeInfo>();
+ ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
+ List<String> names = new ArrayList<String>();
+ for (ColumnInfo column : columns) {
+ types.add(column.getType());
+
+ LogicalVariable var = t.getVariableFromFieldName(column.getTabAlias() + "." + column.getInternalName());
+ LogicalVariable varNew;
+
+ if (var != null) {
+ varNew = t.getVariable(column.getTabAlias() + "." + column.getInternalName() + operator.toString(),
+ column.getType());
+ t.replaceVariable(var, varNew);
+ var = varNew;
+ } else
+ var = t.getNewVariable(column.getTabAlias() + "." + column.getInternalName(), column.getType());
+
+ variables.add(var);
+ names.add(column.getInternalName());
+ }
+ Schema currentSchema = new Schema(names, types);
+
+ String alias = desc.getAlias();
+ PartitionDesc partDesc = aliasToPathMap.get(alias);
+ IDataSource<PartitionDesc> dataSource = new HiveDataSource<PartitionDesc>(partDesc, currentSchema.getSchema());
+ ILogicalOperator currentOperator = new DataSourceScanOperator(variables, dataSource);
+
+ // set empty tuple source operator
+ ILogicalOperator ets = new EmptyTupleSourceOperator();
+ currentOperator.getInputs().add(new MutableObject<ILogicalOperator>(ets));
+
+ // setup data source
+ dataSourceMap.put(partDesc, dataSource);
+ t.rewriteOperatorOutputSchema(variables, operator);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) {
+
+ if (hiveOperator.getChildOperators() != null && hiveOperator.getChildOperators().size() > 0)
+ return null;
+
+ Schema currentSchema = t.generateInputSchema(hiveOperator.getParentOperators().get(0));
+
+ IDataSink sink = new HiveDataSink(hiveOperator, currentSchema.getSchema());
+ List<Mutable<ILogicalExpression>> exprList = new ArrayList<Mutable<ILogicalExpression>>();
+ for (String column : currentSchema.getNames()) {
+ exprList.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(t.getVariable(column))));
+ }
+
+ ILogicalOperator currentOperator = new WriteOperator(exprList, sink);
+ if (AlgebricksParentOperator != null) {
+ currentOperator.getInputs().add(AlgebricksParentOperator);
+ }
+
+ IMetadataProvider<PartitionDesc, Object> metaData = new HiveMetaDataProvider<PartitionDesc, Object>(
+ hiveOperator, currentSchema, dataSourceMap);
+ t.setMetadataProvider(metaData);
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
new file mode 100644
index 0000000..96b9463
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
@@ -0,0 +1,62 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
+import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+
+public class UnionVisitor extends DefaultVisitor {
+
+ List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
+
+ @Override
+ public Mutable<ILogicalOperator> visit(UnionOperator operator, Mutable<ILogicalOperator> AlgebricksParentOperator,
+ Translator t) throws AlgebricksException {
+
+ parents.add(AlgebricksParentOperator);
+ if (operator.getParentOperators().size() > parents.size()) {
+ return null;
+ }
+
+ List<LogicalVariable> leftVars = new ArrayList<LogicalVariable>();
+ List<LogicalVariable> rightVars = new ArrayList<LogicalVariable>();
+
+ VariableUtilities.getUsedVariables(parents.get(0).getValue(), leftVars);
+ VariableUtilities.getUsedVariables(parents.get(1).getValue(), rightVars);
+
+ List<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> triples = new ArrayList<Triple<LogicalVariable, LogicalVariable, LogicalVariable>>();
+ List<LogicalVariable> unionVars = new ArrayList<LogicalVariable>();
+
+ for (int i = 0; i < leftVars.size(); i++) {
+ LogicalVariable unionVar = t.getVariable(
+ leftVars.get(i).getId() + "union" + AlgebricksParentOperator.hashCode(),
+ TypeInfoFactory.unknownTypeInfo);
+ unionVars.add(unionVar);
+ Triple<LogicalVariable, LogicalVariable, LogicalVariable> triple = new Triple<LogicalVariable, LogicalVariable, LogicalVariable>(
+ leftVars.get(i), rightVars.get(i), unionVar);
+ t.replaceVariable(leftVars.get(i), unionVar);
+ t.replaceVariable(rightVars.get(i), unionVar);
+ triples.add(triple);
+ }
+ ILogicalOperator currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator(
+ triples);
+ for (Mutable<ILogicalOperator> parent : parents)
+ currentOperator.getInputs().add(parent);
+
+ t.rewriteOperatorOutputSchema(unionVars, operator);
+ parents.clear();
+ return new MutableObject<ILogicalOperator>(currentOperator);
+ }
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java
new file mode 100644
index 0000000..d298553
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java
@@ -0,0 +1,145 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor.base;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.CollectOperator;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.ForwardOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.MapOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+
+/**
+ * a default empty implementation of visitor
+ *
+ * @author yingyib
+ */
+public class DefaultVisitor implements Visitor {
+
+ @Override
+ public Mutable<ILogicalOperator> visit(CollectOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(JoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(ExtractOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(MapJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(SMBMapJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ public Mutable<ILogicalOperator> visit(ReduceSinkOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(FilterOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(ForwardOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(GroupByOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LateralViewForwardOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LateralViewJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(LimitOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(MapOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(ScriptOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(SelectOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(TableScanOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(UDTFOperator operator, Mutable<ILogicalOperator> AlgebricksParentOperator,
+ Translator t) throws AlgebricksException {
+ return null;
+ }
+
+ @Override
+ public Mutable<ILogicalOperator> visit(UnionOperator operator, Mutable<ILogicalOperator> AlgebricksParentOperator,
+ Translator t) throws AlgebricksException {
+ return null;
+ }
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
new file mode 100644
index 0000000..8aa139a
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
@@ -0,0 +1,170 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor.base;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+import edu.uci.ics.hivesterix.runtime.jobgen.Schema;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+@SuppressWarnings("rawtypes")
+public interface Translator {
+
+ /**
+ * generate input schema
+ *
+ * @param operator
+ * @return
+ */
+ public Schema generateInputSchema(Operator operator);
+
+ /**
+ * rewrite the names of output columns for feture expression evaluators to
+ * use
+ *
+ * @param operator
+ */
+ public void rewriteOperatorOutputSchema(List<LogicalVariable> vars, Operator operator);
+
+ /**
+ * rewrite the names of output columns for feture expression evaluators to
+ * use
+ *
+ * @param operator
+ */
+ public void rewriteOperatorOutputSchema(Operator operator);
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpression(ExprNodeDesc expr);
+
+ /**
+ * rewrite an expression and substitute variables
+ *
+ * @param expr
+ * hive expression
+ */
+ public void rewriteExpressionPartial(ExprNodeDesc expr);
+
+ /**
+ * get an assign operator as a child of parent
+ *
+ * @param parent
+ * @param cols
+ * @param variables
+ * @return
+ */
+ public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,
+ ArrayList<LogicalVariable> variables);
+
+ /**
+ * get type for a logical variable
+ *
+ * @param var
+ * @return type info
+ */
+ public TypeInfo getType(LogicalVariable var);
+
+ /**
+ * translate an expression from hive to Algebricks
+ *
+ * @param desc
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc desc);
+
+ /**
+ * translate an aggregation from hive to Algebricks
+ *
+ * @param aggregateDesc
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc);
+
+ /**
+ * translate unnesting (UDTF) function expression
+ *
+ * @param aggregator
+ * @return
+ */
+ public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument);
+
+ /**
+ * get variable from a schema
+ *
+ * @param schema
+ * @return
+ */
+ public List<LogicalVariable> getVariablesFromSchema(Schema schema);
+
+ /**
+ * get variable from name
+ *
+ * @param name
+ * @return
+ */
+ public LogicalVariable getVariable(String name);
+
+ /**
+ * get variable from field name
+ *
+ * @param name
+ * @return
+ */
+ public LogicalVariable getVariableFromFieldName(String name);
+
+ /**
+ * get variable from name, type
+ *
+ * @param fieldName
+ * @param type
+ * @return
+ */
+ public LogicalVariable getVariable(String fieldName, TypeInfo type);
+
+ /**
+ * get new variable from name, type
+ *
+ * @param fieldName
+ * @param type
+ * @return
+ */
+ public LogicalVariable getNewVariable(String fieldName, TypeInfo type);
+
+ /**
+ * set the metadata provider
+ *
+ * @param metadata
+ */
+ public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata);
+
+ /**
+ * get the metadata provider
+ *
+ * @param metadata
+ */
+ public IMetadataProvider<PartitionDesc, Object> getMetadataProvider();
+
+ /**
+ * replace the variable
+ *
+ * @param oldVar
+ * @param newVar
+ */
+ public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar);
+
+}
diff --git a/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java
new file mode 100644
index 0000000..11ae357
--- /dev/null
+++ b/hivesterix/hivesterix-translator/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java
@@ -0,0 +1,85 @@
+package edu.uci.ics.hivesterix.logical.plan.visitor.base;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.CollectOperator;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.ForwardOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.JoinOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.LimitOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.MapOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+
+public interface Visitor {
+
+ public Mutable<ILogicalOperator> visit(CollectOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(JoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(ExtractOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(MapJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(SMBMapJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(FilterOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(ForwardOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(GroupByOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(LateralViewForwardOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(LateralViewJoinOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(LimitOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(MapOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(ScriptOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(SelectOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(TableScanOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(ReduceSinkOperator hiveOperator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(UDTFOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+
+ public Mutable<ILogicalOperator> visit(UnionOperator operator,
+ Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) throws AlgebricksException;
+}
diff --git a/hivesterix/pom.xml b/hivesterix/pom.xml
index 1553a2b..a8ef4ee 100644
--- a/hivesterix/pom.xml
+++ b/hivesterix/pom.xml
@@ -1,578 +1,139 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <groupId>edu.uci.ics.hivesterix</groupId>
- <artifactId>hivesterix</artifactId>
- <version>0.2.3-SNAPSHOT</version>
- <name>hivesterix</name>
- <dependencies>
- <dependency>
- <groupId>javax.servlet</groupId>
- <artifactId>servlet-api</artifactId>
- <version>2.5</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>4.8.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>args4j</groupId>
- <artifactId>args4j</artifactId>
- <version>2.0.12</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.json</groupId>
- <artifactId>json</artifactId>
- <version>20090211</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-server</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.eclipse.jetty</groupId>
- <artifactId>jetty-servlet</artifactId>
- <version>8.0.0.M1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <version>0.20.2</version>
- </dependency>
- <dependency>
- <groupId>jline</groupId>
- <artifactId>jline</artifactId>
- <version>0.9.94</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-core</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-connectionpool</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-enhancer</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.datanucleus</groupId>
- <artifactId>datanucleus-rdbms</artifactId>
- <version>2.0.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-dbcp</groupId>
- <artifactId>commons-dbcp</artifactId>
- <version>1.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-pool</groupId>
- <artifactId>commons-pool</artifactId>
- <version>1.5.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-collections</groupId>
- <artifactId>commons-collections</artifactId>
- <version>3.2.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-lang</groupId>
- <artifactId>commons-lang</artifactId>
- <version>2.4</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>javax</groupId>
- <artifactId>jdo2-api</artifactId>
- <version>2.3-ec</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.facebook</groupId>
- <artifactId>libfb303</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.thrift</groupId>
- <artifactId>libthrift</artifactId>
- <version>0.5.0</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>cli</artifactId>
- <version>1.2</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache</groupId>
- <artifactId>log4j</artifactId>
- <version>1.2.15</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>antlr-runtime</artifactId>
- <version>3.0.1</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-cli</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-common</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-exec</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-hwi</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-jdbc</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-metastore</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-service</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-shims</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop.hive</groupId>
- <artifactId>hive-serde</artifactId>
- <version>0.7.0</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-api</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-cli</groupId>
- <artifactId>commons-cli</artifactId>
- <version>1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-log4j12</artifactId>
- <version>1.6.1</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-test</artifactId>
- <version>0.20.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>commons-logging</groupId>
- <artifactId>commons-logging</artifactId>
- <version>1.1.1</version>
- <type>jar</type>
- <classifier>api</classifier>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- <version>r06</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.antlr</groupId>
- <artifactId>stringtemplate</artifactId>
- <version>3.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.derby</groupId>
- <artifactId>derby</artifactId>
- <version>10.8.1.2</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hbase</groupId>
- <artifactId>hbase</artifactId>
- <version>0.90.3</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>algebricks-compiler</artifactId>
- <version>0.2.3-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>hyracks-control-cc</artifactId>
- <version>0.2.3-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>edu.uci.ics.hyracks</groupId>
- <artifactId>hyracks-control-nc</artifactId>
- <version>0.2.3-SNAPSHOT</version>
- <type>jar</type>
- <scope>compile</scope>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>2.0.2</version>
- <configuration>
- <source>1.7</source>
- <target>1.7</target>
- <encoding>UTF-8</encoding>
- <fork>true</fork>
- </configuration>
- </plugin>
- <plugin>
- <artifactId>maven-jar-plugin</artifactId>
- <executions>
- <execution>
- <id>patch</id>
- <goals>
- <goal>jar</goal>
- </goals>
- <phase>package</phase>
- <configuration>
- <classifier>patch</classifier>
- <finalName>a-hive</finalName>
- <includes>
- <include>**/org/apache/**</include>
- </includes>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>appassembler-maven-plugin</artifactId>
- <version>1.3</version>
- <executions>
- <execution>
- <configuration>
- <programs>
- <program>
- <mainClass>edu.uci.ics.asterix.hive.cli.CliDriver</mainClass>
- <name>algebricks-hivesterix-cmd</name>
- </program>
- </programs>
- <repositoryLayout>flat</repositoryLayout>
- <repositoryName>lib</repositoryName>
- </configuration>
- <phase>package</phase>
- <goals>
- <goal>assemble</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <artifactId>maven-assembly-plugin</artifactId>
- <version>2.2-beta-5</version>
- <executions>
- <execution>
- <configuration>
- <descriptors>
- <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
- </descriptors>
- </configuration>
- <phase>package</phase>
- <goals>
- <goal>attached</goal>
- </goals>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <version>2.13</version>
- <configuration>
- <forkMode>pertest</forkMode>
- <argLine>-enableassertions -Xmx2047m -Dfile.encoding=UTF-8
- -Djava.util.logging.config.file=src/test/resources/logging.properties</argLine>
- <includes>
- <include>**/test/optimizer/*TestSuite.java</include>
- <include>**/test/optimizer/*Test.java</include>
- <include>**/test/runtimefunction/*TestSuite.java</include>
- <include>**/test/runtimefunction/*Test.java</include>
- </includes>
- </configuration>
- </plugin>
- <plugin>
- <artifactId>maven-resources-plugin</artifactId>
- <version>2.5</version>
- <executions>
- <execution>
- <id>copy-resources</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/bin</outputDirectory>
- <resources>
- <resource>
- <directory>resource/bin</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-conf</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/conf</outputDirectory>
- <resources>
- <resource>
- <directory>conf</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-asterix</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/asterix</outputDirectory>
- <resources>
- <resource>
- <directory>resource/asterix</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-asterix-dbg</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/asterix_dbg</outputDirectory>
- <resources>
- <resource>
- <directory>resource/asterix_dbg</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-hivesterix</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/hivesterix</outputDirectory>
- <resources>
- <resource>
- <directory>resource/hivesterix</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-conf2</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>target/appassembler/hivesterix/conf</outputDirectory>
- <resources>
- <resource>
- <directory>conf</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- <execution>
- <id>copy-data</id>
- <!-- here the phase you need -->
- <phase>package</phase>
- <goals>
- <goal>copy-resources</goal>
- </goals>
- <configuration>
- <outputDirectory>data</outputDirectory>
- <resources>
- <resource>
- <directory>resource/data</directory>
- </resource>
- </resources>
- </configuration>
- </execution>
- </executions>
- </plugin>
- <plugin>
- <artifactId>maven-clean-plugin</artifactId>
- <version>2.5</version>
- <configuration>
- <filesets>
- <fileset>
- <directory>.</directory>
- <includes>
- <include>metastore*</include>
- <include>hadoop*</include>
- <include>edu*</include>
- <include>tmp*</include>
- <include>build*</include>
- <include>target*</include>
- <include>log*</include>
- <include>derby.log</include>
- <include>ClusterController*</include>
- </includes>
- </fileset>
- </filesets>
- </configuration>
- </plugin>
- </plugins>
- </build>
- <repositories>
- <repository>
- <releases>
- <enabled>true</enabled>
- <updatePolicy>always</updatePolicy>
- <checksumPolicy>warn</checksumPolicy>
- </releases>
- <snapshots>
- <enabled>true</enabled>
- <updatePolicy>always</updatePolicy>
- <checksumPolicy>fail</checksumPolicy>
- </snapshots>
- <id>third-party</id>
- <url>http://obelix.ics.uci.edu/nexus/content/repositories/third-party</url>
- </repository>
- <repository>
- <releases>
- <enabled>true</enabled>
- <updatePolicy>always</updatePolicy>
- <checksumPolicy>warn</checksumPolicy>
- </releases>
- <snapshots>
- <enabled>true</enabled>
- <updatePolicy>always</updatePolicy>
- <checksumPolicy>fail</checksumPolicy>
- </snapshots>
- <id>hyracks-public-release</id>
- <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-public-releases</url>
- </repository>
- </repositories>
- <distributionManagement>
- <!-- use the following if you're not using a snapshot version. -->
- <repository>
- <id>hivesterix</id>
- <name>hivesterix</name>
- <url>scp://obelix.ics.uci.edu/nexus/content/groups/hivesterix-public</url>
- </repository>
- <!-- use the following if you ARE using a snapshot version. -->
- <snapshotRepository>
- <id>hivesterix</id>
- <name>Repository Name</name>
- <url>scp://obelix.ics.uci.edu/nexus/content/groups/hivesterix-public</url>
- </snapshotRepository>
- </distributionManagement>
-</project>
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hivesterix</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <packaging>pom</packaging>
+ <name>hivesterix</name>
+
+ <properties>
+ <jvm.extraargs />
+ </properties>
+
+ <profiles>
+ <profile>
+ <id>macosx</id>
+ <activation>
+ <os>
+ <name>mac os x</name>
+ </os>
+ <jdk>1.7</jdk>
+ </activation>
+ <properties>
+ <jvm.extraargs>-Djava.nio.channels.spi.SelectorProvider=sun.nio.ch.KQueueSelectorProvider</jvm.extraargs>
+ </properties>
+ </profile>
+ </profiles>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-release-plugin</artifactId>
+ <version>2.0</version>
+ <configuration>
+ <goals>package source:jar javadoc:jar deploy:deploy</goals>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>versions-maven-plugin</artifactId>
+ <version>1.2</version>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.13</version>
+ <configuration>
+ <forkMode>pertest</forkMode>
+ <argLine>-enableassertions
+ -Djava.util.logging.config.file=${user.home}/logging.properties
+ -Xdebug
+ -Xrunjdwp:transport=dt_socket,server=y,address=8000,suspend=n
+ ${jvm.extraargs}</argLine>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <reporting>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-changelog-plugin</artifactId>
+ <version>2.2</version>
+ </plugin>
+ </plugins>
+ </reporting>
+
+ <distributionManagement>
+ <repository>
+ <id>hyracks-releases</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-releases/</url>
+ </repository>
+ <snapshotRepository>
+ <id>hyracks-snapshots</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-snapshots/</url>
+ </snapshotRepository>
+ </distributionManagement>
+
+ <repositories>
+ <repository>
+ <id>hyracks-public</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+ </repository>
+ <repository>
+ <id>jboss-public</id>
+ <url>https://repository.jboss.org/nexus/content/groups/public/</url>
+ </repository>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>third-party</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/third-party</url>
+ </repository>
+ <repository>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ <checksumPolicy>fail</checksumPolicy>
+ </snapshots>
+ <id>hyracks-public-release</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/repositories/hyracks-public-releases</url>
+ </repository>
+ </repositories>
+
+ <pluginRepositories>
+ <pluginRepository>
+ <id>hyracks-public</id>
+ <url>http://obelix.ics.uci.edu/nexus/content/groups/hyracks-public/</url>
+ <releases>
+ <updatePolicy>always</updatePolicy>
+ </releases>
+ </pluginRepository>
+ </pluginRepositories>
+
+ <modules>
+ <module>hivesterix-runtime</module>
+ <module>hivesterix-translator</module>
+ <module>hivesterix-optimizer</module>
+ <module>hivesterix-serde</module>
+ <module>hivesterix-dist</module>
+ <module>hivesterix-common</module>
+ </modules>
+</project>
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java
deleted file mode 100644
index 3c84566..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-/**
- * some constants for expression
- *
- * @author yingyib
- *
- */
-public class ExpressionConstant {
-
- /**
- * name space for function identifier
- */
- public static String NAMESPACE = "hive";
-
- /**
- * field expression: modeled as function in Algebricks
- */
- public static String FIELDACCESS = "fieldaccess";
-
- /**
- * null string: modeled as null in Algebricks
- */
- public static String NULL = "null";
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java
deleted file mode 100644
index 18380f7..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java
+++ /dev/null
@@ -1,83 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.util.HashMap;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-
-public class HiveAlgebricksBuiltInFunctionMap {
-
- /**
- * hive auqa builtin function map instance
- */
- public static HiveAlgebricksBuiltInFunctionMap INSTANCE = new HiveAlgebricksBuiltInFunctionMap();
-
- /**
- * hive to Algebricks function name mapping
- */
- private HashMap<String, FunctionIdentifier> hiveToAlgebricksMap = new HashMap<String, FunctionIdentifier>();
-
- /**
- * Algebricks to hive function name mapping
- */
- private HashMap<FunctionIdentifier, String> AlgebricksToHiveMap = new HashMap<FunctionIdentifier, String>();
-
- /**
- * the bi-directional mapping between hive functions and Algebricks
- * functions
- */
- private HiveAlgebricksBuiltInFunctionMap() {
- hiveToAlgebricksMap.put("and", AlgebricksBuiltinFunctions.AND);
- hiveToAlgebricksMap.put("or", AlgebricksBuiltinFunctions.OR);
- hiveToAlgebricksMap.put("!", AlgebricksBuiltinFunctions.NOT);
- hiveToAlgebricksMap.put("not", AlgebricksBuiltinFunctions.NOT);
- hiveToAlgebricksMap.put("=", AlgebricksBuiltinFunctions.EQ);
- hiveToAlgebricksMap.put("<>", AlgebricksBuiltinFunctions.NEQ);
- hiveToAlgebricksMap.put(">", AlgebricksBuiltinFunctions.GT);
- hiveToAlgebricksMap.put("<", AlgebricksBuiltinFunctions.LT);
- hiveToAlgebricksMap.put(">=", AlgebricksBuiltinFunctions.GE);
- hiveToAlgebricksMap.put("<=", AlgebricksBuiltinFunctions.LE);
-
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.AND, "and");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.OR, "or");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NOT, "!");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NOT, "not");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.EQ, "=");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NEQ, "<>");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.GT, ">");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.LT, "<");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.GE, ">=");
- AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.LE, "<=");
- }
-
- /**
- * get hive function name from Algebricks function identifier
- *
- * @param AlgebricksId
- * @return hive
- */
- public String getHiveFunctionName(FunctionIdentifier AlgebricksId) {
- return AlgebricksToHiveMap.get(AlgebricksId);
- }
-
- /**
- * get hive UDF or Generic class's corresponding built-in functions
- *
- * @param funcClass
- * @return function identifier
- */
- public FunctionIdentifier getAlgebricksFunctionId(Class<?> funcClass) {
- Description annotation = (Description) funcClass
- .getAnnotation(Description.class);
- String hiveUDFName = "";
- if (annotation == null) {
- hiveUDFName = null;
- return null;
- } else {
- hiveUDFName = annotation.name();
- return hiveToAlgebricksMap.get(hiveUDFName);
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java
deleted file mode 100644
index afb7d39..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java
+++ /dev/null
@@ -1,200 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionTypeComputer;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-
-public class HiveExpressionTypeComputer implements IExpressionTypeComputer {
-
- public static IExpressionTypeComputer INSTANCE = new HiveExpressionTypeComputer();
-
- @Override
- public Object getType(ILogicalExpression expr,
- IMetadataProvider<?, ?> metadataProvider,
- IVariableTypeEnvironment env) throws AlgebricksException {
- if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
- /**
- * function expression
- */
- AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
- IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
-
- /**
- * argument expressions, types, object inspectors
- */
- List<Mutable<ILogicalExpression>> arguments = funcExpr
- .getArguments();
- List<TypeInfo> argumentTypes = new ArrayList<TypeInfo>();
-
- /**
- * get types of argument
- */
- for (Mutable<ILogicalExpression> argument : arguments) {
- TypeInfo type = (TypeInfo) getType(argument.getValue(),
- metadataProvider, env);
- argumentTypes.add(type);
- }
-
- ObjectInspector[] childrenOIs = new ObjectInspector[argumentTypes
- .size()];
-
- /**
- * get object inspector
- */
- for (int i = 0; i < argumentTypes.size(); i++) {
- childrenOIs[i] = TypeInfoUtils
- .getStandardWritableObjectInspectorFromTypeInfo(argumentTypes
- .get(i));
- }
-
- /**
- * type inference for scalar function
- */
- if (funcExpr instanceof ScalarFunctionCallExpression) {
-
- FunctionIdentifier AlgebricksId = funcInfo
- .getFunctionIdentifier();
- Object functionInfo = ((HiveFunctionInfo) funcInfo).getInfo();
- String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE
- .getHiveFunctionName(AlgebricksId);
- GenericUDF udf;
- if (udfName != null) {
- /**
- * get corresponding function info for built-in functions
- */
- FunctionInfo fInfo = FunctionRegistry
- .getFunctionInfo(udfName);
- udf = fInfo.getGenericUDF();
- } else if (functionInfo != null) {
- /**
- * for GenericUDFBridge: we should not call get type of this
- * hive expression, because parameters may have been
- * changed!
- */
- ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) functionInfo;
- udf = hiveExpr.getGenericUDF();
- } else {
- /**
- * for other generic UDF
- */
- Class<?> udfClass;
- try {
- udfClass = Class.forName(AlgebricksId.getName());
- udf = (GenericUDF) udfClass.newInstance();
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- }
- /**
- * doing the actual type inference
- */
- ObjectInspector oi = null;
- try {
- oi = udf.initialize(childrenOIs);
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- TypeInfo exprType = TypeInfoUtils
- .getTypeInfoFromObjectInspector(oi);
- return exprType;
-
- } else if (funcExpr instanceof AggregateFunctionCallExpression) {
- /**
- * hive aggregation info
- */
- AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- /**
- * type inference for aggregation function
- */
- GenericUDAFEvaluator result = aggregateDesc
- .getGenericUDAFEvaluator();
-
- ObjectInspector returnOI = null;
- try {
- returnOI = result
- .init(aggregateDesc.getMode(), childrenOIs);
- } catch (HiveException e) {
- e.printStackTrace();
- }
- TypeInfo exprType = TypeInfoUtils
- .getTypeInfoFromObjectInspector(returnOI);
- return exprType;
- } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
- /**
- * type inference for UDTF function
- */
- UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- GenericUDTF udtf = hiveDesc.getGenericUDTF();
- ObjectInspector returnOI = null;
- try {
- returnOI = udtf.initialize(childrenOIs);
- } catch (HiveException e) {
- e.printStackTrace();
- }
- TypeInfo exprType = TypeInfoUtils
- .getTypeInfoFromObjectInspector(returnOI);
- return exprType;
- } else {
- throw new IllegalStateException(
- "unrecognized function expression "
- + expr.getClass().getName());
- }
- } else if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
- /**
- * get type for variable in the environment
- */
- VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
- LogicalVariable var = varExpr.getVariableReference();
- TypeInfo type = (TypeInfo) env.getVarType(var);
- return type;
- } else if (expr.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
- /**
- * get type for constant, from its java class
- */
- ConstantExpression constExpr = (ConstantExpression) expr;
- HivesterixConstantValue value = (HivesterixConstantValue) constExpr
- .getValue();
- TypeInfo type = TypeInfoFactory
- .getPrimitiveTypeInfoFromJavaPrimitive(value.getObject()
- .getClass());
- return type;
- } else {
- throw new IllegalStateException("illegal expressions "
- + expr.getClass().getName());
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java
deleted file mode 100644
index 220bd00..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.io.Serializable;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-
-public class HiveFunctionInfo implements IFunctionInfo, Serializable {
-
- private static final long serialVersionUID = 1L;
-
- /**
- * primary function identifier
- */
- private transient FunctionIdentifier fid;
-
- /**
- * secondary function identifier: function name
- */
- private transient Object secondaryFid;
-
- public HiveFunctionInfo(FunctionIdentifier fid, Object secondFid) {
- this.fid = fid;
- this.secondaryFid = secondFid;
- }
-
- @Override
- public FunctionIdentifier getFunctionIdentifier() {
- return fid;
- }
-
- public Object getInfo() {
- return secondaryFid;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java
deleted file mode 100644
index 8dea691..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java
+++ /dev/null
@@ -1,84 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IMergeAggregationExpressionFactory;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-
-/**
- * generate merge aggregation expression from an aggregation expression
- *
- * @author yingyib
- *
- */
-public class HiveMergeAggregationExpressionFactory implements
- IMergeAggregationExpressionFactory {
-
- public static IMergeAggregationExpressionFactory INSTANCE = new HiveMergeAggregationExpressionFactory();
-
- @Override
- public ILogicalExpression createMergeAggregation(ILogicalExpression expr,
- IOptimizationContext context) throws AlgebricksException {
- /**
- * type inference for scalar function
- */
- if (expr instanceof AggregateFunctionCallExpression) {
- AggregateFunctionCallExpression funcExpr = (AggregateFunctionCallExpression) expr;
- /**
- * hive aggregation info
- */
- AggregationDesc aggregator = (AggregationDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- LogicalVariable inputVar = context.newVar();
- ExprNodeDesc col = new ExprNodeColumnDesc(
- TypeInfoFactory.voidTypeInfo, inputVar.toString(), null,
- false);
- ArrayList<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
- parameters.add(col);
-
- GenericUDAFEvaluator.Mode mergeMode;
- if (aggregator.getMode() == GenericUDAFEvaluator.Mode.PARTIAL1)
- mergeMode = GenericUDAFEvaluator.Mode.PARTIAL2;
- else if (aggregator.getMode() == GenericUDAFEvaluator.Mode.COMPLETE)
- mergeMode = GenericUDAFEvaluator.Mode.FINAL;
- else
- mergeMode = aggregator.getMode();
- AggregationDesc mergeDesc = new AggregationDesc(
- aggregator.getGenericUDAFName(),
- aggregator.getGenericUDAFEvaluator(), parameters,
- aggregator.getDistinct(), mergeMode);
-
- String UDAFName = mergeDesc.getGenericUDAFName();
- List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
- arguments.add(new MutableObject<ILogicalExpression>(
- new VariableReferenceExpression(inputVar)));
-
- FunctionIdentifier funcId = new FunctionIdentifier(
- ExpressionConstant.NAMESPACE, UDAFName + "("
- + mergeDesc.getMode() + ")");
- HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, mergeDesc);
- AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(
- funcInfo, false, arguments);
- return aggregationExpression;
- } else {
- throw new IllegalStateException("illegal expressions "
- + expr.getClass().getName());
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java
deleted file mode 100644
index 10c9b8a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.INullableTypeComputer;
-
-public class HiveNullableTypeComputer implements INullableTypeComputer {
-
- public static INullableTypeComputer INSTANCE = new HiveNullableTypeComputer();
-
- @Override
- public Object makeNullableType(Object type) throws AlgebricksException {
- return type;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java
deleted file mode 100644
index 7062e26..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java
+++ /dev/null
@@ -1,116 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionTypeComputer;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IPartialAggregationTypeComputer;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-
-public class HivePartialAggregationTypeComputer implements
- IPartialAggregationTypeComputer {
-
- public static IPartialAggregationTypeComputer INSTANCE = new HivePartialAggregationTypeComputer();
-
- @Override
- public Object getType(ILogicalExpression expr,
- IVariableTypeEnvironment env,
- IMetadataProvider<?, ?> metadataProvider)
- throws AlgebricksException {
- if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
- IExpressionTypeComputer tc = HiveExpressionTypeComputer.INSTANCE;
- /**
- * function expression
- */
- AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
-
- /**
- * argument expressions, types, object inspectors
- */
- List<Mutable<ILogicalExpression>> arguments = funcExpr
- .getArguments();
- List<TypeInfo> argumentTypes = new ArrayList<TypeInfo>();
-
- /**
- * get types of argument
- */
- for (Mutable<ILogicalExpression> argument : arguments) {
- TypeInfo type = (TypeInfo) tc.getType(argument.getValue(),
- metadataProvider, env);
- argumentTypes.add(type);
- }
-
- ObjectInspector[] childrenOIs = new ObjectInspector[argumentTypes
- .size()];
-
- /**
- * get object inspector
- */
- for (int i = 0; i < argumentTypes.size(); i++) {
- childrenOIs[i] = TypeInfoUtils
- .getStandardWritableObjectInspectorFromTypeInfo(argumentTypes
- .get(i));
- }
-
- /**
- * type inference for scalar function
- */
- if (funcExpr instanceof AggregateFunctionCallExpression) {
- /**
- * hive aggregation info
- */
- AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- /**
- * type inference for aggregation function
- */
- GenericUDAFEvaluator result = aggregateDesc
- .getGenericUDAFEvaluator();
-
- ObjectInspector returnOI = null;
- try {
- returnOI = result.init(
- getPartialMode(aggregateDesc.getMode()),
- childrenOIs);
- } catch (HiveException e) {
- e.printStackTrace();
- }
- TypeInfo exprType = TypeInfoUtils
- .getTypeInfoFromObjectInspector(returnOI);
- return exprType;
- } else {
- throw new IllegalStateException("illegal expressions "
- + expr.getClass().getName());
- }
- } else {
- throw new IllegalStateException("illegal expressions "
- + expr.getClass().getName());
- }
- }
-
- private Mode getPartialMode(Mode mode) {
- Mode partialMode;
- if (mode == Mode.FINAL)
- partialMode = Mode.PARTIAL2;
- else if (mode == Mode.COMPLETE)
- partialMode = Mode.PARTIAL1;
- else
- partialMode = mode;
- return partialMode;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java
deleted file mode 100644
index de9cea6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java
+++ /dev/null
@@ -1,55 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IAlgebricksConstantValue;
-
-public class HivesterixConstantValue implements IAlgebricksConstantValue {
-
- private Object object;
-
- public HivesterixConstantValue(Object object) {
- this.setObject(object);
- }
-
- @Override
- public boolean isFalse() {
- return object == Boolean.FALSE;
- }
-
- @Override
- public boolean isNull() {
- return object == null;
- }
-
- @Override
- public boolean isTrue() {
- return object == Boolean.TRUE;
- }
-
- public void setObject(Object object) {
- this.object = object;
- }
-
- public Object getObject() {
- return object;
- }
-
- @Override
- public String toString() {
- return object.toString();
- }
-
- @Override
- public boolean equals(Object o) {
- if (!(o instanceof HivesterixConstantValue)) {
- return false;
- }
- HivesterixConstantValue v2 = (HivesterixConstantValue) o;
- return object.equals(v2.getObject());
- }
-
- @Override
- public int hashCode() {
- return object.hashCode();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/Schema.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/Schema.java
deleted file mode 100644
index 2b1d191..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/expression/Schema.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package edu.uci.ics.hivesterix.logical.expression;
-
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-
-public class Schema implements Serializable {
-
- private static final long serialVersionUID = 1L;
-
- private List<String> fieldNames;
-
- private List<TypeInfo> fieldTypes;
-
- public Schema(List<String> fieldNames, List<TypeInfo> fieldTypes) {
- this.fieldNames = fieldNames;
- this.fieldTypes = fieldTypes;
- }
-
- public ObjectInspector toObjectInspector() {
- return LazyUtils.getLazyObjectInspector(fieldNames, fieldTypes);
- }
-
- public List<String> getNames() {
- return fieldNames;
- }
-
- public List<TypeInfo> getTypes() {
- return fieldTypes;
- }
-
- public Object[] getSchema() {
- return fieldTypes.toArray();
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
deleted file mode 100644
index 2765e44..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
+++ /dev/null
@@ -1,849 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan;
-
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.LimitOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.UDF;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
-import edu.uci.ics.hivesterix.logical.expression.HiveAlgebricksBuiltInFunctionMap;
-import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
-import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.logical.plan.visitor.ExtractVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.FilterVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.GroupByVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.JoinVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.LateralViewJoinVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.LimitVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.MapJoinVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.ProjectVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.SortVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.TableScanWriteVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.UnionVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Visitor;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ReplicateOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
-import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor;
-import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.PlanPrettyPrinter;
-
-@SuppressWarnings("rawtypes")
-public class HiveAlgebricksTranslator implements Translator {
-
- private int currentVariable = 0;
-
- private List<Mutable<ILogicalOperator>> logicalOp = new ArrayList<Mutable<ILogicalOperator>>();
-
- private boolean continueTraverse = true;
-
- private IMetadataProvider<PartitionDesc, Object> metaData;
-
- /**
- * map variable name to the logical variable
- */
- private HashMap<String, LogicalVariable> nameToLogicalVariableMap = new HashMap<String, LogicalVariable>();
-
- /**
- * map field name to LogicalVariable
- */
- private HashMap<String, LogicalVariable> fieldToLogicalVariableMap = new HashMap<String, LogicalVariable>();
-
- /**
- * map logical variable to name
- */
- private HashMap<LogicalVariable, String> logicalVariableToFieldMap = new HashMap<LogicalVariable, String>();
-
- /**
- * asterix root operators
- */
- private List<Mutable<ILogicalOperator>> rootOperators = new ArrayList<Mutable<ILogicalOperator>>();
-
- /**
- * a list of visitors
- */
- private List<Visitor> visitors = new ArrayList<Visitor>();
-
- /**
- * output writer to print things out
- */
- private static PrintWriter outputWriter = new PrintWriter(
- new OutputStreamWriter(System.out));
-
- /**
- * map a logical variable to type info
- */
- private HashMap<LogicalVariable, TypeInfo> variableToType = new HashMap<LogicalVariable, TypeInfo>();
-
- @Override
- public LogicalVariable getVariable(String fieldName, TypeInfo type) {
- LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
- if (var == null) {
- currentVariable++;
- var = new LogicalVariable(currentVariable);
- fieldToLogicalVariableMap.put(fieldName, var);
- nameToLogicalVariableMap.put(var.toString(), var);
- variableToType.put(var, type);
- logicalVariableToFieldMap.put(var, fieldName);
- }
- return var;
- }
-
- @Override
- public LogicalVariable getNewVariable(String fieldName, TypeInfo type) {
- currentVariable++;
- LogicalVariable var = new LogicalVariable(currentVariable);
- fieldToLogicalVariableMap.put(fieldName, var);
- nameToLogicalVariableMap.put(var.toString(), var);
- variableToType.put(var, type);
- logicalVariableToFieldMap.put(var, fieldName);
- return var;
- }
-
- @Override
- public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar) {
- String name = this.logicalVariableToFieldMap.get(oldVar);
- if (name != null) {
- fieldToLogicalVariableMap.put(name, newVar);
- nameToLogicalVariableMap.put(newVar.toString(), newVar);
- nameToLogicalVariableMap.put(oldVar.toString(), newVar);
- logicalVariableToFieldMap.put(newVar, name);
- }
- }
-
- @Override
- public IMetadataProvider<PartitionDesc, Object> getMetadataProvider() {
- return metaData;
- }
-
- /**
- * only get an variable, without rewriting it
- *
- * @param fieldName
- * @return
- */
- private LogicalVariable getVariableOnly(String fieldName) {
- return fieldToLogicalVariableMap.get(fieldName);
- }
-
- private void updateVariable(String fieldName, LogicalVariable variable) {
- LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);
- if (var == null) {
- fieldToLogicalVariableMap.put(fieldName, variable);
- nameToLogicalVariableMap.put(fieldName, variable);
- } else if (!var.equals(variable)) {
- fieldToLogicalVariableMap.put(fieldName, variable);
- nameToLogicalVariableMap.put(fieldName, variable);
- }
- }
-
- /**
- * get a list of logical variables from the schema
- *
- * @param schema
- * @return
- */
- @Override
- public List<LogicalVariable> getVariablesFromSchema(Schema schema) {
- List<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- List<String> names = schema.getNames();
-
- for (String name : names)
- variables.add(nameToLogicalVariableMap.get(name));
- return variables;
- }
-
- /**
- * get variable to typeinfo map
- *
- * @return
- */
- public HashMap<LogicalVariable, TypeInfo> getVariableContext() {
- return this.variableToType;
- }
-
- /**
- * get the number of variables s
- *
- * @return
- */
- public int getVariableCounter() {
- return currentVariable + 1;
- }
-
- /**
- * translate from hive operator tree to asterix operator tree
- *
- * @param hive
- * roots
- * @return Algebricks roots
- */
- public void translate(List<Operator> hiveRoot,
- ILogicalOperator parentOperator,
- HashMap<String, PartitionDesc> aliasToPathMap)
- throws AlgebricksException {
- /**
- * register visitors
- */
- visitors.add(new FilterVisitor());
- visitors.add(new GroupByVisitor());
- visitors.add(new JoinVisitor());
- visitors.add(new LateralViewJoinVisitor());
- visitors.add(new UnionVisitor());
- visitors.add(new LimitVisitor());
- visitors.add(new MapJoinVisitor());
- visitors.add(new ProjectVisitor());
- visitors.add(new SortVisitor());
- visitors.add(new ExtractVisitor());
- visitors.add(new TableScanWriteVisitor(aliasToPathMap));
-
- List<Mutable<ILogicalOperator>> refList = translate(hiveRoot,
- new MutableObject<ILogicalOperator>(parentOperator));
- insertReplicateOperator(refList);
- if (refList != null)
- rootOperators.addAll(refList);
- }
-
- /**
- * translate operator DAG
- *
- * @param hiveRoot
- * @param AlgebricksParentOperator
- * @return
- */
- private List<Mutable<ILogicalOperator>> translate(List<Operator> hiveRoot,
- Mutable<ILogicalOperator> AlgebricksParentOperator)
- throws AlgebricksException {
-
- for (Operator hiveOperator : hiveRoot) {
- continueTraverse = true;
- Mutable<ILogicalOperator> currentOperatorRef = null;
- if (hiveOperator.getType() == OperatorType.FILTER) {
- FilterOperator fop = (FilterOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.REDUCESINK) {
- ReduceSinkOperator fop = (ReduceSinkOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.JOIN) {
- JoinOperator fop = (JoinOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null) {
- continueTraverse = true;
- break;
- } else
- continueTraverse = false;
- }
- if (currentOperatorRef == null)
- return null;
- } else if (hiveOperator.getType() == OperatorType.LATERALVIEWJOIN) {
- LateralViewJoinOperator fop = (LateralViewJoinOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- if (currentOperatorRef == null)
- return null;
- } else if (hiveOperator.getType() == OperatorType.MAPJOIN) {
- MapJoinOperator fop = (MapJoinOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null) {
- continueTraverse = true;
- break;
- } else
- continueTraverse = false;
- }
- if (currentOperatorRef == null)
- return null;
- } else if (hiveOperator.getType() == OperatorType.SELECT) {
- SelectOperator fop = (SelectOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.EXTRACT) {
- ExtractOperator fop = (ExtractOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.GROUPBY) {
- GroupByOperator fop = (GroupByOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.TABLESCAN) {
- TableScanOperator fop = (TableScanOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.FILESINK) {
- FileSinkOperator fop = (FileSinkOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(fop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.LIMIT) {
- LimitOperator lop = (LimitOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(lop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.UDTF) {
- UDTFOperator lop = (UDTFOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(lop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null)
- break;
- }
- } else if (hiveOperator.getType() == OperatorType.UNION) {
- UnionOperator lop = (UnionOperator) hiveOperator;
- for (Visitor visitor : visitors) {
- currentOperatorRef = visitor.visit(lop,
- AlgebricksParentOperator, this);
- if (currentOperatorRef != null) {
- continueTraverse = true;
- break;
- } else
- continueTraverse = false;
- }
- } else
- ;
- if (hiveOperator.getChildOperators() != null
- && hiveOperator.getChildOperators().size() > 0
- && continueTraverse) {
- @SuppressWarnings("unchecked")
- List<Operator> children = hiveOperator.getChildOperators();
- if (currentOperatorRef == null)
- currentOperatorRef = AlgebricksParentOperator;
- translate(children, currentOperatorRef);
- }
- if (hiveOperator.getChildOperators() == null
- || hiveOperator.getChildOperators().size() == 0)
- logicalOp.add(currentOperatorRef);
- }
- return logicalOp;
- }
-
- /**
- * used in select, group by to get no-column-expression columns
- *
- * @param cols
- * @return
- */
- public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent,
- List<ExprNodeDesc> cols, ArrayList<LogicalVariable> variables) {
-
- ArrayList<Mutable<ILogicalExpression>> expressions = new ArrayList<Mutable<ILogicalExpression>>();
-
- /**
- * variables to be appended in the assign operator
- */
- ArrayList<LogicalVariable> appendedVariables = new ArrayList<LogicalVariable>();
-
- // one variable can only be assigned once
- for (ExprNodeDesc hiveExpr : cols) {
- rewriteExpression(hiveExpr);
-
- if (hiveExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc desc2 = (ExprNodeColumnDesc) hiveExpr;
- String fieldName = desc2.getTabAlias() + "."
- + desc2.getColumn();
-
- // System.out.println("project expr: " + fieldName);
-
- if (fieldName.indexOf("$$") < 0) {
- LogicalVariable var = getVariable(fieldName,
- hiveExpr.getTypeInfo());
- desc2.setColumn(var.toString());
- desc2.setTabAlias("");
- variables.add(var);
- } else {
- LogicalVariable var = nameToLogicalVariableMap.get(desc2
- .getColumn());
- String name = this.logicalVariableToFieldMap.get(var);
- var = this.getVariableOnly(name);
- variables.add(var);
- }
- } else {
- Mutable<ILogicalExpression> asterixExpr = translateScalarFucntion(hiveExpr);
- expressions.add(asterixExpr);
- LogicalVariable var = getVariable(hiveExpr.getExprString()
- + asterixExpr.hashCode(), hiveExpr.getTypeInfo());
- variables.add(var);
- appendedVariables.add(var);
- }
- }
-
- /**
- * create an assign operator to deal with appending
- */
- ILogicalOperator assignOp = null;
- if (appendedVariables.size() > 0) {
- assignOp = new AssignOperator(appendedVariables, expressions);
- assignOp.getInputs().add(parent);
- }
- return assignOp;
- }
-
- private ILogicalPlan plan;
-
- public ILogicalPlan genLogicalPlan() {
- plan = new ALogicalPlanImpl(rootOperators);
- return plan;
- }
-
- public void printOperators() throws AlgebricksException {
- LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
- StringBuilder buffer = new StringBuilder();
- PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
- outputWriter.println(buffer);
- outputWriter.println("rewritten variables: ");
- outputWriter.flush();
- printVariables();
-
- }
-
- public static void setOutputPrinter(PrintWriter writer) {
- outputWriter = writer;
- }
-
- private void printVariables() {
- Set<Entry<String, LogicalVariable>> entries = fieldToLogicalVariableMap
- .entrySet();
-
- for (Entry<String, LogicalVariable> entry : entries) {
- outputWriter.println(entry.getKey() + " -> " + entry.getValue());
- }
- outputWriter.flush();
- }
-
- /**
- * generate the object inspector for the output of an operator
- *
- * @param operator
- * The Hive operator
- * @return an ObjectInspector object
- */
- public Schema generateInputSchema(Operator operator) {
- List<String> variableNames = new ArrayList<String>();
- List<TypeInfo> typeList = new ArrayList<TypeInfo>();
- List<ColumnInfo> columns = operator.getSchema().getSignature();
-
- for (ColumnInfo col : columns) {
- // typeList.add();
- TypeInfo type = col.getType();
- typeList.add(type);
-
- String fieldName = col.getInternalName();
- variableNames.add(fieldName);
- }
-
- return new Schema(variableNames, typeList);
- }
-
- /**
- * rewrite the names of output columns for feature expression evaluators to
- * use
- *
- * @param operator
- */
- public void rewriteOperatorOutputSchema(Operator operator) {
- List<ColumnInfo> columns = operator.getSchema().getSignature();
-
- for (ColumnInfo column : columns) {
- String columnName = column.getTabAlias() + "."
- + column.getInternalName();
- if (columnName.indexOf("$$") < 0) {
- LogicalVariable var = getVariable(columnName, column.getType());
- column.setInternalName(var.toString());
- }
- }
- }
-
- @Override
- public void rewriteOperatorOutputSchema(List<LogicalVariable> variables,
- Operator operator) {
-
- //printOperatorSchema(operator);
- List<ColumnInfo> columns = operator.getSchema().getSignature();
- if (variables.size() != columns.size()) {
- throw new IllegalStateException("output cardinality error "
- + operator.getName() + " variable size: "
- + variables.size() + " expected " + columns.size());
- }
-
- for (int i = 0; i < variables.size(); i++) {
- LogicalVariable var = variables.get(i);
- ColumnInfo column = columns.get(i);
- String fieldName = column.getTabAlias() + "."
- + column.getInternalName();
- if (fieldName.indexOf("$$") < 0) {
- updateVariable(fieldName, var);
- column.setInternalName(var.toString());
- }
- }
- //printOperatorSchema(operator);
- }
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpression(ExprNodeDesc expr) {
- if (expr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
- String fieldName = desc.getTabAlias() + "." + desc.getColumn();
- if (fieldName.indexOf("$$") < 0) {
- LogicalVariable var = getVariableOnly(fieldName);
- if (var == null) {
- fieldName = "." + desc.getColumn();
- var = getVariableOnly(fieldName);
- if (var == null) {
- fieldName = "null." + desc.getColumn();
- var = getVariableOnly(fieldName);
- if (var == null) {
- throw new IllegalStateException(fieldName
- + " is wrong!!! ");
- }
- }
- }
- String name = this.logicalVariableToFieldMap.get(var);
- var = getVariableOnly(name);
- desc.setColumn(var.toString());
- }
- } else {
- if (expr.getChildren() != null && expr.getChildren().size() > 0) {
- List<ExprNodeDesc> children = expr.getChildren();
- for (ExprNodeDesc desc : children)
- rewriteExpression(desc);
- }
- }
- }
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpressionPartial(ExprNodeDesc expr) {
- if (expr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;
- String fieldName = desc.getTabAlias() + "." + desc.getColumn();
- if (fieldName.indexOf("$$") < 0) {
- LogicalVariable var = getVariableOnly(fieldName);
- desc.setColumn(var.toString());
- }
- } else {
- if (expr.getChildren() != null && expr.getChildren().size() > 0) {
- List<ExprNodeDesc> children = expr.getChildren();
- for (ExprNodeDesc desc : children)
- rewriteExpressionPartial(desc);
- }
- }
- }
-
- // private void printOperatorSchema(Operator operator) {
- // // System.out.println(operator.getName());
- // // List<ColumnInfo> columns = operator.getSchema().getSignature();
- // // for (ColumnInfo column : columns) {
- // // System.out.print(column.getTabAlias() + "." +
- // // column.getInternalName() + " ");
- // // }
- // // System.out.println();
- // }
-
- /**
- * translate scalar function expression
- *
- * @param hiveExpr
- * @return
- */
- public Mutable<ILogicalExpression> translateScalarFucntion(
- ExprNodeDesc hiveExpr) {
- ILogicalExpression AlgebricksExpr;
-
- if (hiveExpr instanceof ExprNodeGenericFuncDesc) {
- List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
- List<ExprNodeDesc> children = hiveExpr.getChildren();
-
- for (ExprNodeDesc child : children)
- arguments.add(translateScalarFucntion(child));
-
- ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) hiveExpr;
- GenericUDF genericUdf = funcExpr.getGenericUDF();
- UDF udf = null;
- if (genericUdf instanceof GenericUDFBridge) {
- GenericUDFBridge bridge = (GenericUDFBridge) genericUdf;
- try {
- udf = bridge.getUdfClass().newInstance();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- /**
- * set up the hive function
- */
- Object hiveFunction = genericUdf;
- if (udf != null)
- hiveFunction = udf;
-
- FunctionIdentifier funcId = HiveAlgebricksBuiltInFunctionMap.INSTANCE
- .getAlgebricksFunctionId(hiveFunction.getClass());
- if (funcId == null) {
- funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE,
- hiveFunction.getClass().getName());
- }
-
- Object functionInfo = null;
- if (genericUdf instanceof GenericUDFBridge) {
- functionInfo = funcExpr;
- }
-
- /**
- * generate the function call expression
- */
- ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(
- new HiveFunctionInfo(funcId, functionInfo), arguments);
- AlgebricksExpr = AlgebricksFuncExpr;
-
- } else if (hiveExpr instanceof ExprNodeColumnDesc) {
- ExprNodeColumnDesc column = (ExprNodeColumnDesc) hiveExpr;
- LogicalVariable var = this.getVariable(column.getColumn());
- AlgebricksExpr = new VariableReferenceExpression(var);
-
- } else if (hiveExpr instanceof ExprNodeFieldDesc) {
- FunctionIdentifier funcId;
- funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE,
- ExpressionConstant.FIELDACCESS);
-
- ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(
- new HiveFunctionInfo(funcId, hiveExpr));
- AlgebricksExpr = AlgebricksFuncExpr;
- } else if (hiveExpr instanceof ExprNodeConstantDesc) {
- ExprNodeConstantDesc hiveConst = (ExprNodeConstantDesc) hiveExpr;
- Object value = hiveConst.getValue();
- AlgebricksExpr = new ConstantExpression(
- new HivesterixConstantValue(value));
- } else if (hiveExpr instanceof ExprNodeNullDesc) {
- FunctionIdentifier funcId;
- funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE,
- ExpressionConstant.NULL);
-
- ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(
- new HiveFunctionInfo(funcId, hiveExpr));
-
- AlgebricksExpr = AlgebricksFuncExpr;
- } else {
- throw new IllegalStateException("unknown hive expression");
- }
- return new MutableObject<ILogicalExpression>(AlgebricksExpr);
- }
-
- /**
- * translate aggregation function expression
- *
- * @param aggregateDesc
- * @return
- */
- public Mutable<ILogicalExpression> translateAggregation(
- AggregationDesc aggregateDesc) {
-
- String UDAFName = aggregateDesc.getGenericUDAFName();
-
- List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
- List<ExprNodeDesc> children = aggregateDesc.getParameters();
-
- for (ExprNodeDesc child : children)
- arguments.add(translateScalarFucntion(child));
-
- FunctionIdentifier funcId = new FunctionIdentifier(
- ExpressionConstant.NAMESPACE, UDAFName + "("
- + aggregateDesc.getMode() + ")");
- HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, aggregateDesc);
- AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(
- funcInfo, false, arguments);
- return new MutableObject<ILogicalExpression>(aggregationExpression);
- }
-
- /**
- * translate aggregation function expression
- *
- * @param aggregator
- * @return
- */
- public Mutable<ILogicalExpression> translateUnnestFunction(
- UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument) {
-
- String UDTFName = udtfDesc.getUDTFName();
-
- FunctionIdentifier funcId = new FunctionIdentifier(
- ExpressionConstant.NAMESPACE, UDTFName);
- UnnestingFunctionCallExpression unnestingExpression = new UnnestingFunctionCallExpression(
- new HiveFunctionInfo(funcId, udtfDesc));
- unnestingExpression.getArguments().add(argument);
- return new MutableObject<ILogicalExpression>(unnestingExpression);
- }
-
- /**
- * get typeinfo
- */
- @Override
- public TypeInfo getType(LogicalVariable var) {
- return variableToType.get(var);
- }
-
- /**
- * get variable from variable name
- */
- @Override
- public LogicalVariable getVariable(String name) {
- return nameToLogicalVariableMap.get(name);
- }
-
- @Override
- public LogicalVariable getVariableFromFieldName(String fieldName) {
- return this.getVariableOnly(fieldName);
- }
-
- /**
- * set the metadata provider
- */
- @Override
- public void setMetadataProvider(
- IMetadataProvider<PartitionDesc, Object> metadata) {
- this.metaData = metadata;
- }
-
- /**
- * insert ReplicateOperator when necessary
- */
- private void insertReplicateOperator(List<Mutable<ILogicalOperator>> roots) {
- Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> childToParentsMap = new HashMap<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>>();
- buildChildToParentsMapping(roots, childToParentsMap);
- for (Entry<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> entry : childToParentsMap
- .entrySet()) {
- List<Mutable<ILogicalOperator>> pList = entry.getValue();
- if (pList.size() > 1) {
- ILogicalOperator rop = new ReplicateOperator(pList.size());
- Mutable<ILogicalOperator> ropRef = new MutableObject<ILogicalOperator>(
- rop);
- Mutable<ILogicalOperator> childRef = entry.getKey();
- rop.getInputs().add(childRef);
- for (Mutable<ILogicalOperator> parentRef : pList) {
- ILogicalOperator parentOp = parentRef.getValue();
- int index = parentOp.getInputs().indexOf(childRef);
- parentOp.getInputs().set(index, ropRef);
- }
- }
- }
- }
-
- /**
- * build the mapping from child to Parents
- *
- * @param roots
- * @param childToParentsMap
- */
- private void buildChildToParentsMapping(
- List<Mutable<ILogicalOperator>> roots,
- Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> map) {
- for (Mutable<ILogicalOperator> opRef : roots) {
- List<Mutable<ILogicalOperator>> childRefs = opRef.getValue()
- .getInputs();
- for (Mutable<ILogicalOperator> childRef : childRefs) {
- List<Mutable<ILogicalOperator>> parentList = map.get(childRef);
- if (parentList == null) {
- parentList = new ArrayList<Mutable<ILogicalOperator>>();
- map.put(childRef, parentList);
- }
- if (!parentList.contains(opRef))
- parentList.add(opRef);
- }
- buildChildToParentsMapping(childRefs, map);
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java
deleted file mode 100644
index 494e796..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveLogicalPlanAndMetaData.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan;
-
-import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlanAndMetadata;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-
-@SuppressWarnings({ "rawtypes", "unchecked" })
-public class HiveLogicalPlanAndMetaData implements ILogicalPlanAndMetadata {
-
- IMetadataProvider metadata;
- ILogicalPlan plan;
-
- public HiveLogicalPlanAndMetaData(ILogicalPlan plan,
- IMetadataProvider metadata) {
- this.plan = plan;
- this.metadata = metadata;
- }
-
- @Override
- public IMetadataProvider getMetadataProvider() {
- return metadata;
- }
-
- @Override
- public ILogicalPlan getPlan() {
- return plan;
- }
-
- @Override
- public AlgebricksPartitionConstraint getClusterLocations() {
- // TODO Auto-generated method stub
- return null;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java
deleted file mode 100644
index 0d234fb..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveOperatorAnnotations.java
+++ /dev/null
@@ -1,8 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan;
-
-public class HiveOperatorAnnotations {
-
- // hints
- public static final String LOCAL_GROUP_BY = "LOCAL_GROUP_BY";
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java
deleted file mode 100644
index 9a84164..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ExtractVisitor.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-
-public class ExtractVisitor extends DefaultVisitor {
-
- @Override
- public Mutable<ILogicalOperator> visit(ExtractOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Schema currentSchema = t.generateInputSchema(operator
- .getParentOperators().get(0));
- operator.setSchema(operator.getParentOperators().get(0).getSchema());
- List<LogicalVariable> latestOutputSchema = t
- .getVariablesFromSchema(currentSchema);
- t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
- return null;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java
deleted file mode 100644
index b276ba9..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/FilterVisitor.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.FilterDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
-
-public class FilterVisitor extends DefaultVisitor {
-
- @Override
- public Mutable<ILogicalOperator> visit(FilterOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Schema currentSchema = t.generateInputSchema(operator
- .getParentOperators().get(0));
-
- FilterDesc desc = (FilterDesc) operator.getConf();
- ExprNodeDesc predicate = desc.getPredicate();
- t.rewriteExpression(predicate);
-
- Mutable<ILogicalExpression> exprs = t.translateScalarFucntion(desc
- .getPredicate());
- ILogicalOperator currentOperator = new SelectOperator(exprs);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
-
- // populate the schema from upstream operator
- operator.setSchema(operator.getParentOperators().get(0).getSchema());
- List<LogicalVariable> latestOutputSchema = t
- .getVariablesFromSchema(currentSchema);
- t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java
deleted file mode 100644
index d2180a3..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/GroupByVisitor.java
+++ /dev/null
@@ -1,291 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.lang.reflect.Field;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.GroupByDesc;
-import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
-import org.apache.hadoop.hive.ql.plan.api.OperatorType;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.HiveOperatorAnnotations;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.OperatorAnnotations;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.NestedTupleSourceOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.plan.ALogicalPlanImpl;
-
-@SuppressWarnings({ "rawtypes", "unchecked" })
-public class GroupByVisitor extends DefaultVisitor {
-
- private List<Mutable<ILogicalExpression>> AlgebricksAggs = new ArrayList<Mutable<ILogicalExpression>>();
- private List<IFunctionInfo> localAggs = new ArrayList<IFunctionInfo>();
- private boolean isDistinct = false;
- private boolean gbyKeyNotRedKey = false;
-
- @Override
- public Mutable<ILogicalOperator> visit(GroupByOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException {
-
- // get descriptors
- GroupByDesc desc = (GroupByDesc) operator.getConf();
- GroupByDesc.Mode mode = desc.getMode();
-
- List<ExprNodeDesc> keys = desc.getKeys();
- List<AggregationDesc> aggregators = desc.getAggregators();
-
- Operator child = operator.getChildOperators().get(0);
-
- if (child.getType() == OperatorType.REDUCESINK) {
- List<ExprNodeDesc> partKeys = ((ReduceSinkDesc) child.getConf())
- .getPartitionCols();
- if (keys.size() != partKeys.size())
- gbyKeyNotRedKey = true;
- }
-
- if (mode == GroupByDesc.Mode.PARTIAL1 || mode == GroupByDesc.Mode.HASH
- || mode == GroupByDesc.Mode.COMPLETE
- || (aggregators.size() == 0 && isDistinct == false)
- || gbyKeyNotRedKey) {
- AlgebricksAggs.clear();
- // add an assign operator if the key is not a column expression
- ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
- ILogicalOperator currentOperator = null;
- ILogicalOperator assignOperator = t.getAssignOperator(
- AlgebricksParentOperatorRef, keys, keyVariables);
- if (assignOperator != null) {
- currentOperator = assignOperator;
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- // get key variable expression list
- List<Mutable<ILogicalExpression>> keyExprs = new ArrayList<Mutable<ILogicalExpression>>();
- for (LogicalVariable var : keyVariables) {
- keyExprs.add(t.translateScalarFucntion(new ExprNodeColumnDesc(
- TypeInfoFactory.intTypeInfo, var.toString(), "", false)));
- }
-
- if (aggregators.size() == 0) {
- List<Mutable<ILogicalExpression>> distinctExprs = new ArrayList<Mutable<ILogicalExpression>>();
- for (LogicalVariable var : keyVariables) {
- Mutable<ILogicalExpression> varExpr = new MutableObject<ILogicalExpression>(
- new VariableReferenceExpression(var));
- distinctExprs.add(varExpr);
- }
- t.rewriteOperatorOutputSchema(keyVariables, operator);
- isDistinct = true;
- ILogicalOperator lop = new DistinctOperator(distinctExprs);
- lop.getInputs().add(AlgebricksParentOperatorRef);
- return new MutableObject<ILogicalOperator>(lop);
- }
-
- // get the pair<LogicalVariable, ILogicalExpression> list
- List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> keyParameters = new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>();
- keyVariables.clear();
- for (Mutable<ILogicalExpression> expr : keyExprs) {
- LogicalVariable keyVar = t.getVariable(expr.getValue()
- .toString(), TypeInfoFactory.unknownTypeInfo);
- keyParameters.add(new Pair(keyVar, expr));
- keyVariables.add(keyVar);
- }
-
- // get the parameters for the aggregator operator
- ArrayList<LogicalVariable> aggVariables = new ArrayList<LogicalVariable>();
- ArrayList<Mutable<ILogicalExpression>> aggExprs = new ArrayList<Mutable<ILogicalExpression>>();
-
- // get the type of each aggregation function
- HashMap<AggregationDesc, TypeInfo> aggToType = new HashMap<AggregationDesc, TypeInfo>();
- List<ColumnInfo> columns = operator.getSchema().getSignature();
- int offset = keys.size();
- for (int i = offset; i < columns.size(); i++) {
- aggToType.put(aggregators.get(i - offset), columns.get(i)
- .getType());
- }
-
- localAggs.clear();
- // rewrite parameter expressions for all aggregators
- for (AggregationDesc aggregator : aggregators) {
- for (ExprNodeDesc parameter : aggregator.getParameters()) {
- t.rewriteExpression(parameter);
- }
- Mutable<ILogicalExpression> aggExpr = t
- .translateAggregation(aggregator);
- AbstractFunctionCallExpression localAggExpr = (AbstractFunctionCallExpression) aggExpr
- .getValue();
- localAggs.add(localAggExpr.getFunctionInfo());
-
- AggregationDesc logicalAgg = new AggregationDesc(
- aggregator.getGenericUDAFName(),
- aggregator.getGenericUDAFEvaluator(),
- aggregator.getParameters(), aggregator.getDistinct(),
- Mode.COMPLETE);
- Mutable<ILogicalExpression> logicalAggExpr = t
- .translateAggregation(logicalAgg);
-
- AlgebricksAggs.add(logicalAggExpr);
- if (!gbyKeyNotRedKey)
- aggExprs.add(logicalAggExpr);
- else
- aggExprs.add(aggExpr);
-
- aggVariables.add(t.getVariable(aggregator.getExprString()
- + aggregator.getMode(), aggToType.get(aggregator)));
- }
-
- if (child.getType() != OperatorType.REDUCESINK)
- gbyKeyNotRedKey = false;
-
- // get the sub plan list
- AggregateOperator aggOperator = new AggregateOperator(aggVariables,
- aggExprs);
- NestedTupleSourceOperator nestedTupleSource = new NestedTupleSourceOperator(
- new MutableObject<ILogicalOperator>());
- aggOperator.getInputs().add(
- new MutableObject<ILogicalOperator>(nestedTupleSource));
-
- List<Mutable<ILogicalOperator>> subRoots = new ArrayList<Mutable<ILogicalOperator>>();
- subRoots.add(new MutableObject<ILogicalOperator>(aggOperator));
- ILogicalPlan subPlan = new ALogicalPlanImpl(subRoots);
- List<ILogicalPlan> subPlans = new ArrayList<ILogicalPlan>();
- subPlans.add(subPlan);
-
- // create the group by operator
- currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator(
- keyParameters,
- new ArrayList<Pair<LogicalVariable, Mutable<ILogicalExpression>>>(),
- subPlans);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
- nestedTupleSource.getDataSourceReference()
- .setValue(currentOperator);
-
- List<LogicalVariable> outputVariables = new ArrayList<LogicalVariable>();
- outputVariables.addAll(keyVariables);
- outputVariables.addAll(aggVariables);
- t.rewriteOperatorOutputSchema(outputVariables, operator);
-
- if (gbyKeyNotRedKey) {
- currentOperator.getAnnotations().put(
- HiveOperatorAnnotations.LOCAL_GROUP_BY, Boolean.TRUE);
- }
-
- HiveConf conf = ConfUtil.getHiveConf();
- Boolean extGby = conf.getBoolean(
- "hive.algebricks.groupby.external", false);
-
- if (extGby && isSerializable(aggregators)) {
- currentOperator.getAnnotations()
- .put(OperatorAnnotations.USE_EXTERNAL_GROUP_BY,
- Boolean.TRUE);
- }
- return new MutableObject<ILogicalOperator>(currentOperator);
- } else {
- isDistinct = false;
- // rewrite parameter expressions for all aggregators
- int i = 0;
- for (AggregationDesc aggregator : aggregators) {
- for (ExprNodeDesc parameter : aggregator.getParameters()) {
- t.rewriteExpression(parameter);
- }
- Mutable<ILogicalExpression> agg = t
- .translateAggregation(aggregator);
- AggregateFunctionCallExpression originalAgg = (AggregateFunctionCallExpression) AlgebricksAggs
- .get(i).getValue();
- originalAgg.setStepOneAggregate(localAggs.get(i));
- AggregateFunctionCallExpression currentAgg = (AggregateFunctionCallExpression) agg
- .getValue();
- if (currentAgg.getFunctionInfo() != null) {
- originalAgg.setTwoStep(true);
- originalAgg.setStepTwoAggregate(currentAgg
- .getFunctionInfo());
- }
- i++;
- }
- return null;
- }
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Operator downStream = (Operator) operator.getChildOperators().get(0);
- if (!(downStream instanceof GroupByOperator)) {
- return null;
- }
-
- ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
- List<ExprNodeDesc> keys = desc.getKeyCols();
- List<ExprNodeDesc> values = desc.getValueCols();
-
- // insert assign for keys
- ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
- t.getAssignOperator(AlgebricksParentOperatorRef, keys, keyVariables);
-
- // insert assign for values
- ArrayList<LogicalVariable> valueVariables = new ArrayList<LogicalVariable>();
- t.getAssignOperator(AlgebricksParentOperatorRef, values, valueVariables);
-
- ArrayList<LogicalVariable> columns = new ArrayList<LogicalVariable>();
- columns.addAll(keyVariables);
- columns.addAll(valueVariables);
-
- t.rewriteOperatorOutputSchema(columns, operator);
- return null;
- }
-
- private boolean isSerializable(List<AggregationDesc> descs)
- throws AlgebricksException {
- try {
- for (AggregationDesc desc : descs) {
- GenericUDAFEvaluator udaf = desc.getGenericUDAFEvaluator();
- AggregationBuffer buf = udaf.getNewAggregationBuffer();
- Class<?> bufferClass = buf.getClass();
- Field[] fields = bufferClass.getDeclaredFields();
- for (Field field : fields) {
- field.setAccessible(true);
- String type = field.getType().toString();
- if (!(type.equals("int") || type.equals("long")
- || type.equals("float") || type.equals("double") || type
- .equals("boolean"))) {
- return false;
- }
- }
-
- }
- return true;
- } catch (Exception e) {
- throw new AlgebricksException(e);
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java
deleted file mode 100644
index aea4be5..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/JoinVisitor.java
+++ /dev/null
@@ -1,445 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
-import org.apache.hadoop.hive.ql.plan.JoinDesc;
-import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.LeftOuterJoinOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-
-@SuppressWarnings("rawtypes")
-public class JoinVisitor extends DefaultVisitor {
-
- /**
- * reduce sink operator to variables
- */
- private HashMap<Operator, List<LogicalVariable>> reduceSinkToKeyVariables = new HashMap<Operator, List<LogicalVariable>>();
-
- /**
- * reduce sink operator to variables
- */
- private HashMap<Operator, List<String>> reduceSinkToFieldNames = new HashMap<Operator, List<String>>();
-
- /**
- * reduce sink operator to variables
- */
- private HashMap<Operator, List<TypeInfo>> reduceSinkToTypes = new HashMap<Operator, List<TypeInfo>>();
-
- /**
- * map a join operator (in hive) to its parent operators (in hive)
- */
- private HashMap<Operator, List<Operator>> operatorToHiveParents = new HashMap<Operator, List<Operator>>();
-
- /**
- * map a join operator (in hive) to its parent operators (in asterix)
- */
- private HashMap<Operator, List<ILogicalOperator>> operatorToAsterixParents = new HashMap<Operator, List<ILogicalOperator>>();
-
- /**
- * the latest traversed reduce sink operator
- */
- private Operator latestReduceSink = null;
-
- /**
- * the latest generated parent for join
- */
- private ILogicalOperator latestAlgebricksOperator = null;
-
- /**
- * process a join operator
- */
- @Override
- public Mutable<ILogicalOperator> visit(JoinOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) {
- latestAlgebricksOperator = AlgebricksParentOperator.getValue();
- translateJoinOperatorPreprocess(operator, t);
- List<Operator> parents = operatorToHiveParents.get(operator);
- if (parents.size() < operator.getParentOperators().size()) {
- return null;
- } else {
- ILogicalOperator joinOp = translateJoinOperator(operator,
- AlgebricksParentOperator, t);
- // clearStatus();
- return new MutableObject<ILogicalOperator>(joinOp);
- }
- }
-
- private void reorder(Byte[] order, List<ILogicalOperator> parents,
- List<Operator> hiveParents) {
- ILogicalOperator[] lops = new ILogicalOperator[parents.size()];
- Operator[] ops = new Operator[hiveParents.size()];
-
- for (Operator op : hiveParents) {
- ReduceSinkOperator rop = (ReduceSinkOperator) op;
- ReduceSinkDesc rdesc = rop.getConf();
- int tag = rdesc.getTag();
-
- int index = -1;
- for (int i = 0; i < order.length; i++)
- if (order[i] == tag) {
- index = i;
- break;
- }
- lops[index] = parents.get(hiveParents.indexOf(op));
- ops[index] = op;
- }
-
- parents.clear();
- hiveParents.clear();
-
- for (int i = 0; i < lops.length; i++) {
- parents.add(lops[i]);
- hiveParents.add(ops[i]);
- }
- }
-
- /**
- * translate a hive join operator to asterix join operator->assign
- * operator->project operator
- *
- * @param parentOperator
- * @param operator
- * @return
- */
- private ILogicalOperator translateJoinOperator(Operator operator,
- Mutable<ILogicalOperator> parentOperator, Translator t) {
-
- JoinDesc joinDesc = (JoinDesc) operator.getConf();
-
- // get the projection expression (already re-written) from each source
- // table
- Map<Byte, List<ExprNodeDesc>> exprMap = joinDesc.getExprs();
- reorder(joinDesc.getTagOrder(), operatorToAsterixParents.get(operator),
- operatorToHiveParents.get(operator));
-
- // make an reduce join operator
- ILogicalOperator currentOperator = generateJoinTree(
- joinDesc.getCondsList(),
- operatorToAsterixParents.get(operator),
- operatorToHiveParents.get(operator), 0, t);
- parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
-
- // add assign and project operator on top of a join
- // output variables
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- Set<Entry<Byte, List<ExprNodeDesc>>> entries = exprMap.entrySet();
- Iterator<Entry<Byte, List<ExprNodeDesc>>> iterator = entries.iterator();
- while (iterator.hasNext()) {
- List<ExprNodeDesc> outputExprs = iterator.next().getValue();
- ILogicalOperator assignOperator = t.getAssignOperator(
- parentOperator, outputExprs, variables);
-
- if (assignOperator != null) {
- currentOperator = assignOperator;
- parentOperator = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
- }
-
- ILogicalOperator po = new ProjectOperator(variables);
- po.getInputs().add(parentOperator);
- t.rewriteOperatorOutputSchema(variables, operator);
- return po;
- }
-
- /**
- * deal with reduce sink operator for the case of join
- */
- @Override
- public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator,
- Mutable<ILogicalOperator> parentOperator, Translator t) {
-
- Operator downStream = (Operator) operator.getChildOperators().get(0);
- if (!(downStream instanceof JoinOperator))
- return null;
-
- ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
- List<ExprNodeDesc> keys = desc.getKeyCols();
- List<ExprNodeDesc> values = desc.getValueCols();
- List<ExprNodeDesc> partitionCols = desc.getPartitionCols();
-
- /**
- * rewrite key, value, paritioncol expressions
- */
- for (ExprNodeDesc key : keys)
- t.rewriteExpression(key);
- for (ExprNodeDesc value : values)
- t.rewriteExpression(value);
- for (ExprNodeDesc col : partitionCols)
- t.rewriteExpression(col);
-
- ILogicalOperator currentOperator = null;
-
- // add assign operator for keys if necessary
- ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
- ILogicalOperator assignOperator = t.getAssignOperator(parentOperator,
- keys, keyVariables);
- if (assignOperator != null) {
- currentOperator = assignOperator;
- parentOperator = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- // add assign operator for values if necessary
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- assignOperator = t.getAssignOperator(parentOperator, values, variables);
- if (assignOperator != null) {
- currentOperator = assignOperator;
- parentOperator = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- // unified schema: key, value
- ArrayList<LogicalVariable> unifiedKeyValues = new ArrayList<LogicalVariable>();
- unifiedKeyValues.addAll(keyVariables);
- for (LogicalVariable value : variables)
- if (keyVariables.indexOf(value) < 0)
- unifiedKeyValues.add(value);
-
- // insert projection operator, it is a *must*,
- // in hive, reduce sink sometimes also do the projection operator's
- // task
- currentOperator = new ProjectOperator(unifiedKeyValues);
- currentOperator.getInputs().add(parentOperator);
- parentOperator = new MutableObject<ILogicalOperator>(currentOperator);
-
- reduceSinkToKeyVariables.put(operator, keyVariables);
- List<String> fieldNames = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- for (LogicalVariable var : unifiedKeyValues) {
- fieldNames.add(var.toString());
- types.add(t.getType(var));
- }
- reduceSinkToFieldNames.put(operator, fieldNames);
- reduceSinkToTypes.put(operator, types);
- t.rewriteOperatorOutputSchema(variables, operator);
-
- latestAlgebricksOperator = currentOperator;
- latestReduceSink = operator;
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
- /**
- * partial rewrite a join operator
- *
- * @param operator
- * @param t
- */
- private void translateJoinOperatorPreprocess(Operator operator, Translator t) {
- JoinDesc desc = (JoinDesc) operator.getConf();
- ReduceSinkDesc reduceSinkDesc = (ReduceSinkDesc) latestReduceSink
- .getConf();
- int tag = reduceSinkDesc.getTag();
-
- Map<Byte, List<ExprNodeDesc>> exprMap = desc.getExprs();
- List<ExprNodeDesc> exprs = exprMap.get(Byte.valueOf((byte) tag));
-
- for (ExprNodeDesc expr : exprs)
- t.rewriteExpression(expr);
-
- List<Operator> parents = operatorToHiveParents.get(operator);
- if (parents == null) {
- parents = new ArrayList<Operator>();
- operatorToHiveParents.put(operator, parents);
- }
- parents.add(latestReduceSink);
-
- List<ILogicalOperator> asterixParents = operatorToAsterixParents
- .get(operator);
- if (asterixParents == null) {
- asterixParents = new ArrayList<ILogicalOperator>();
- operatorToAsterixParents.put(operator, asterixParents);
- }
- asterixParents.add(latestAlgebricksOperator);
- }
-
- // generate a join tree from a list of exchange/reducesink operator
- // both exchanges and reduce sinks have the same order
- private ILogicalOperator generateJoinTree(List<JoinCondDesc> conds,
- List<ILogicalOperator> exchanges, List<Operator> reduceSinks,
- int offset, Translator t) {
- // get a list of reduce sink descs (input descs)
- int inputSize = reduceSinks.size() - offset;
-
- if (inputSize == 2) {
- ILogicalOperator currentRoot;
-
- List<ReduceSinkDesc> reduceSinkDescs = new ArrayList<ReduceSinkDesc>();
- for (int i = reduceSinks.size() - 1; i >= offset; i--)
- reduceSinkDescs.add((ReduceSinkDesc) reduceSinks.get(i)
- .getConf());
-
- // get the object inspector for the join
- List<String> fieldNames = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- for (int i = reduceSinks.size() - 1; i >= offset; i--) {
- fieldNames
- .addAll(reduceSinkToFieldNames.get(reduceSinks.get(i)));
- types.addAll(reduceSinkToTypes.get(reduceSinks.get(i)));
- }
-
- // get number of equality conjunctions in the final join condition
- int size = reduceSinkDescs.get(0).getKeyCols().size();
-
- // make up the join conditon expression
- List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
- for (int i = 0; i < size; i++) {
- // create a join key pair
- List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
- for (ReduceSinkDesc sink : reduceSinkDescs) {
- keyPair.add(sink.getKeyCols().get(i));
- }
- // create a hive equal condition
- ExprNodeDesc equality = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo,
- new GenericUDFOPEqual(), keyPair);
- // add the equal condition to the conjunction list
- joinConditionChildren.add(equality);
- }
- // get final conjunction expression
- ExprNodeDesc conjunct = null;
-
- if (joinConditionChildren.size() > 1)
- conjunct = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
- joinConditionChildren);
- else if (joinConditionChildren.size() == 1)
- conjunct = joinConditionChildren.get(0);
- else {
- // there is no join equality condition, equal-join
- conjunct = new ExprNodeConstantDesc(
- TypeInfoFactory.booleanTypeInfo, new Boolean(true));
- }
- // get an ILogicalExpression from hive's expression
- Mutable<ILogicalExpression> expression = t
- .translateScalarFucntion(conjunct);
-
- Mutable<ILogicalOperator> leftBranch = new MutableObject<ILogicalOperator>(
- exchanges.get(exchanges.size() - 1));
- Mutable<ILogicalOperator> rightBranch = new MutableObject<ILogicalOperator>(
- exchanges.get(exchanges.size() - 2));
- // get the join operator
- if (conds.get(offset).getType() == JoinDesc.LEFT_OUTER_JOIN) {
- currentRoot = new LeftOuterJoinOperator(expression);
- Mutable<ILogicalOperator> temp = leftBranch;
- leftBranch = rightBranch;
- rightBranch = temp;
- } else if (conds.get(offset).getType() == JoinDesc.RIGHT_OUTER_JOIN) {
- currentRoot = new LeftOuterJoinOperator(expression);
- } else
- currentRoot = new InnerJoinOperator(expression);
-
- currentRoot.getInputs().add(leftBranch);
- currentRoot.getInputs().add(rightBranch);
-
- // rewriteOperatorOutputSchema(variables, operator);
- return currentRoot;
- } else {
- // get the child join operator and insert and one-to-one exchange
- ILogicalOperator joinSrcOne = generateJoinTree(conds, exchanges,
- reduceSinks, offset + 1, t);
- // joinSrcOne.addInput(childJoin);
-
- ILogicalOperator currentRoot;
-
- List<ReduceSinkDesc> reduceSinkDescs = new ArrayList<ReduceSinkDesc>();
- for (int i = offset; i < offset + 2; i++)
- reduceSinkDescs.add((ReduceSinkDesc) reduceSinks.get(i)
- .getConf());
-
- // get the object inspector for the join
- List<String> fieldNames = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- for (int i = offset; i < reduceSinks.size(); i++) {
- fieldNames
- .addAll(reduceSinkToFieldNames.get(reduceSinks.get(i)));
- types.addAll(reduceSinkToTypes.get(reduceSinks.get(i)));
- }
-
- // get number of equality conjunctions in the final join condition
- int size = reduceSinkDescs.get(0).getKeyCols().size();
-
- // make up the join condition expression
- List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
- for (int i = 0; i < size; i++) {
- // create a join key pair
- List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
- for (ReduceSinkDesc sink : reduceSinkDescs) {
- keyPair.add(sink.getKeyCols().get(i));
- }
- // create a hive equal condition
- ExprNodeDesc equality = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo,
- new GenericUDFOPEqual(), keyPair);
- // add the equal condition to the conjunction list
- joinConditionChildren.add(equality);
- }
- // get final conjunction expression
- ExprNodeDesc conjunct = null;
-
- if (joinConditionChildren.size() > 1)
- conjunct = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
- joinConditionChildren);
- else if (joinConditionChildren.size() == 1)
- conjunct = joinConditionChildren.get(0);
- else {
- // there is no join equality condition, full outer join
- conjunct = new ExprNodeConstantDesc(
- TypeInfoFactory.booleanTypeInfo, new Boolean(true));
- }
- // get an ILogicalExpression from hive's expression
- Mutable<ILogicalExpression> expression = t
- .translateScalarFucntion(conjunct);
-
- Mutable<ILogicalOperator> leftBranch = new MutableObject<ILogicalOperator>(
- joinSrcOne);
- Mutable<ILogicalOperator> rightBranch = new MutableObject<ILogicalOperator>(
- exchanges.get(offset));
-
- // get the join operator
- if (conds.get(offset).getType() == JoinDesc.LEFT_OUTER_JOIN) {
- currentRoot = new LeftOuterJoinOperator(expression);
- Mutable<ILogicalOperator> temp = leftBranch;
- leftBranch = rightBranch;
- rightBranch = temp;
- } else if (conds.get(offset).getType() == JoinDesc.RIGHT_OUTER_JOIN) {
- currentRoot = new LeftOuterJoinOperator(expression);
- } else
- currentRoot = new InnerJoinOperator(expression);
-
- // set the inputs from Algebricks join operator
- // add the current table
- currentRoot.getInputs().add(leftBranch);
- currentRoot.getInputs().add(rightBranch);
-
- return currentRoot;
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
deleted file mode 100644
index 004a8c2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LateralViewJoinVisitor.java
+++ /dev/null
@@ -1,124 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-
-/**
- * The lateral view join operator is used for FROM src LATERAL VIEW udtf()...
- * This operator was implemented with the following operator DAG in mind.
- *
- * For a query such as
- *
- * SELECT pageid, adid.* FROM example_table LATERAL VIEW explode(adid_list) AS
- * adid
- *
- * The top of the operator DAG will look similar to
- *
- * [Table Scan] | [Lateral View Forward] / \ [Select](*) [Select](adid_list) | |
- * | [UDTF] (explode) \ / [Lateral View Join] | | [Select] (pageid, adid.*) |
- * ....
- *
- * Rows from the table scan operator are first to a lateral view forward
- * operator that just forwards the row and marks the start of a LV. The select
- * operator on the left picks all the columns while the select operator on the
- * right picks only the columns needed by the UDTF.
- *
- * The output of select in the left branch and output of the UDTF in the right
- * branch are then sent to the lateral view join (LVJ). In most cases, the UDTF
- * will generate > 1 row for every row received from the TS, while the left
- * select operator will generate only one. For each row output from the TS, the
- * LVJ outputs all possible rows that can be created by joining the row from the
- * left select and one of the rows output from the UDTF.
- *
- * Additional lateral views can be supported by adding a similar DAG after the
- * previous LVJ operator.
- */
-
-@SuppressWarnings("rawtypes")
-public class LateralViewJoinVisitor extends DefaultVisitor {
-
- private UDTFDesc udtf;
-
- private List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
-
- @Override
- public Mutable<ILogicalOperator> visit(LateralViewJoinOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException {
-
- parents.add(AlgebricksParentOperatorRef);
- if (operator.getParentOperators().size() > parents.size()) {
- return null;
- }
-
- Operator parent0 = operator.getParentOperators().get(0);
- ILogicalOperator parentOperator;
- ILogicalExpression unnestArg;
- if (parent0 instanceof UDTFOperator) {
- List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parents.get(1).getValue(),
- unnestVars);
- unnestArg = new VariableReferenceExpression(unnestVars.get(0));
- parentOperator = parents.get(1).getValue();
- } else {
- List<LogicalVariable> unnestVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parents.get(0).getValue(),
- unnestVars);
- unnestArg = new VariableReferenceExpression(unnestVars.get(0));
- parentOperator = parents.get(0).getValue();
- }
-
- LogicalVariable var = t.getVariable(udtf.toString(),
- TypeInfoFactory.unknownTypeInfo);
-
- Mutable<ILogicalExpression> unnestExpr = t.translateUnnestFunction(
- udtf, new MutableObject<ILogicalExpression>(unnestArg));
- ILogicalOperator currentOperator = new UnnestOperator(var, unnestExpr);
-
- List<LogicalVariable> outputVars = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(parentOperator, outputVars);
- outputVars.add(var);
- currentOperator.getInputs().add(
- new MutableObject<ILogicalOperator>(parentOperator));
-
- parents.clear();
- udtf = null;
- t.rewriteOperatorOutputSchema(outputVars, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(UDTFOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Schema currentSchema = t.generateInputSchema(operator
- .getParentOperators().get(0));
- udtf = (UDTFDesc) operator.getConf();
-
- // populate the schema from upstream operator
- operator.setSchema(operator.getParentOperators().get(0).getSchema());
- List<LogicalVariable> latestOutputSchema = t
- .getVariablesFromSchema(currentSchema);
- t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
- return null;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java
deleted file mode 100644
index 84cdf00..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/LimitVisitor.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.LimitOperator;
-import org.apache.hadoop.hive.ql.plan.LimitDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-
-public class LimitVisitor extends DefaultVisitor {
-
- @Override
- public Mutable<ILogicalOperator> visit(LimitOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- Schema currentSchema = t.generateInputSchema(operator
- .getParentOperators().get(0));
-
- LimitDesc desc = (LimitDesc) operator.getConf();
- int limit = desc.getLimit();
- Integer limitValue = new Integer(limit);
-
- ILogicalExpression expr = new ConstantExpression(
- new HivesterixConstantValue(limitValue));
- ILogicalOperator currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.LimitOperator(
- expr, true);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
-
- operator.setSchema(operator.getParentOperators().get(0).getSchema());
- List<LogicalVariable> latestOutputSchema = t
- .getVariablesFromSchema(currentSchema);
- t.rewriteOperatorOutputSchema(latestOutputSchema, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
deleted file mode 100644
index fa5d014..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/MapJoinVisitor.java
+++ /dev/null
@@ -1,183 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-
-@SuppressWarnings("rawtypes")
-public class MapJoinVisitor extends DefaultVisitor {
-
- /**
- * map a join operator (in hive) to its parent operators (in asterix)
- */
- private HashMap<Operator, List<Mutable<ILogicalOperator>>> opMap = new HashMap<Operator, List<Mutable<ILogicalOperator>>>();
-
- @Override
- public Mutable<ILogicalOperator> visit(MapJoinOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t) {
- List<Operator<? extends Serializable>> joinSrc = operator
- .getParentOperators();
- List<Mutable<ILogicalOperator>> parents = opMap.get(operator);
- if (parents == null) {
- parents = new ArrayList<Mutable<ILogicalOperator>>();
- opMap.put(operator, parents);
- }
- parents.add(AlgebricksParentOperatorRef);
- if (joinSrc.size() != parents.size())
- return null;
-
- ILogicalOperator currentOperator;
- // make an map join operator
- // TODO: will have trouble for n-way joins
- MapJoinDesc joinDesc = (MapJoinDesc) operator.getConf();
-
- Map<Byte, List<ExprNodeDesc>> keyMap = joinDesc.getKeys();
- // get the projection expression (already re-written) from each source
- // table
- Map<Byte, List<ExprNodeDesc>> exprMap = joinDesc.getExprs();
-
- int inputSize = operator.getParentOperators().size();
- // get a list of reduce sink descs (input descs)
-
- // get the parent operator
- List<Mutable<ILogicalOperator>> parentOps = parents;
-
- List<String> fieldNames = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- for (Operator ts : joinSrc) {
- List<ColumnInfo> columns = ts.getSchema().getSignature();
- for (ColumnInfo col : columns) {
- fieldNames.add(col.getInternalName());
- types.add(col.getType());
- }
- }
-
- // get number of equality conjunctions in the final join condition
- Set<Entry<Byte, List<ExprNodeDesc>>> keyEntries = keyMap.entrySet();
- Iterator<Entry<Byte, List<ExprNodeDesc>>> entry = keyEntries.iterator();
-
- int size = 0;
- if (entry.hasNext())
- size = entry.next().getValue().size();
-
- // make up the join conditon expression
- List<ExprNodeDesc> joinConditionChildren = new ArrayList<ExprNodeDesc>();
- for (int i = 0; i < size; i++) {
- // create a join key pair
- List<ExprNodeDesc> keyPair = new ArrayList<ExprNodeDesc>();
- for (int j = 0; j < inputSize; j++) {
- keyPair.add(keyMap.get(Byte.valueOf((byte) j)).get(i));
- }
- // create a hive equal condition
- ExprNodeDesc equality = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo, new GenericUDFOPEqual(),
- keyPair);
- // add the equal condition to the conjunction list
- joinConditionChildren.add(equality);
- }
- // get final conjunction expression
- ExprNodeDesc conjunct = null;
-
- if (joinConditionChildren.size() > 1)
- conjunct = new ExprNodeGenericFuncDesc(
- TypeInfoFactory.booleanTypeInfo, new GenericUDFOPAnd(),
- joinConditionChildren);
- else if (joinConditionChildren.size() == 1)
- conjunct = joinConditionChildren.get(0);
- else {
- // there is no join equality condition, full outer join
- conjunct = new ExprNodeConstantDesc(
- TypeInfoFactory.booleanTypeInfo, new Boolean(true));
- }
- // get an ILogicalExpression from hive's expression
- Mutable<ILogicalExpression> expression = t
- .translateScalarFucntion(conjunct);
-
- ArrayList<LogicalVariable> left = new ArrayList<LogicalVariable>();
- ArrayList<LogicalVariable> right = new ArrayList<LogicalVariable>();
-
- Set<Entry<Byte, List<ExprNodeDesc>>> kentries = keyMap.entrySet();
- Iterator<Entry<Byte, List<ExprNodeDesc>>> kiterator = kentries
- .iterator();
- int iteration = 0;
- ILogicalOperator assignOperator = null;
- while (kiterator.hasNext()) {
- List<ExprNodeDesc> outputExprs = kiterator.next().getValue();
-
- if (iteration == 0)
- assignOperator = t.getAssignOperator(
- AlgebricksParentOperatorRef, outputExprs, left);
- else
- assignOperator = t.getAssignOperator(
- AlgebricksParentOperatorRef, outputExprs, right);
-
- if (assignOperator != null) {
- currentOperator = assignOperator;
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
- iteration++;
- }
-
- List<Mutable<ILogicalOperator>> inputs = parentOps;
-
- // get the join operator
- currentOperator = new InnerJoinOperator(expression);
-
- // set the inputs from asterix join operator
- for (Mutable<ILogicalOperator> input : inputs)
- currentOperator.getInputs().add(input);
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
-
- // add assign and project operator
- // output variables
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- Set<Entry<Byte, List<ExprNodeDesc>>> entries = exprMap.entrySet();
- Iterator<Entry<Byte, List<ExprNodeDesc>>> iterator = entries.iterator();
- while (iterator.hasNext()) {
- List<ExprNodeDesc> outputExprs = iterator.next().getValue();
- assignOperator = t.getAssignOperator(AlgebricksParentOperatorRef,
- outputExprs, variables);
-
- if (assignOperator != null) {
- currentOperator = assignOperator;
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
- }
-
- currentOperator = new ProjectOperator(variables);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
- t.rewriteOperatorOutputSchema(variables, operator);
- // opMap.clear();
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java
deleted file mode 100644
index 0d2067c..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/ProjectVisitor.java
+++ /dev/null
@@ -1,58 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.SelectDesc;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-
-public class ProjectVisitor extends DefaultVisitor {
-
- /**
- * translate project operator
- */
- @Override
- public Mutable<ILogicalOperator> visit(SelectOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) {
-
- SelectDesc desc = (SelectDesc) operator.getConf();
-
- if (desc == null)
- return null;
-
- List<ExprNodeDesc> cols = desc.getColList();
-
- if (cols == null)
- return null;
-
- // insert assign operator if necessary
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
-
- for (ExprNodeDesc expr : cols)
- t.rewriteExpression(expr);
-
- ILogicalOperator assignOp = t.getAssignOperator(
- AlgebricksParentOperator, cols, variables);
- ILogicalOperator currentOperator = null;
- if (assignOp != null) {
- currentOperator = assignOp;
- AlgebricksParentOperator = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- currentOperator = new ProjectOperator(variables);
- currentOperator.getInputs().add(AlgebricksParentOperator);
- t.rewriteOperatorOutputSchema(variables, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java
deleted file mode 100644
index a2c0d03..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/SortVisitor.java
+++ /dev/null
@@ -1,125 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.OrderOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.OrderOperator.IOrder;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-import edu.uci.ics.hyracks.algebricks.core.algebra.properties.OrderColumn;
-
-public class SortVisitor extends DefaultVisitor {
-
- @SuppressWarnings("rawtypes")
- @Override
- public Mutable<ILogicalOperator> visit(ReduceSinkOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException {
- ReduceSinkDesc desc = (ReduceSinkDesc) operator.getConf();
- Operator downStream = (Operator) operator.getChildOperators().get(0);
- List<ExprNodeDesc> keys = desc.getKeyCols();
- if (!(downStream instanceof ExtractOperator
- && desc.getNumReducers() == 1 && keys.size() > 0)) {
- return null;
- }
-
- List<ExprNodeDesc> schema = new ArrayList<ExprNodeDesc>();
- List<ExprNodeDesc> values = desc.getValueCols();
- List<ExprNodeDesc> partitionCols = desc.getPartitionCols();
- for (ExprNodeDesc key : keys) {
- t.rewriteExpression(key);
- }
- for (ExprNodeDesc value : values) {
- t.rewriteExpression(value);
- }
- for (ExprNodeDesc col : partitionCols) {
- t.rewriteExpression(col);
- }
-
- // add a order-by operator and limit if any
- List<Pair<IOrder, Mutable<ILogicalExpression>>> pairs = new ArrayList<Pair<IOrder, Mutable<ILogicalExpression>>>();
- char[] orders = desc.getOrder().toCharArray();
- int i = 0;
- for (ExprNodeDesc key : keys) {
- Mutable<ILogicalExpression> expr = t.translateScalarFucntion(key);
- IOrder order = orders[i] == '+' ? OrderOperator.ASC_ORDER
- : OrderOperator.DESC_ORDER;
-
- Pair<IOrder, Mutable<ILogicalExpression>> pair = new Pair<IOrder, Mutable<ILogicalExpression>>(
- order, expr);
- pairs.add(pair);
- i++;
- }
-
- // get input variables
- ArrayList<LogicalVariable> inputVariables = new ArrayList<LogicalVariable>();
- VariableUtilities.getProducedVariables(
- AlgebricksParentOperatorRef.getValue(), inputVariables);
-
- ArrayList<LogicalVariable> keyVariables = new ArrayList<LogicalVariable>();
- ILogicalOperator currentOperator;
- ILogicalOperator assignOp = t.getAssignOperator(
- AlgebricksParentOperatorRef, keys, keyVariables);
- if (assignOp != null) {
- currentOperator = assignOp;
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- OrderColumn[] keyColumns = new OrderColumn[keyVariables.size()];
-
- for (int j = 0; j < keyColumns.length; j++)
- keyColumns[j] = new OrderColumn(keyVariables.get(j),
- pairs.get(j).first.getKind());
-
- // handle order operator
- currentOperator = new OrderOperator(pairs);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
-
- // project back, remove generated sort-key columns if any
- if (assignOp != null) {
- currentOperator = new ProjectOperator(inputVariables);
- currentOperator.getInputs().add(AlgebricksParentOperatorRef);
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
-
- /**
- * a special rule for hive's order by output schema of reduce sink
- * operator only contains the columns
- */
- for (ExprNodeDesc value : values) {
- schema.add(value);
- }
-
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- ILogicalOperator assignOperator = t.getAssignOperator(
- AlgebricksParentOperatorRef, schema, variables);
- t.rewriteOperatorOutputSchema(variables, operator);
-
- if (assignOperator != null) {
- currentOperator = assignOperator;
- AlgebricksParentOperatorRef = new MutableObject<ILogicalOperator>(
- currentOperator);
- }
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
deleted file mode 100644
index 3e12bb9..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/TableScanWriteVisitor.java
+++ /dev/null
@@ -1,148 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.ColumnInfo;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.hive.ql.plan.TableScanDesc;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hivesterix.runtime.jobgen.HiveDataSink;
-import edu.uci.ics.hivesterix.runtime.jobgen.HiveDataSource;
-import edu.uci.ics.hivesterix.runtime.jobgen.HiveMetaDataProvider;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DataSourceScanOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.WriteOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-
-public class TableScanWriteVisitor extends DefaultVisitor {
-
- /**
- * map from alias to partition desc
- */
- private HashMap<String, PartitionDesc> aliasToPathMap;
-
- /**
- * map from partition desc to data source
- */
- private HashMap<PartitionDesc, IDataSource<PartitionDesc>> dataSourceMap = new HashMap<PartitionDesc, IDataSource<PartitionDesc>>();
-
- /**
- * constructor
- *
- * @param aliasToPathMap
- */
- public TableScanWriteVisitor(HashMap<String, PartitionDesc> aliasToPathMap) {
- this.aliasToPathMap = aliasToPathMap;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(TableScanOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- TableScanDesc desc = (TableScanDesc) operator.getConf();
- if (desc == null) {
- List<LogicalVariable> schema = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(
- AlgebricksParentOperator.getValue(), schema);
- t.rewriteOperatorOutputSchema(schema, operator);
- return null;
- }
-
- List<ColumnInfo> columns = operator.getSchema().getSignature();
- for (int i = columns.size() - 1; i >= 0; i--)
- if (columns.get(i).getIsVirtualCol() == true)
- columns.remove(i);
-
- // start with empty tuple operator
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- ArrayList<LogicalVariable> variables = new ArrayList<LogicalVariable>();
- List<String> names = new ArrayList<String>();
- for (ColumnInfo column : columns) {
- types.add(column.getType());
-
- LogicalVariable var = t.getVariableFromFieldName(column
- .getTabAlias() + "." + column.getInternalName());
- LogicalVariable varNew;
-
- if (var != null) {
- varNew = t.getVariable(
- column.getTabAlias() + "." + column.getInternalName()
- + operator.toString(), column.getType());
- t.replaceVariable(var, varNew);
- var = varNew;
- } else
- var = t.getNewVariable(
- column.getTabAlias() + "." + column.getInternalName(),
- column.getType());
-
- variables.add(var);
- names.add(column.getInternalName());
- }
- Schema currentSchema = new Schema(names, types);
-
- String alias = desc.getAlias();
- PartitionDesc partDesc = aliasToPathMap.get(alias);
- IDataSource<PartitionDesc> dataSource = new HiveDataSource<PartitionDesc>(
- partDesc, currentSchema.getSchema());
- ILogicalOperator currentOperator = new DataSourceScanOperator(
- variables, dataSource);
-
- // set empty tuple source operator
- ILogicalOperator ets = new EmptyTupleSourceOperator();
- currentOperator.getInputs().add(
- new MutableObject<ILogicalOperator>(ets));
-
- // setup data source
- dataSourceMap.put(partDesc, dataSource);
- t.rewriteOperatorOutputSchema(variables, operator);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t) {
-
- if (hiveOperator.getChildOperators() != null
- && hiveOperator.getChildOperators().size() > 0)
- return null;
-
- Schema currentSchema = t.generateInputSchema(hiveOperator
- .getParentOperators().get(0));
-
- IDataSink sink = new HiveDataSink(hiveOperator,
- currentSchema.getSchema());
- List<Mutable<ILogicalExpression>> exprList = new ArrayList<Mutable<ILogicalExpression>>();
- for (String column : currentSchema.getNames()) {
- exprList.add(new MutableObject<ILogicalExpression>(
- new VariableReferenceExpression(t.getVariable(column))));
- }
-
- ILogicalOperator currentOperator = new WriteOperator(exprList, sink);
- if (AlgebricksParentOperator != null) {
- currentOperator.getInputs().add(AlgebricksParentOperator);
- }
-
- IMetadataProvider<PartitionDesc, Object> metaData = new HiveMetaDataProvider<PartitionDesc, Object>(
- hiveOperator, currentSchema, dataSourceMap);
- t.setMetadataProvider(metaData);
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
deleted file mode 100644
index f4e74f6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/UnionVisitor.java
+++ /dev/null
@@ -1,64 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.DefaultVisitor;
-import edu.uci.ics.hivesterix.logical.plan.visitor.base.Translator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-
-public class UnionVisitor extends DefaultVisitor {
-
- List<Mutable<ILogicalOperator>> parents = new ArrayList<Mutable<ILogicalOperator>>();
-
- @Override
- public Mutable<ILogicalOperator> visit(UnionOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
-
- parents.add(AlgebricksParentOperator);
- if (operator.getParentOperators().size() > parents.size()) {
- return null;
- }
-
- List<LogicalVariable> leftVars = new ArrayList<LogicalVariable>();
- List<LogicalVariable> rightVars = new ArrayList<LogicalVariable>();
-
- VariableUtilities.getUsedVariables(parents.get(0).getValue(), leftVars);
- VariableUtilities
- .getUsedVariables(parents.get(1).getValue(), rightVars);
-
- List<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> triples = new ArrayList<Triple<LogicalVariable, LogicalVariable, LogicalVariable>>();
- List<LogicalVariable> unionVars = new ArrayList<LogicalVariable>();
-
- for (int i = 0; i < leftVars.size(); i++) {
- LogicalVariable unionVar = t.getVariable(leftVars.get(i).getId()
- + "union" + AlgebricksParentOperator.hashCode(),
- TypeInfoFactory.unknownTypeInfo);
- unionVars.add(unionVar);
- Triple<LogicalVariable, LogicalVariable, LogicalVariable> triple = new Triple<LogicalVariable, LogicalVariable, LogicalVariable>(
- leftVars.get(i), rightVars.get(i), unionVar);
- t.replaceVariable(leftVars.get(i), unionVar);
- t.replaceVariable(rightVars.get(i), unionVar);
- triples.add(triple);
- }
- ILogicalOperator currentOperator = new edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator(
- triples);
- for (Mutable<ILogicalOperator> parent : parents)
- currentOperator.getInputs().add(parent);
-
- t.rewriteOperatorOutputSchema(unionVars, operator);
- parents.clear();
- return new MutableObject<ILogicalOperator>(currentOperator);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java
deleted file mode 100644
index 20013e3..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/DefaultVisitor.java
+++ /dev/null
@@ -1,166 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor.base;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.CollectOperator;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.ForwardOperator;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.LimitOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.MapOperator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.ScriptOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-
-/**
- * a default empty implementation of visitor
- *
- * @author yingyib
- */
-public class DefaultVisitor implements Visitor {
-
- @Override
- public Mutable<ILogicalOperator> visit(CollectOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(JoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(ExtractOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(MapJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(SMBMapJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- public Mutable<ILogicalOperator> visit(ReduceSinkOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(FilterOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(ForwardOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(GroupByOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(
- LateralViewForwardOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(
- LateralViewJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(LimitOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(MapOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(ScriptOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(SelectOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(TableScanOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(UDTFOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public Mutable<ILogicalOperator> visit(UnionOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperator, Translator t)
- throws AlgebricksException {
- return null;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
deleted file mode 100644
index 9165386..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Translator.java
+++ /dev/null
@@ -1,174 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor.base;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-
-@SuppressWarnings("rawtypes")
-public interface Translator {
-
- /**
- * generate input schema
- *
- * @param operator
- * @return
- */
- public Schema generateInputSchema(Operator operator);
-
- /**
- * rewrite the names of output columns for feture expression evaluators to
- * use
- *
- * @param operator
- */
- public void rewriteOperatorOutputSchema(List<LogicalVariable> vars,
- Operator operator);
-
- /**
- * rewrite the names of output columns for feture expression evaluators to
- * use
- *
- * @param operator
- */
- public void rewriteOperatorOutputSchema(Operator operator);
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpression(ExprNodeDesc expr);
-
- /**
- * rewrite an expression and substitute variables
- *
- * @param expr
- * hive expression
- */
- public void rewriteExpressionPartial(ExprNodeDesc expr);
-
- /**
- * get an assign operator as a child of parent
- *
- * @param parent
- * @param cols
- * @param variables
- * @return
- */
- public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent,
- List<ExprNodeDesc> cols, ArrayList<LogicalVariable> variables);
-
- /**
- * get type for a logical variable
- *
- * @param var
- * @return type info
- */
- public TypeInfo getType(LogicalVariable var);
-
- /**
- * translate an expression from hive to Algebricks
- *
- * @param desc
- * @return
- */
- public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc desc);
-
- /**
- * translate an aggregation from hive to Algebricks
- *
- * @param aggregateDesc
- * @return
- */
- public Mutable<ILogicalExpression> translateAggregation(
- AggregationDesc aggregateDesc);
-
- /**
- * translate unnesting (UDTF) function expression
- *
- * @param aggregator
- * @return
- */
- public Mutable<ILogicalExpression> translateUnnestFunction(
- UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument);
-
- /**
- * get variable from a schema
- *
- * @param schema
- * @return
- */
- public List<LogicalVariable> getVariablesFromSchema(Schema schema);
-
- /**
- * get variable from name
- *
- * @param name
- * @return
- */
- public LogicalVariable getVariable(String name);
-
- /**
- * get variable from field name
- *
- * @param name
- * @return
- */
- public LogicalVariable getVariableFromFieldName(String name);
-
- /**
- * get variable from name, type
- *
- * @param fieldName
- * @param type
- * @return
- */
- public LogicalVariable getVariable(String fieldName, TypeInfo type);
-
- /**
- * get new variable from name, type
- *
- * @param fieldName
- * @param type
- * @return
- */
- public LogicalVariable getNewVariable(String fieldName, TypeInfo type);
-
- /**
- * set the metadata provider
- *
- * @param metadata
- */
- public void setMetadataProvider(
- IMetadataProvider<PartitionDesc, Object> metadata);
-
- /**
- * get the metadata provider
- *
- * @param metadata
- */
- public IMetadataProvider<PartitionDesc, Object> getMetadataProvider();
-
- /**
- * replace the variable
- *
- * @param oldVar
- * @param newVar
- */
- public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar);
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java
deleted file mode 100644
index 745f93e..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/visitor/base/Visitor.java
+++ /dev/null
@@ -1,106 +0,0 @@
-package edu.uci.ics.hivesterix.logical.plan.visitor.base;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.CollectOperator;
-import org.apache.hadoop.hive.ql.exec.ExtractOperator;
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.FilterOperator;
-import org.apache.hadoop.hive.ql.exec.ForwardOperator;
-import org.apache.hadoop.hive.ql.exec.GroupByOperator;
-import org.apache.hadoop.hive.ql.exec.JoinOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewForwardOperator;
-import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
-import org.apache.hadoop.hive.ql.exec.LimitOperator;
-import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.MapOperator;
-import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
-import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
-import org.apache.hadoop.hive.ql.exec.ScriptOperator;
-import org.apache.hadoop.hive.ql.exec.SelectOperator;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.UDTFOperator;
-import org.apache.hadoop.hive.ql.exec.UnionOperator;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-
-public interface Visitor {
-
- public Mutable<ILogicalOperator> visit(CollectOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(JoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(ExtractOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(MapJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(SMBMapJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(FilterOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(ForwardOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(GroupByOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(
- LateralViewForwardOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(
- LateralViewJoinOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(LimitOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(MapOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(ScriptOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(SelectOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(TableScanOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(FileSinkOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(ReduceSinkOperator hiveOperator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(UDTFOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-
- public Mutable<ILogicalOperator> visit(UnionOperator operator,
- Mutable<ILogicalOperator> AlgebricksParentOperatorRef, Translator t)
- throws AlgebricksException;
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
deleted file mode 100644
index 4ebea0a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rulecollections/HiveRuleCollections.java
+++ /dev/null
@@ -1,114 +0,0 @@
-package edu.uci.ics.hivesterix.optimizer.rulecollections;
-
-import java.util.LinkedList;
-
-import edu.uci.ics.hivesterix.optimizer.rules.InsertProjectBeforeWriteRule;
-import edu.uci.ics.hivesterix.optimizer.rules.IntroduceEarlyProjectRule;
-import edu.uci.ics.hivesterix.optimizer.rules.LocalGroupByRule;
-import edu.uci.ics.hivesterix.optimizer.rules.RemoveRedundantSelectRule;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.HeuristicOptimizer;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.BreakSelectIntoConjunctsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ComplexJoinInferenceRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateAssignsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ConsolidateSelectsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.EliminateSubplanRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.EnforceStructuralPropertiesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractCommonOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ExtractGbyExpressionsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.FactorRedundantGroupAndDecorVarsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InferTypesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InlineVariablesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.InsertProjectBeforeUnionRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceAggregateCombinerRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IntroduceGroupByCombinerRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.IsolateHyracksOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PullSelectOutOfEqJoin;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushLimitDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushProjectIntoDataSourceScanRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectDownRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.PushSelectIntoJoinRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.ReinferAllTypesRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveRedundantProjectionRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.RemoveUnusedAssignAndAggregateRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetAlgebricksPhysicalOperatorsRule;
-import edu.uci.ics.hyracks.algebricks.rewriter.rules.SetExecutionModeRule;
-
-public final class HiveRuleCollections {
-
- public final static LinkedList<IAlgebraicRewriteRule> NORMALIZATION = new LinkedList<IAlgebraicRewriteRule>();
- static {
- NORMALIZATION.add(new EliminateSubplanRule());
- NORMALIZATION.add(new IntroduceAggregateCombinerRule());
- NORMALIZATION.add(new BreakSelectIntoConjunctsRule());
- NORMALIZATION.add(new IntroduceAggregateCombinerRule());
- NORMALIZATION.add(new PushSelectIntoJoinRule());
- NORMALIZATION.add(new ExtractGbyExpressionsRule());
- NORMALIZATION.add(new RemoveRedundantSelectRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> COND_PUSHDOWN_AND_JOIN_INFERENCE = new LinkedList<IAlgebraicRewriteRule>();
- static {
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new PushSelectDownRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new InlineVariablesRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE
- .add(new FactorRedundantGroupAndDecorVarsRule());
- COND_PUSHDOWN_AND_JOIN_INFERENCE.add(new EliminateSubplanRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> LOAD_FIELDS = new LinkedList<IAlgebraicRewriteRule>();
- static {
- // should LoadRecordFieldsRule be applied in only one pass over the
- // plan?
- LOAD_FIELDS.add(new InlineVariablesRule());
- // LOAD_FIELDS.add(new RemoveUnusedAssignAndAggregateRule());
- LOAD_FIELDS.add(new ComplexJoinInferenceRule());
- LOAD_FIELDS.add(new InferTypesRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> OP_PUSHDOWN = new LinkedList<IAlgebraicRewriteRule>();
- static {
- OP_PUSHDOWN.add(new PushProjectDownRule());
- OP_PUSHDOWN.add(new PushSelectDownRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> DATA_EXCHANGE = new LinkedList<IAlgebraicRewriteRule>();
- static {
- DATA_EXCHANGE.add(new SetExecutionModeRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> CONSOLIDATION = new LinkedList<IAlgebraicRewriteRule>();
- static {
- CONSOLIDATION.add(new RemoveRedundantProjectionRule());
- CONSOLIDATION.add(new ConsolidateSelectsRule());
- CONSOLIDATION.add(new IntroduceEarlyProjectRule());
- CONSOLIDATION.add(new ConsolidateAssignsRule());
- CONSOLIDATION.add(new IntroduceGroupByCombinerRule());
- CONSOLIDATION.add(new RemoveUnusedAssignAndAggregateRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> PHYSICAL_PLAN_REWRITES = new LinkedList<IAlgebraicRewriteRule>();
- static {
- PHYSICAL_PLAN_REWRITES.add(new PullSelectOutOfEqJoin());
- PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
- PHYSICAL_PLAN_REWRITES.add(new EnforceStructuralPropertiesRule());
- PHYSICAL_PLAN_REWRITES.add(new PushProjectDownRule());
- PHYSICAL_PLAN_REWRITES.add(new SetAlgebricksPhysicalOperatorsRule());
- PHYSICAL_PLAN_REWRITES.add(new PushLimitDownRule());
- PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeWriteRule());
- PHYSICAL_PLAN_REWRITES.add(new InsertProjectBeforeUnionRule());
- }
-
- public final static LinkedList<IAlgebraicRewriteRule> prepareJobGenRules = new LinkedList<IAlgebraicRewriteRule>();
- static {
- prepareJobGenRules.add(new ReinferAllTypesRule());
- prepareJobGenRules.add(new IsolateHyracksOperatorsRule(
- HeuristicOptimizer.hyraxOperatorsBelowWhichJobGenIsDisabled));
- prepareJobGenRules.add(new ExtractCommonOperatorsRule());
- prepareJobGenRules.add(new LocalGroupByRule());
- prepareJobGenRules.add(new PushProjectIntoDataSourceScanRule());
- prepareJobGenRules.add(new ReinferAllTypesRule());
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java
deleted file mode 100644
index c58982e..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/InsertProjectBeforeWriteRule.java
+++ /dev/null
@@ -1,85 +0,0 @@
-package edu.uci.ics.hivesterix.optimizer.rules;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.WriteOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.StreamProjectPOperator;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-
-public class InsertProjectBeforeWriteRule implements IAlgebraicRewriteRule {
-
- @Override
- public boolean rewritePost(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) {
- return false;
- }
-
- /**
- * When the input schema to WriteOperator is different from the output
- * schema in terms of variable order, add a project operator to get the
- * write order
- */
- @Override
- public boolean rewritePre(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
- if (op.getOperatorTag() != LogicalOperatorTag.WRITE) {
- return false;
- }
- WriteOperator opWrite = (WriteOperator) op;
- ArrayList<LogicalVariable> finalSchema = new ArrayList<LogicalVariable>();
- VariableUtilities.getUsedVariables(opWrite, finalSchema);
- ArrayList<LogicalVariable> inputSchema = new ArrayList<LogicalVariable>();
- VariableUtilities.getLiveVariables(opWrite, inputSchema);
- if (!isIdentical(finalSchema, inputSchema)) {
- ProjectOperator projectOp = new ProjectOperator(finalSchema);
- Mutable<ILogicalOperator> parentOpRef = opWrite.getInputs().get(0);
- projectOp.getInputs().add(parentOpRef);
- opWrite.getInputs().clear();
- opWrite.getInputs().add(
- new MutableObject<ILogicalOperator>(projectOp));
- projectOp.setPhysicalOperator(new StreamProjectPOperator());
- projectOp.setExecutionMode(ExecutionMode.PARTITIONED);
-
- AbstractLogicalOperator op2 = (AbstractLogicalOperator) parentOpRef
- .getValue();
- if (op2.getOperatorTag() == LogicalOperatorTag.PROJECT) {
- ProjectOperator pi2 = (ProjectOperator) op2;
- parentOpRef.setValue(pi2.getInputs().get(0).getValue());
- }
- context.computeAndSetTypeEnvironmentForOperator(projectOp);
- return true;
- } else
- return false;
-
- }
-
- private boolean isIdentical(List<LogicalVariable> finalSchema,
- List<LogicalVariable> inputSchema) {
- int finalSchemaSize = finalSchema.size();
- int inputSchemaSize = inputSchema.size();
- if (finalSchemaSize != inputSchemaSize)
- throw new IllegalStateException(
- "final output schema variables missing!");
- for (int i = 0; i < finalSchemaSize; i++) {
- LogicalVariable var1 = finalSchema.get(i);
- LogicalVariable var2 = inputSchema.get(i);
- if (!var1.equals(var2))
- return false;
- }
- return true;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java
deleted file mode 100644
index 2bebe81..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/IntroduceEarlyProjectRule.java
+++ /dev/null
@@ -1,77 +0,0 @@
-package edu.uci.ics.hivesterix.optimizer.rules;
-
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.commons.lang3.mutable.MutableObject;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-
-public class IntroduceEarlyProjectRule implements IAlgebraicRewriteRule {
-
- @Override
- public boolean rewritePre(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- return false;
- }
-
- @Override
- public boolean rewritePost(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
- if (op.getOperatorTag() != LogicalOperatorTag.PROJECT) {
- return false;
- }
- AbstractLogicalOperator middleOp = (AbstractLogicalOperator) op
- .getInputs().get(0).getValue();
- List<LogicalVariable> deliveredVars = new ArrayList<LogicalVariable>();
- List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
- List<LogicalVariable> producedVars = new ArrayList<LogicalVariable>();
-
- VariableUtilities.getUsedVariables(op, deliveredVars);
- VariableUtilities.getUsedVariables(middleOp, usedVars);
- VariableUtilities.getProducedVariables(middleOp, producedVars);
-
- Set<LogicalVariable> requiredVariables = new HashSet<LogicalVariable>();
- requiredVariables.addAll(deliveredVars);
- requiredVariables.addAll(usedVars);
- requiredVariables.removeAll(producedVars);
-
- if (middleOp.getInputs().size() <= 0 || middleOp.getInputs().size() > 1)
- return false;
-
- AbstractLogicalOperator targetOp = (AbstractLogicalOperator) middleOp
- .getInputs().get(0).getValue();
- if (targetOp.getOperatorTag() != LogicalOperatorTag.DATASOURCESCAN)
- return false;
-
- Set<LogicalVariable> deliveredEarlyVars = new HashSet<LogicalVariable>();
- VariableUtilities.getLiveVariables(targetOp, deliveredEarlyVars);
-
- deliveredEarlyVars.removeAll(requiredVariables);
- if (deliveredEarlyVars.size() > 0) {
- ArrayList<LogicalVariable> requiredVars = new ArrayList<LogicalVariable>();
- requiredVars.addAll(requiredVariables);
- ILogicalOperator earlyProjectOp = new ProjectOperator(requiredVars);
- Mutable<ILogicalOperator> earlyProjectOpRef = new MutableObject<ILogicalOperator>(
- earlyProjectOp);
- Mutable<ILogicalOperator> targetRef = middleOp.getInputs().get(0);
- middleOp.getInputs().set(0, earlyProjectOpRef);
- earlyProjectOp.getInputs().add(targetRef);
- context.computeAndSetTypeEnvironmentForOperator(earlyProjectOp);
- return true;
- }
- return false;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java
deleted file mode 100644
index 72cbe21..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/LocalGroupByRule.java
+++ /dev/null
@@ -1,71 +0,0 @@
-package edu.uci.ics.hivesterix.optimizer.rules;
-
-import org.apache.commons.lang3.mutable.Mutable;
-
-import edu.uci.ics.hivesterix.logical.plan.HiveOperatorAnnotations;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IPhysicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.OperatorAnnotations;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.PhysicalOperatorTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.OneToOneExchangePOperator;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-
-public class LocalGroupByRule implements IAlgebraicRewriteRule {
-
- @Override
- public boolean rewritePre(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- return false;
- }
-
- @Override
- public boolean rewritePost(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
- if (op.getOperatorTag() != LogicalOperatorTag.GROUP) {
- return false;
- }
- Boolean localGby = (Boolean) op.getAnnotations().get(
- HiveOperatorAnnotations.LOCAL_GROUP_BY);
- if (localGby != null && localGby.equals(Boolean.TRUE)) {
- Boolean hashGby = (Boolean) op.getAnnotations().get(
- OperatorAnnotations.USE_HASH_GROUP_BY);
- Boolean externalGby = (Boolean) op.getAnnotations().get(
- OperatorAnnotations.USE_EXTERNAL_GROUP_BY);
- if ((hashGby != null && (hashGby.equals(Boolean.TRUE)) || (externalGby != null && externalGby
- .equals(Boolean.TRUE)))) {
- reviseExchange(op);
- } else {
- ILogicalOperator child = op.getInputs().get(0).getValue();
- AbstractLogicalOperator childOp = (AbstractLogicalOperator) child;
- while (child.getInputs().size() > 0) {
- if (childOp.getOperatorTag() == LogicalOperatorTag.ORDER)
- break;
- else {
- child = child.getInputs().get(0).getValue();
- childOp = (AbstractLogicalOperator) child;
- }
- }
- if (childOp.getOperatorTag() == LogicalOperatorTag.ORDER)
- reviseExchange(childOp);
- }
- return true;
- }
- return false;
- }
-
- private void reviseExchange(AbstractLogicalOperator op) {
- ExchangeOperator exchange = (ExchangeOperator) op.getInputs().get(0)
- .getValue();
- IPhysicalOperator physicalOp = exchange.getPhysicalOperator();
- if (physicalOp.getOperatorTag() == PhysicalOperatorTag.HASH_PARTITION_EXCHANGE) {
- exchange.setPhysicalOperator(new OneToOneExchangePOperator());
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java
deleted file mode 100644
index 9958ba8..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/optimizer/rules/RemoveRedundantSelectRule.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package edu.uci.ics.hivesterix.optimizer.rules;
-
-import org.apache.commons.lang3.mutable.Mutable;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
-
-public class RemoveRedundantSelectRule implements IAlgebraicRewriteRule {
-
- @Override
- public boolean rewritePre(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- return false;
- }
-
- @Override
- public boolean rewritePost(Mutable<ILogicalOperator> opRef,
- IOptimizationContext context) throws AlgebricksException {
- AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
- if (op.getOperatorTag() != LogicalOperatorTag.SELECT) {
- return false;
- }
- AbstractLogicalOperator inputOp = (AbstractLogicalOperator) op
- .getInputs().get(0).getValue();
- if (inputOp.getOperatorTag() != LogicalOperatorTag.SELECT) {
- return false;
- }
- SelectOperator selectOp = (SelectOperator) op;
- SelectOperator inputSelectOp = (SelectOperator) inputOp;
- ILogicalExpression expr1 = selectOp.getCondition().getValue();
- ILogicalExpression expr2 = inputSelectOp.getCondition().getValue();
-
- if (expr1.equals(expr2)) {
- selectOp.getInputs().set(0, inputSelectOp.getInputs().get(0));
- return true;
- }
- return false;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/config/ConfUtil.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/config/ConfUtil.java
deleted file mode 100644
index 6b4d697..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/config/ConfUtil.java
+++ /dev/null
@@ -1,144 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.config;
-
-import java.net.InetAddress;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.api.client.HyracksConnection;
-import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
-import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
-import edu.uci.ics.hyracks.api.topology.ClusterTopology;
-
-@SuppressWarnings({ "rawtypes", "deprecation" })
-public class ConfUtil {
-
- private static JobConf job;
- private static HiveConf hconf;
- private static String[] NCs;
- private static Map<String, List<String>> ncMapping;
- private static IHyracksClientConnection hcc = null;
- private static ClusterTopology topology = null;
-
- public static JobConf getJobConf(Class<? extends InputFormat> format,
- Path path) {
- JobConf conf = new JobConf();
- if (job != null)
- conf = job;
-
- String hadoopPath = System.getProperty("HADOOP_HOME", "/hadoop");
- Path pathCore = new Path(hadoopPath + "/conf/core-site.xml");
- conf.addResource(pathCore);
- Path pathMapRed = new Path(hadoopPath + "/conf/mapred-site.xml");
- conf.addResource(pathMapRed);
- Path pathHDFS = new Path(hadoopPath + "/conf/hdfs-site.xml");
- conf.addResource(pathHDFS);
-
- conf.setInputFormat(format);
- FileInputFormat.setInputPaths(conf, path);
- return conf;
- }
-
- public static JobConf getJobConf() {
- JobConf conf = new JobConf();
- if (job != null)
- conf = job;
-
- String hadoopPath = System.getProperty("HADOOP_HOME", "/hadoop");
- Path pathCore = new Path(hadoopPath + "/conf/core-site.xml");
- conf.addResource(pathCore);
- Path pathMapRed = new Path(hadoopPath + "/conf/mapred-site.xml");
- conf.addResource(pathMapRed);
- Path pathHDFS = new Path(hadoopPath + "/conf/hdfs-site.xml");
- conf.addResource(pathHDFS);
-
- return conf;
- }
-
- public static void setJobConf(JobConf conf) {
- job = conf;
- }
-
- public static void setHiveConf(HiveConf hiveConf) {
- hconf = hiveConf;
- }
-
- public static HiveConf getHiveConf() {
- if (hconf == null) {
- hconf = new HiveConf(SessionState.class);
- hconf.addResource(new Path("conf/hive-default.xml"));
- }
- return hconf;
- }
-
- public static String[] getNCs() throws AlgebricksException {
- if (NCs == null) {
- try {
- loadClusterConfig();
- } catch (Exception e) {
- throw new AlgebricksException(e);
- }
- }
- return NCs;
- }
-
- public static Map<String, List<String>> getNCMapping()
- throws AlgebricksException {
- if (ncMapping == null) {
- try {
- loadClusterConfig();
- } catch (Exception e) {
- throw new AlgebricksException(e);
- }
- }
- return ncMapping;
- }
-
- private static void loadClusterConfig() {
- try {
- getHiveConf();
- String ipAddress = hconf.get("hive.hyracks.host");
- int port = Integer.parseInt(hconf.get("hive.hyracks.port"));
- int mpl = Integer.parseInt(hconf.get("hive.hyracks.parrallelism"));
- hcc = new HyracksConnection(ipAddress, port);
- topology = hcc.getClusterTopology();
- Map<String, NodeControllerInfo> ncNameToNcInfos = hcc
- .getNodeControllerInfos();
- NCs = new String[ncNameToNcInfos.size() * mpl];
- ncMapping = new HashMap<String, List<String>>();
- int i = 0;
- for (Map.Entry<String, NodeControllerInfo> entry : ncNameToNcInfos
- .entrySet()) {
- String ipAddr = InetAddress.getByAddress(
- entry.getValue().getNetworkAddress().getIpAddress())
- .getHostAddress();
- List<String> matchedNCs = ncMapping.get(ipAddr);
- if (matchedNCs == null) {
- matchedNCs = new ArrayList<String>();
- ncMapping.put(ipAddr, matchedNCs);
- }
- matchedNCs.add(entry.getKey());
- for (int j = i * mpl; j < i * mpl + mpl; j++)
- NCs[j] = entry.getKey();
- i++;
- }
- } catch (Exception e) {
- throw new IllegalStateException(e);
- }
- }
-
- public static ClusterTopology getClusterTopology() {
- if (topology == null)
- loadClusterConfig();
- return topology;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
deleted file mode 100644
index 8f6d9ca..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AbstractExpressionEvaluator.java
+++ /dev/null
@@ -1,174 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.io.BytesWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public abstract class AbstractExpressionEvaluator implements ICopyEvaluator {
-
- private List<ICopyEvaluator> children;
-
- private ExprNodeEvaluator evaluator;
-
- private IDataOutputProvider out;
-
- private ObjectInspector inspector;
-
- /**
- * output object inspector
- */
- private ObjectInspector outputInspector;
-
- /**
- * cached row object
- */
- private LazyObject<? extends ObjectInspector> cachedRowObject;
-
- /**
- * serializer/derialzer for lazy object
- */
- private SerDe lazySer;
-
- /**
- * data output
- */
- DataOutput dataOutput;
-
- public AbstractExpressionEvaluator(ExprNodeEvaluator hiveEvaluator,
- ObjectInspector oi, IDataOutputProvider output)
- throws AlgebricksException {
- evaluator = hiveEvaluator;
- out = output;
- inspector = oi;
- dataOutput = out.getDataOutput();
- }
-
- protected ObjectInspector getRowInspector() {
- return null;
- }
-
- protected IDataOutputProvider getIDataOutputProvider() {
- return out;
- }
-
- protected ExprNodeEvaluator getHiveEvaluator() {
- return evaluator;
- }
-
- public ObjectInspector getObjectInspector() {
- return inspector;
- }
-
- @Override
- public void evaluate(IFrameTupleReference r) throws AlgebricksException {
- // initialize hive evaluator
- try {
- if (outputInspector == null)
- outputInspector = evaluator.initialize(inspector);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
-
- readIntoCache(r);
- try {
- Object result = evaluator.evaluate(cachedRowObject);
-
- // if (result == null) {
- // result = evaluator.evaluate(cachedRowObject);
- //
- // // check if result is null
- //
- // String errorMsg = "serialize null object in \n output " +
- // outputInspector.toString() + " \n input "
- // + inspector.toString() + "\n ";
- // errorMsg += "";
- // List<Object> columns = ((StructObjectInspector)
- // inspector).getStructFieldsDataAsList(cachedRowObject);
- // for (Object column : columns) {
- // errorMsg += column.toString() + " ";
- // }
- // errorMsg += "\n";
- // Log.info(errorMsg);
- // System.out.println(errorMsg);
- // // result = new BooleanWritable(true);
- // throw new IllegalStateException(errorMsg);
- // }
-
- serializeResult(result);
- } catch (HiveException e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- } catch (IOException e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- }
-
- /**
- * serialize the result
- *
- * @param result
- * the evaluation result
- * @throws IOException
- * @throws AlgebricksException
- */
- private void serializeResult(Object result) throws IOException,
- AlgebricksException {
- if (lazySer == null)
- lazySer = new LazySerDe();
-
- try {
- BytesWritable outputWritable = (BytesWritable) lazySer.serialize(
- result, outputInspector);
- dataOutput.write(outputWritable.getBytes(), 0,
- outputWritable.getLength());
- } catch (SerDeException e) {
- throw new AlgebricksException(e);
- }
- }
-
- /**
- * bind the tuple reference to the cached row object
- *
- * @param r
- */
- private void readIntoCache(IFrameTupleReference r) {
- if (cachedRowObject == null)
- cachedRowObject = (LazyObject<? extends ObjectInspector>) LazyFactory
- .createLazyObject(inspector);
- cachedRowObject.init(r);
- }
-
- /**
- * set a list of children of this evaluator
- *
- * @param children
- */
- public void setChildren(List<ICopyEvaluator> children) {
- this.children = children;
- }
-
- public void addChild(ICopyEvaluator child) {
- if (children == null)
- children = new ArrayList<ICopyEvaluator>();
- children.add(child);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
deleted file mode 100644
index 271b5e4..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregationFunctionEvaluator.java
+++ /dev/null
@@ -1,231 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.BytesWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class AggregationFunctionEvaluator implements ICopyAggregateFunction {
-
- /**
- * the mode of aggregation function
- */
- private GenericUDAFEvaluator.Mode mode;
-
- /**
- * an array of evaluators
- */
- private ExprNodeEvaluator[] evaluators;
-
- /**
- * udaf evaluator partial
- */
- private GenericUDAFEvaluator udafPartial;
-
- /**
- * udaf evaluator complete
- */
- private GenericUDAFEvaluator udafComplete;
-
- /**
- * cached parameter objects
- */
- private Object[] cachedParameters;
-
- /**
- * cached row objects
- */
- private LazyObject<? extends ObjectInspector> cachedRowObject;
-
- /**
- * the output channel
- */
- private DataOutput out;
-
- /**
- * aggregation buffer
- */
- private AggregationBuffer aggBuffer;
-
- /**
- * we only use lazy serde to do serialization
- */
- private SerDe lazySer;
-
- /**
- * the output object inspector for this aggregation function
- */
- private ObjectInspector outputInspector;
-
- /**
- * the output object inspector for this aggregation function
- */
- private ObjectInspector outputInspectorPartial;
-
- /**
- * parameter inspectors
- */
- private ObjectInspector[] parameterInspectors;
-
- /**
- * output make sure the aggregation functio has least object creation
- *
- * @param desc
- * @param oi
- * @param output
- */
- public AggregationFunctionEvaluator(List<ExprNodeDesc> inputs,
- List<TypeInfo> inputTypes, String genericUDAFName,
- GenericUDAFEvaluator.Mode aggMode, boolean distinct,
- ObjectInspector oi, DataOutput output, ExprNodeEvaluator[] evals,
- ObjectInspector[] pInspectors, Object[] parameterCache,
- SerDe serde, LazyObject<? extends ObjectInspector> row,
- GenericUDAFEvaluator udafunctionPartial,
- GenericUDAFEvaluator udafunctionComplete, ObjectInspector outputOi,
- ObjectInspector outputOiPartial) {
- // shared object across threads
- this.out = output;
- this.mode = aggMode;
- this.parameterInspectors = pInspectors;
-
- // thread local objects
- this.evaluators = evals;
- this.cachedParameters = parameterCache;
- this.cachedRowObject = row;
- this.lazySer = serde;
- this.udafPartial = udafunctionPartial;
- this.udafComplete = udafunctionComplete;
- this.outputInspector = outputOi;
- this.outputInspectorPartial = outputOiPartial;
- }
-
- @Override
- public void init() throws AlgebricksException {
- try {
- aggBuffer = udafPartial.getNewAggregationBuffer();
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- @Override
- public void step(IFrameTupleReference tuple) throws AlgebricksException {
- readIntoCache(tuple);
- processRow();
- }
-
- private void processRow() throws AlgebricksException {
- try {
- // get values by evaluating them
- for (int i = 0; i < cachedParameters.length; i++) {
- cachedParameters[i] = evaluators[i].evaluate(cachedRowObject);
- }
- processAggregate();
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- private void processAggregate() throws HiveException {
- /**
- * accumulate the aggregation function
- */
- switch (mode) {
- case PARTIAL1:
- case COMPLETE:
- udafPartial.iterate(aggBuffer, cachedParameters);
- break;
- case PARTIAL2:
- case FINAL:
- if (udafPartial instanceof GenericUDAFCount.GenericUDAFCountEvaluator) {
- Object parameter = ((PrimitiveObjectInspector) parameterInspectors[0])
- .getPrimitiveWritableObject(cachedParameters[0]);
- udafPartial.merge(aggBuffer, parameter);
- } else
- udafPartial.merge(aggBuffer, cachedParameters[0]);
- break;
- default:
- break;
- }
- }
-
- /**
- * serialize the result
- *
- * @param result
- * the evaluation result
- * @throws IOException
- * @throws AlgebricksException
- */
- private void serializeResult(Object result, ObjectInspector oi)
- throws IOException, AlgebricksException {
- try {
- BytesWritable outputWritable = (BytesWritable) lazySer.serialize(
- result, oi);
- out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
- } catch (SerDeException e) {
- throw new AlgebricksException(e);
- }
- }
-
- /**
- * bind the tuple reference to the cached row object
- *
- * @param r
- */
- private void readIntoCache(IFrameTupleReference r) {
- cachedRowObject.init(r);
- }
-
- @Override
- public void finish() throws AlgebricksException {
- // aggregator
- try {
- Object result = null;
- result = udafPartial.terminatePartial(aggBuffer);
- if (mode == GenericUDAFEvaluator.Mode.COMPLETE
- || mode == GenericUDAFEvaluator.Mode.FINAL) {
- result = udafComplete.terminate(aggBuffer);
- serializeResult(result, outputInspector);
- } else {
- serializeResult(result, outputInspectorPartial);
- }
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-
- @Override
- public void finishPartial() throws AlgebricksException {
- // aggregator.
- try {
- Object result = null;
- // get aggregations
- result = udafPartial.terminatePartial(aggBuffer);
- serializeResult(result, outputInspectorPartial);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
deleted file mode 100644
index 032437b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/AggregatuibFunctionSerializableEvaluator.java
+++ /dev/null
@@ -1,259 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCount;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.BytesWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class AggregatuibFunctionSerializableEvaluator implements
- ICopySerializableAggregateFunction {
-
- /**
- * the mode of aggregation function
- */
- private GenericUDAFEvaluator.Mode mode;
-
- /**
- * an array of evaluators
- */
- private ExprNodeEvaluator[] evaluators;
-
- /**
- * udaf evaluator partial
- */
- private GenericUDAFEvaluator udafPartial;
-
- /**
- * udaf evaluator complete
- */
- private GenericUDAFEvaluator udafComplete;
-
- /**
- * cached parameter objects
- */
- private Object[] cachedParameters;
-
- /**
- * cached row objects
- */
- private LazyObject<? extends ObjectInspector> cachedRowObject;
-
- /**
- * aggregation buffer
- */
- private SerializableBuffer aggBuffer;
-
- /**
- * we only use lazy serde to do serialization
- */
- private SerDe lazySer;
-
- /**
- * the output object inspector for this aggregation function
- */
- private ObjectInspector outputInspector;
-
- /**
- * the output object inspector for this aggregation function
- */
- private ObjectInspector outputInspectorPartial;
-
- /**
- * parameter inspectors
- */
- private ObjectInspector[] parameterInspectors;
-
- /**
- * output make sure the aggregation functio has least object creation
- *
- * @param desc
- * @param oi
- * @param output
- */
- public AggregatuibFunctionSerializableEvaluator(List<ExprNodeDesc> inputs,
- List<TypeInfo> inputTypes, String genericUDAFName,
- GenericUDAFEvaluator.Mode aggMode, boolean distinct,
- ObjectInspector oi, ExprNodeEvaluator[] evals,
- ObjectInspector[] pInspectors, Object[] parameterCache,
- SerDe serde, LazyObject<? extends ObjectInspector> row,
- GenericUDAFEvaluator udafunctionPartial,
- GenericUDAFEvaluator udafunctionComplete, ObjectInspector outputOi,
- ObjectInspector outputOiPartial) throws AlgebricksException {
- // shared object across threads
- this.mode = aggMode;
- this.parameterInspectors = pInspectors;
-
- // thread local objects
- this.evaluators = evals;
- this.cachedParameters = parameterCache;
- this.cachedRowObject = row;
- this.lazySer = serde;
- this.udafPartial = udafunctionPartial;
- this.udafComplete = udafunctionComplete;
- this.outputInspector = outputOi;
- this.outputInspectorPartial = outputOiPartial;
-
- try {
- aggBuffer = (SerializableBuffer) udafPartial
- .getNewAggregationBuffer();
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- @Override
- public void init(DataOutput output) throws AlgebricksException {
- try {
- udafPartial.reset(aggBuffer);
- outputAggBuffer(aggBuffer, output);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- @Override
- public void step(IFrameTupleReference tuple, byte[] data, int start, int len)
- throws AlgebricksException {
- deSerializeAggBuffer(aggBuffer, data, start, len);
- readIntoCache(tuple);
- processRow();
- serializeAggBuffer(aggBuffer, data, start, len);
- }
-
- private void processRow() throws AlgebricksException {
- try {
- // get values by evaluating them
- for (int i = 0; i < cachedParameters.length; i++) {
- cachedParameters[i] = evaluators[i].evaluate(cachedRowObject);
- }
- processAggregate();
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- private void processAggregate() throws HiveException {
- /**
- * accumulate the aggregation function
- */
- switch (mode) {
- case PARTIAL1:
- case COMPLETE:
- udafPartial.iterate(aggBuffer, cachedParameters);
- break;
- case PARTIAL2:
- case FINAL:
- if (udafPartial instanceof GenericUDAFCount.GenericUDAFCountEvaluator) {
- Object parameter = ((PrimitiveObjectInspector) parameterInspectors[0])
- .getPrimitiveWritableObject(cachedParameters[0]);
- udafPartial.merge(aggBuffer, parameter);
- } else
- udafPartial.merge(aggBuffer, cachedParameters[0]);
- break;
- default:
- break;
- }
- }
-
- /**
- * serialize the result
- *
- * @param result
- * the evaluation result
- * @throws IOException
- * @throws AlgebricksException
- */
- private void serializeResult(Object result, ObjectInspector oi,
- DataOutput out) throws IOException, AlgebricksException {
- try {
- BytesWritable outputWritable = (BytesWritable) lazySer.serialize(
- result, oi);
- out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
- } catch (SerDeException e) {
- throw new AlgebricksException(e);
- }
- }
-
- /**
- * bind the tuple reference to the cached row object
- *
- * @param r
- */
- private void readIntoCache(IFrameTupleReference r) {
- cachedRowObject.init(r);
- }
-
- @Override
- public void finish(byte[] data, int start, int len, DataOutput output)
- throws AlgebricksException {
- deSerializeAggBuffer(aggBuffer, data, start, len);
- // aggregator
- try {
- Object result = null;
- result = udafPartial.terminatePartial(aggBuffer);
- if (mode == GenericUDAFEvaluator.Mode.COMPLETE
- || mode == GenericUDAFEvaluator.Mode.FINAL) {
- result = udafComplete.terminate(aggBuffer);
- serializeResult(result, outputInspector, output);
- } else {
- serializeResult(result, outputInspectorPartial, output);
- }
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-
- @Override
- public void finishPartial(byte[] data, int start, int len, DataOutput output)
- throws AlgebricksException {
- deSerializeAggBuffer(aggBuffer, data, start, len);
- // aggregator.
- try {
- Object result = null;
- // get aggregations
- result = udafPartial.terminatePartial(aggBuffer);
- serializeResult(result, outputInspectorPartial, output);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-
- private void serializeAggBuffer(SerializableBuffer buffer, byte[] data,
- int start, int len) throws AlgebricksException {
- buffer.serializeAggBuffer(data, start, len);
- }
-
- private void deSerializeAggBuffer(SerializableBuffer buffer, byte[] data,
- int start, int len) throws AlgebricksException {
- buffer.deSerializeAggBuffer(data, start, len);
- }
-
- private void outputAggBuffer(SerializableBuffer buffer, DataOutput out)
- throws AlgebricksException {
- try {
- buffer.serializeAggBuffer(out);
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java
deleted file mode 100644
index d73be93..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/BufferSerDeUtil.java
+++ /dev/null
@@ -1,73 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-public class BufferSerDeUtil {
-
- public static double getDouble(byte[] bytes, int offset) {
- return Double.longBitsToDouble(getLong(bytes, offset));
- }
-
- public static float getFloat(byte[] bytes, int offset) {
- return Float.intBitsToFloat(getInt(bytes, offset));
- }
-
- public static boolean getBoolean(byte[] bytes, int offset) {
- if (bytes[offset] == 0)
- return false;
- else
- return true;
- }
-
- public static int getInt(byte[] bytes, int offset) {
- return ((bytes[offset] & 0xff) << 24)
- + ((bytes[offset + 1] & 0xff) << 16)
- + ((bytes[offset + 2] & 0xff) << 8)
- + ((bytes[offset + 3] & 0xff) << 0);
- }
-
- public static long getLong(byte[] bytes, int offset) {
- return (((long) (bytes[offset] & 0xff)) << 56)
- + (((long) (bytes[offset + 1] & 0xff)) << 48)
- + (((long) (bytes[offset + 2] & 0xff)) << 40)
- + (((long) (bytes[offset + 3] & 0xff)) << 32)
- + (((long) (bytes[offset + 4] & 0xff)) << 24)
- + (((long) (bytes[offset + 5] & 0xff)) << 16)
- + (((long) (bytes[offset + 6] & 0xff)) << 8)
- + (((long) (bytes[offset + 7] & 0xff)) << 0);
- }
-
- public static void writeBoolean(boolean value, byte[] bytes, int offset) {
- if (value)
- bytes[offset] = (byte) 1;
- else
- bytes[offset] = (byte) 0;
- }
-
- public static void writeInt(int value, byte[] bytes, int offset) {
- bytes[offset++] = (byte) (value >> 24);
- bytes[offset++] = (byte) (value >> 16);
- bytes[offset++] = (byte) (value >> 8);
- bytes[offset++] = (byte) (value);
- }
-
- public static void writeLong(long value, byte[] bytes, int offset) {
- bytes[offset++] = (byte) (value >> 56);
- bytes[offset++] = (byte) (value >> 48);
- bytes[offset++] = (byte) (value >> 40);
- bytes[offset++] = (byte) (value >> 32);
- bytes[offset++] = (byte) (value >> 24);
- bytes[offset++] = (byte) (value >> 16);
- bytes[offset++] = (byte) (value >> 8);
- bytes[offset++] = (byte) (value);
- }
-
- public static void writeDouble(double value, byte[] bytes, int offset) {
- long lValue = Double.doubleToLongBits(value);
- writeLong(lValue, bytes, offset);
- }
-
- public static void writeFloat(float value, byte[] bytes, int offset) {
- int iValue = Float.floatToIntBits(value);
- writeInt(iValue, bytes, offset);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ExpressionTranslator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ExpressionTranslator.java
deleted file mode 100644
index 2180910..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/ExpressionTranslator.java
+++ /dev/null
@@ -1,233 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang3.mutable.Mutable;
-import org.apache.hadoop.hive.ql.exec.FunctionInfo;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
-import edu.uci.ics.hivesterix.logical.expression.HiveAlgebricksBuiltInFunctionMap;
-import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
-import edu.uci.ics.hivesterix.logical.expression.HivesterixConstantValue;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-
-public class ExpressionTranslator {
-
- public static Object getHiveExpression(ILogicalExpression expr,
- IVariableTypeEnvironment env) throws Exception {
- if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
- /**
- * function expression
- */
- AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
- IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
- FunctionIdentifier fid = funcInfo.getFunctionIdentifier();
-
- if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
- Object info = ((HiveFunctionInfo) funcInfo).getInfo();
- ExprNodeFieldDesc desc = (ExprNodeFieldDesc) info;
- return new ExprNodeFieldDesc(desc.getTypeInfo(),
- desc.getDesc(), desc.getFieldName(), desc.getIsList());
- }
-
- if (fid.getName().equals(ExpressionConstant.NULL)) {
- return new ExprNodeNullDesc();
- }
-
- /**
- * argument expressions: translate argument expressions recursively
- * first, this logic is shared in scalar, aggregation and unnesting
- * function
- */
- List<Mutable<ILogicalExpression>> arguments = funcExpr
- .getArguments();
- List<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
- for (Mutable<ILogicalExpression> argument : arguments) {
- /**
- * parameters could not be aggregate function desc
- */
- ExprNodeDesc parameter = (ExprNodeDesc) getHiveExpression(
- argument.getValue(), env);
- parameters.add(parameter);
- }
-
- /**
- * get expression
- */
- if (funcExpr instanceof ScalarFunctionCallExpression) {
- String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE
- .getHiveFunctionName(fid);
- GenericUDF udf;
- if (udfName != null) {
- /**
- * get corresponding function info for built-in functions
- */
- FunctionInfo fInfo = FunctionRegistry
- .getFunctionInfo(udfName);
- udf = fInfo.getGenericUDF();
-
- int inputSize = parameters.size();
- List<ExprNodeDesc> currentDescs = new ArrayList<ExprNodeDesc>();
-
- // generate expression tree if necessary
- while (inputSize > 2) {
- int pairs = inputSize / 2;
- for (int i = 0; i < pairs; i++) {
- List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>();
- descs.add(parameters.get(2 * i));
- descs.add(parameters.get(2 * i + 1));
- ExprNodeDesc desc = ExprNodeGenericFuncDesc
- .newInstance(udf, descs);
- currentDescs.add(desc);
- }
-
- if (inputSize % 2 != 0) {
- // List<ExprNodeDesc> descs = new
- // ArrayList<ExprNodeDesc>();
- // ExprNodeDesc lastExpr =
- // currentDescs.remove(currentDescs.size() - 1);
- // descs.add(lastExpr);
- currentDescs.add(parameters.get(inputSize - 1));
- // ExprNodeDesc desc =
- // ExprNodeGenericFuncDesc.newInstance(udf, descs);
- // currentDescs.add(desc);
- }
- inputSize = currentDescs.size();
- parameters.clear();
- parameters.addAll(currentDescs);
- currentDescs.clear();
- }
-
- } else {
- Object secondInfo = ((HiveFunctionInfo) funcInfo).getInfo();
- if (secondInfo != null) {
-
- /**
- * for GenericUDFBridge: we should not call get type of
- * this hive expression, because parameters may have
- * been changed!
- */
- ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) ((HiveFunctionInfo) funcInfo)
- .getInfo();
- udf = hiveExpr.getGenericUDF();
- } else {
- /**
- * for other generic UDF
- */
- Class<?> udfClass;
- try {
- udfClass = Class.forName(fid.getName());
- udf = (GenericUDF) udfClass.newInstance();
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- }
- }
- /**
- * get hive generic function expression
- */
- ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf,
- parameters);
- return desc;
- } else if (funcExpr instanceof AggregateFunctionCallExpression) {
- /**
- * hive aggregation info
- */
- AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- /**
- * set parameters
- */
- aggregateDesc
- .setParameters((ArrayList<ExprNodeDesc>) parameters);
-
- List<TypeInfo> originalParameterTypeInfos = new ArrayList<TypeInfo>();
- for (ExprNodeDesc parameter : parameters) {
- if (parameter.getTypeInfo() instanceof StructTypeInfo) {
- originalParameterTypeInfos
- .add(TypeInfoFactory.doubleTypeInfo);
- } else
- originalParameterTypeInfos.add(parameter.getTypeInfo());
- }
-
- GenericUDAFEvaluator eval = FunctionRegistry
- .getGenericUDAFEvaluator(
- aggregateDesc.getGenericUDAFName(),
- originalParameterTypeInfos,
- aggregateDesc.getDistinct(), false);
-
- AggregationDesc newAggregateDesc = new AggregationDesc(
- aggregateDesc.getGenericUDAFName(), eval,
- aggregateDesc.getParameters(),
- aggregateDesc.getDistinct(), aggregateDesc.getMode());
- return newAggregateDesc;
- } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
- /**
- * type inference for UDTF function
- */
- UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr
- .getFunctionInfo()).getInfo();
- String funcName = hiveDesc.getUDTFName();
- FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
- GenericUDTF udtf = fi.getGenericUDTF();
- UDTFDesc desc = new UDTFDesc(udtf);
- return desc;
- } else {
- throw new IllegalStateException(
- "unrecognized function expression "
- + expr.getClass().getName());
- }
- } else if ((expr.getExpressionTag() == LogicalExpressionTag.VARIABLE)) {
- /**
- * get type for variable in the environment
- */
- VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
- LogicalVariable var = varExpr.getVariableReference();
- TypeInfo typeInfo = (TypeInfo) env.getVarType(var);
- ExprNodeDesc desc = new ExprNodeColumnDesc(typeInfo,
- var.toString(), "", false);
- return desc;
- } else if ((expr.getExpressionTag() == LogicalExpressionTag.CONSTANT)) {
- /**
- * get expression for constant in the environment
- */
- ConstantExpression varExpr = (ConstantExpression) expr;
- Object value = ((HivesterixConstantValue) varExpr.getValue())
- .getObject();
- ExprNodeDesc desc = new ExprNodeConstantDesc(value);
- return desc;
- } else {
- throw new IllegalStateException("illegal expressions "
- + expr.getClass().getName());
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java
deleted file mode 100644
index 328b384..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/SerializableBuffer.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AggregationBuffer;
-
-public interface SerializableBuffer extends AggregationBuffer {
-
- public void deSerializeAggBuffer(byte[] data, int start, int len);
-
- public void serializeAggBuffer(byte[] data, int start, int len);
-
- public void serializeAggBuffer(DataOutput output) throws IOException;
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
deleted file mode 100644
index de0141b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/evaluator/UDTFFunctionEvaluator.java
+++ /dev/null
@@ -1,147 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.evaluator;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-import org.apache.hadoop.hive.ql.udf.generic.Collector;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.io.BytesWritable;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
-import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunction;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-public class UDTFFunctionEvaluator implements ICopyUnnestingFunction, Collector {
-
- /**
- * udtf function
- */
- private UDTFDesc func;
-
- /**
- * input object inspector
- */
- private ObjectInspector inputInspector;
-
- /**
- * output object inspector
- */
- private ObjectInspector outputInspector;
-
- /**
- * object inspector for udtf
- */
- private ObjectInspector[] udtfInputOIs;
-
- /**
- * generic udtf
- */
- private GenericUDTF udtf;
-
- /**
- * data output
- */
- private DataOutput out;
-
- /**
- * the input row object
- */
- private LazyColumnar cachedRowObject;
-
- /**
- * cached row object (input)
- */
- private Object[] cachedInputObjects;
-
- /**
- * serialization/deserialization
- */
- private SerDe lazySerDe;
-
- /**
- * columns feed into UDTF
- */
- private int[] columns;
-
- public UDTFFunctionEvaluator(UDTFDesc desc, Schema schema, int[] cols,
- DataOutput output) {
- this.func = desc;
- this.inputInspector = schema.toObjectInspector();
- udtf = func.getGenericUDTF();
- out = output;
- columns = cols;
- }
-
- @Override
- public void init(IFrameTupleReference tuple) throws AlgebricksException {
- cachedInputObjects = new LazyObject[columns.length];
- try {
- cachedRowObject = (LazyColumnar) LazyFactory
- .createLazyObject(inputInspector);
- outputInspector = udtf.initialize(udtfInputOIs);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udtf.setCollector(this);
- lazySerDe = new LazySerDe();
- readIntoCache(tuple);
- }
-
- @Override
- public boolean step() throws AlgebricksException {
- try {
- udtf.process(cachedInputObjects);
- return true;
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- }
-
- /**
- * bind the tuple reference to the cached row object
- *
- * @param r
- */
- private void readIntoCache(IFrameTupleReference r) {
- cachedRowObject.init(r);
- for (int i = 0; i < cachedInputObjects.length; i++) {
- cachedInputObjects[i] = cachedRowObject.getField(columns[i]);
- }
- }
-
- /**
- * serialize the result
- *
- * @param result
- * the evaluation result
- * @throws IOException
- * @throws AlgebricksException
- */
- private void serializeResult(Object result) throws SerDeException,
- IOException {
- BytesWritable outputWritable = (BytesWritable) lazySerDe.serialize(
- result, outputInspector);
- out.write(outputWritable.getBytes(), 0, outputWritable.getLength());
- }
-
- @Override
- public void collect(Object input) throws HiveException {
- try {
- serializeResult(input);
- } catch (IOException e) {
- throw new HiveException(e);
- } catch (SerDeException e) {
- throw new HiveException(e);
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java
deleted file mode 100644
index 8f4c471..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.exec;
-
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.Task;
-
-public interface IExecutionEngine {
-
- /**
- * compile the job
- *
- * @param rootTasks
- * : Hive MapReduce plan
- * @return 0 pass, 1 fail
- */
- public int compileJob(List<Task<? extends Serializable>> rootTasks);
-
- /**
- * execute the job with latest compiled plan
- *
- * @return
- */
- public int executeJob();
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java
deleted file mode 100644
index 9c2d463..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,36 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveByteBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveByteBinaryAscComparatorFactory INSTANCE = new HiveByteBinaryAscComparatorFactory();
-
- private HiveByteBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private byte left;
- private byte right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = b1[s1];
- right = b2[s2];
- if (left > right)
- return 1;
- else if (left == right)
- return 0;
- else
- return -1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java
deleted file mode 100644
index ee71655..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveByteBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveByteBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveByteBinaryDescComparatorFactory INSTANCE = new HiveByteBinaryDescComparatorFactory();
-
- private HiveByteBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private byte left;
- private byte right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = b1[s1];
- right = b2[s2];
- if (left > right)
- return -1;
- else if (left == right)
- return 0;
- else
- return 1;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java
deleted file mode 100644
index 739e417..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveDoubleBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveDoubleBinaryAscComparatorFactory INSTANCE = new HiveDoubleBinaryAscComparatorFactory();
-
- private HiveDoubleBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private double left;
- private double right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = Double.longBitsToDouble(LazyUtils
- .byteArrayToLong(b1, s1));
- right = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b2,
- s2));
- if (left > right)
- return 1;
- else if (left == right)
- return 0;
- else
- return -1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java
deleted file mode 100644
index 0424c9f..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveDoubleBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveDoubleBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveDoubleBinaryDescComparatorFactory INSTANCE = new HiveDoubleBinaryDescComparatorFactory();
-
- private HiveDoubleBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private double left;
- private double right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = Double.longBitsToDouble(LazyUtils
- .byteArrayToLong(b1, s1));
- right = Double.longBitsToDouble(LazyUtils.byteArrayToLong(b2,
- s2));
- if (left > right)
- return -1;
- else if (left == right)
- return 0;
- else
- return 1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java
deleted file mode 100644
index 08542a7..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveFloatBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveFloatBinaryAscComparatorFactory INSTANCE = new HiveFloatBinaryAscComparatorFactory();
-
- private HiveFloatBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private float left;
- private float right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b1, s1));
- right = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b2, s2));
- if (left > right)
- return 1;
- else if (left == right)
- return 0;
- else
- return -1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java
deleted file mode 100644
index 513512e..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveFloatBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveFloatBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveFloatBinaryDescComparatorFactory INSTANCE = new HiveFloatBinaryDescComparatorFactory();
-
- private HiveFloatBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private float left;
- private float right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b1, s1));
- right = Float.intBitsToFloat(LazyUtils.byteArrayToInt(b2, s2));
- if (left > right)
- return -1;
- else if (left == right)
- return 0;
- else
- return 1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java
deleted file mode 100644
index 947f30f..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveIntegerBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static final HiveIntegerBinaryAscComparatorFactory INSTANCE = new HiveIntegerBinaryAscComparatorFactory();
-
- private HiveIntegerBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VInt left = new VInt();
- private VInt right = new VInt();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVInt(b1, s1, left);
- LazyUtils.readVInt(b2, s2, right);
-
- if (left.length != l1 || right.length != l2)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + left.length + "," + right.length
- + " expected " + l1 + "," + l2);
-
- if (left.value > right.value)
- return 1;
- else if (left.value == right.value)
- return 0;
- else
- return -1;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java
deleted file mode 100644
index 7614aa1..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveIntegerBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveIntegerBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static final HiveIntegerBinaryDescComparatorFactory INSTANCE = new HiveIntegerBinaryDescComparatorFactory();
-
- private HiveIntegerBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VInt left = new VInt();
- private VInt right = new VInt();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVInt(b1, s1, left);
- LazyUtils.readVInt(b2, s2, right);
- if (left.length != l1 || right.length != l2)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + left.length + " expected " + l1);
- if (left.value > right.value)
- return -1;
- else if (left.value == right.value)
- return 0;
- else
- return 1;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java
deleted file mode 100644
index f5f3473..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveLongBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static final HiveLongBinaryAscComparatorFactory INSTANCE = new HiveLongBinaryAscComparatorFactory();
-
- private HiveLongBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VLong left = new VLong();
- private VLong right = new VLong();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVLong(b1, s1, left);
- LazyUtils.readVLong(b2, s2, right);
- if (left.length != l1 || right.length != l2)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + left.length + " expected " + l1);
- if (left.value > right.value)
- return 1;
- else if (left.value == right.value)
- return 0;
- else
- return -1;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java
deleted file mode 100644
index b878b22..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveLongBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveLongBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static final HiveLongBinaryDescComparatorFactory INSTANCE = new HiveLongBinaryDescComparatorFactory();
-
- private HiveLongBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VLong left = new VLong();
- private VLong right = new VLong();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVLong(b1, s1, left);
- LazyUtils.readVLong(b2, s2, right);
- if (left.length != l1 || right.length != l2)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + left.length + " expected " + l1);
- if (left.value > right.value)
- return -1;
- else if (left.value == right.value)
- return 0;
- else
- return 1;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java
deleted file mode 100644
index 8d55cdb..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveShortBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveShortBinaryAscComparatorFactory INSTANCE = new HiveShortBinaryAscComparatorFactory();
-
- private HiveShortBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private short left;
- private short right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = LazyUtils.byteArrayToShort(b1, s1);
- right = LazyUtils.byteArrayToShort(b2, s2);
- if (left > right)
- return 1;
- else if (left == right)
- return 0;
- else
- return -1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java
deleted file mode 100644
index 4e8dde6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveShortBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveShortBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveShortBinaryDescComparatorFactory INSTANCE = new HiveShortBinaryDescComparatorFactory();
-
- private HiveShortBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private short left;
- private short right;
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- left = LazyUtils.byteArrayToShort(b1, s1);
- right = LazyUtils.byteArrayToShort(b2, s2);
- if (left > right)
- return -1;
- else if (left == right)
- return 0;
- else
- return 1;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java
deleted file mode 100644
index a334ecf..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryAscComparatorFactory.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import org.apache.hadoop.io.Text;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveStringBinaryAscComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveStringBinaryAscComparatorFactory INSTANCE = new HiveStringBinaryAscComparatorFactory();
-
- private HiveStringBinaryAscComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VInt leftLen = new VInt();
- private VInt rightLen = new VInt();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVInt(b1, s1, leftLen);
- LazyUtils.readVInt(b2, s2, rightLen);
-
- if (leftLen.value + leftLen.length != l1
- || rightLen.value + rightLen.length != l2)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (leftLen.value + leftLen.length) + ", "
- + (rightLen.value + rightLen.length)
- + " but get " + l1 + ", " + l2);
-
- return Text.Comparator.compareBytes(b1, s1 + leftLen.length, l1
- - leftLen.length, b2, s2 + rightLen.length, l2
- - rightLen.length);
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java
deleted file mode 100644
index e00b58e..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/comparator/HiveStringBinaryDescComparatorFactory.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.comparator;
-
-import org.apache.hadoop.io.WritableComparator;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveStringBinaryDescComparatorFactory implements
- IBinaryComparatorFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveStringBinaryDescComparatorFactory INSTANCE = new HiveStringBinaryDescComparatorFactory();
-
- private HiveStringBinaryDescComparatorFactory() {
- }
-
- @Override
- public IBinaryComparator createBinaryComparator() {
- return new IBinaryComparator() {
- private VInt leftLen = new VInt();
- private VInt rightLen = new VInt();
-
- @Override
- public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
- int l2) {
- LazyUtils.readVInt(b1, s1, leftLen);
- LazyUtils.readVInt(b2, s2, rightLen);
-
- if (leftLen.value + leftLen.length != l1
- || rightLen.value + rightLen.length != l2)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (leftLen.value + leftLen.length) + ", "
- + (rightLen.value + rightLen.length)
- + " but get " + l1 + ", " + l2);
-
- return -WritableComparator.compareBytes(b1,
- s1 + leftLen.length, l1 - leftLen.length, b2, s2
- + rightLen.length, l2 - rightLen.length);
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
deleted file mode 100644
index c6078ca..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionFactory.java
+++ /dev/null
@@ -1,381 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.AggregationFunctionEvaluator;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunction;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyAggregateFunctionFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class AggregationFunctionFactory implements
- ICopyAggregateFunctionFactory {
-
- private static final long serialVersionUID = 1L;
-
- /**
- * list of parameters' serialization
- */
- private List<String> parametersSerialization = new ArrayList<String>();
-
- /**
- * the name of the udf
- */
- private String genericUDAFName;
-
- /**
- * aggregation mode
- */
- private GenericUDAFEvaluator.Mode mode;
-
- /**
- * list of type info
- */
- private List<TypeInfo> types = new ArrayList<TypeInfo>();
-
- /**
- * distinct or not
- */
- private boolean distinct;
-
- /**
- * the schema of incoming rows
- */
- private Schema rowSchema;
-
- /**
- * list of parameters
- */
- private transient List<ExprNodeDesc> parametersOrigin;
-
- /**
- * row inspector
- */
- private transient ObjectInspector rowInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspectorPartial = null;
-
- /**
- * parameter inspectors
- */
- private transient ObjectInspector[] parameterInspectors = null;
-
- /**
- * expression desc
- */
- private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * aggregation function desc
- */
- private transient AggregationDesc aggregator;
-
- /**
- *
- * @param aggregator
- * Algebricks function call expression
- * @param oi
- * schema
- */
- public AggregationFunctionFactory(
- AggregateFunctionCallExpression expression, Schema oi,
- IVariableTypeEnvironment env) throws AlgebricksException {
-
- try {
- aggregator = (AggregationDesc) ExpressionTranslator
- .getHiveExpression(expression, env);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- init(aggregator.getParameters(), aggregator.getGenericUDAFName(),
- aggregator.getMode(), aggregator.getDistinct(), oi);
- }
-
- /**
- * constructor of aggregation function factory
- *
- * @param inputs
- * @param name
- * @param udafMode
- * @param distinct
- * @param oi
- */
- private void init(List<ExprNodeDesc> inputs, String name,
- GenericUDAFEvaluator.Mode udafMode, boolean distinct, Schema oi) {
- parametersOrigin = inputs;
- genericUDAFName = name;
- mode = udafMode;
- this.distinct = distinct;
- rowSchema = oi;
-
- for (ExprNodeDesc input : inputs) {
- TypeInfo type = input.getTypeInfo();
- if (type instanceof StructTypeInfo) {
- types.add(TypeInfoFactory.doubleTypeInfo);
- } else
- types.add(type);
-
- String s = Utilities.serializeExpression(input);
- parametersSerialization.add(s);
- }
- }
-
- @Override
- public synchronized ICopyAggregateFunction createAggregateFunction(
- IDataOutputProvider provider) throws AlgebricksException {
- if (parametersOrigin == null) {
- Configuration config = new Configuration();
- config.setClassLoader(this.getClass().getClassLoader());
- /**
- * in case of class.forname(...) call in hive code
- */
- Thread.currentThread().setContextClassLoader(
- this.getClass().getClassLoader());
-
- parametersOrigin = new ArrayList<ExprNodeDesc>();
- for (String serialization : parametersSerialization) {
- parametersOrigin.add(Utilities.deserializeExpression(
- serialization, config));
- }
- }
-
- /**
- * exprs
- */
- if (parameterExprs == null)
- parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- if (evaluators == null)
- evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- if (cachedParameters == null)
- cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- if (cachedRowObjects == null)
- cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- if (serDe == null)
- serDe = new HashMap<Long, SerDe>();
-
- /**
- * UDAF functions
- */
- if (udafsComplete == null)
- udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * UDAF functions
- */
- if (udafsPartial == null)
- udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- if (parameterInspectors == null)
- parameterInspectors = new ObjectInspector[parametersOrigin.size()];
-
- if (rowInspector == null)
- rowInspector = rowSchema.toObjectInspector();
-
- // get current thread id
- long threadId = Thread.currentThread().getId();
-
- /**
- * expressions, expressions are thread local
- */
- List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
- if (parameters == null) {
- parameters = new ArrayList<ExprNodeDesc>();
- for (ExprNodeDesc parameter : parametersOrigin)
- parameters.add(parameter.clone());
- parameterExprs.put(threadId, parameters);
- }
-
- /**
- * cached parameter objects
- */
- Object[] cachedParas = cachedParameters.get(threadId);
- if (cachedParas == null) {
- cachedParas = new Object[parameters.size()];
- cachedParameters.put(threadId, cachedParas);
- }
-
- /**
- * cached row object: one per thread
- */
- LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects
- .get(threadId);
- if (cachedRowObject == null) {
- cachedRowObject = LazyFactory.createLazyObject(rowInspector);
- cachedRowObjects.put(threadId, cachedRowObject);
- }
-
- /**
- * we only use lazy serde to do serialization
- */
- SerDe lazySer = serDe.get(threadId);
- if (lazySer == null) {
- lazySer = new LazySerDe();
- serDe.put(threadId, lazySer);
- }
-
- /**
- * evaluators
- */
- ExprNodeEvaluator[] evals = evaluators.get(threadId);
- if (evals == null) {
- evals = new ExprNodeEvaluator[parameters.size()];
- evaluators.put(threadId, evals);
- }
-
- GenericUDAFEvaluator udafPartial;
- GenericUDAFEvaluator udafComplete;
-
- // initialize object inspectors
- try {
- /**
- * evaluators, udf, object inpsectors are shared in one thread
- */
- for (int i = 0; i < evals.length; i++) {
- if (evals[i] == null) {
- evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
- if (parameterInspectors[i] == null) {
- parameterInspectors[i] = evals[i]
- .initialize(rowInspector);
- } else {
- evals[i].initialize(rowInspector);
- }
- }
- }
-
- udafComplete = udafsComplete.get(threadId);
- if (udafComplete == null) {
- try {
- udafComplete = FunctionRegistry.getGenericUDAFEvaluator(
- genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafsComplete.put(threadId, udafComplete);
- udafComplete.init(mode, parameterInspectors);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspector == null)
- outputInspector = udafComplete.init(mode, parameterInspectors);
-
- // initial partial gby udaf
- GenericUDAFEvaluator.Mode partialMode;
- // adjust mode for external groupby
- if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
- else if (mode == GenericUDAFEvaluator.Mode.FINAL)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
- else
- partialMode = mode;
- udafPartial = udafsPartial.get(threadId);
- if (udafPartial == null) {
- try {
- udafPartial = FunctionRegistry.getGenericUDAFEvaluator(
- genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafPartial.init(partialMode, parameterInspectors);
- udafsPartial.put(threadId, udafPartial);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspectorPartial == null)
- outputInspectorPartial = udafPartial.init(partialMode,
- parameterInspectors);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e);
- }
-
- return new AggregationFunctionEvaluator(parameters, types,
- genericUDAFName, mode, distinct, rowInspector,
- provider.getDataOutput(), evals, parameterInspectors,
- cachedParas, lazySer, cachedRowObject, udafPartial,
- udafComplete, outputInspector, outputInspectorPartial);
- }
-
- public String toString() {
- return "aggregation function expression evaluator factory: "
- + this.genericUDAFName;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
deleted file mode 100644
index 73717a3..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/AggregationFunctionSerializableFactory.java
+++ /dev/null
@@ -1,381 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
-import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory;
-import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.AggregationDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.AggregatuibFunctionSerializableEvaluator;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.serde.lazy.LazyFactory;
-import edu.uci.ics.hivesterix.serde.lazy.LazyObject;
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunction;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunctionFactory;
-
-public class AggregationFunctionSerializableFactory implements
- ICopySerializableAggregateFunctionFactory {
-
- private static final long serialVersionUID = 1L;
-
- /**
- * list of parameters' serialization
- */
- private List<String> parametersSerialization = new ArrayList<String>();
-
- /**
- * the name of the udf
- */
- private String genericUDAFName;
-
- /**
- * aggregation mode
- */
- private GenericUDAFEvaluator.Mode mode;
-
- /**
- * list of type info
- */
- private List<TypeInfo> types = new ArrayList<TypeInfo>();
-
- /**
- * distinct or not
- */
- private boolean distinct;
-
- /**
- * the schema of incoming rows
- */
- private Schema rowSchema;
-
- /**
- * list of parameters
- */
- private transient List<ExprNodeDesc> parametersOrigin;
-
- /**
- * row inspector
- */
- private transient ObjectInspector rowInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspector = null;
-
- /**
- * output object inspector
- */
- private transient ObjectInspector outputInspectorPartial = null;
-
- /**
- * parameter inspectors
- */
- private transient ObjectInspector[] parameterInspectors = null;
-
- /**
- * expression desc
- */
- private transient HashMap<Long, List<ExprNodeDesc>> parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- private transient HashMap<Long, ExprNodeEvaluator[]> evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- private transient HashMap<Long, Object[]> cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- private transient HashMap<Long, LazyObject<? extends ObjectInspector>> cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- private transient HashMap<Long, SerDe> serDe = new HashMap<Long, SerDe>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * udaf evaluators
- */
- private transient HashMap<Long, GenericUDAFEvaluator> udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * aggregation function desc
- */
- private transient AggregationDesc aggregator;
-
- /**
- *
- * @param aggregator
- * Algebricks function call expression
- * @param oi
- * schema
- */
- public AggregationFunctionSerializableFactory(
- AggregateFunctionCallExpression expression, Schema oi,
- IVariableTypeEnvironment env) throws AlgebricksException {
-
- try {
- aggregator = (AggregationDesc) ExpressionTranslator
- .getHiveExpression(expression, env);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- init(aggregator.getParameters(), aggregator.getGenericUDAFName(),
- aggregator.getMode(), aggregator.getDistinct(), oi);
- }
-
- /**
- * constructor of aggregation function factory
- *
- * @param inputs
- * @param name
- * @param udafMode
- * @param distinct
- * @param oi
- */
- private void init(List<ExprNodeDesc> inputs, String name,
- GenericUDAFEvaluator.Mode udafMode, boolean distinct, Schema oi) {
- parametersOrigin = inputs;
- genericUDAFName = name;
- mode = udafMode;
- this.distinct = distinct;
- rowSchema = oi;
-
- for (ExprNodeDesc input : inputs) {
- TypeInfo type = input.getTypeInfo();
- if (type instanceof StructTypeInfo) {
- types.add(TypeInfoFactory.doubleTypeInfo);
- } else
- types.add(type);
-
- String s = Utilities.serializeExpression(input);
- parametersSerialization.add(s);
- }
- }
-
- @Override
- public synchronized ICopySerializableAggregateFunction createAggregateFunction()
- throws AlgebricksException {
- if (parametersOrigin == null) {
- Configuration config = new Configuration();
- config.setClassLoader(this.getClass().getClassLoader());
- /**
- * in case of class.forname(...) call in hive code
- */
- Thread.currentThread().setContextClassLoader(
- this.getClass().getClassLoader());
-
- parametersOrigin = new ArrayList<ExprNodeDesc>();
- for (String serialization : parametersSerialization) {
- parametersOrigin.add(Utilities.deserializeExpression(
- serialization, config));
- }
- }
-
- /**
- * exprs
- */
- if (parameterExprs == null)
- parameterExprs = new HashMap<Long, List<ExprNodeDesc>>();
-
- /**
- * evaluators
- */
- if (evaluators == null)
- evaluators = new HashMap<Long, ExprNodeEvaluator[]>();
-
- /**
- * cached parameter objects
- */
- if (cachedParameters == null)
- cachedParameters = new HashMap<Long, Object[]>();
-
- /**
- * cached row object: one per thread
- */
- if (cachedRowObjects == null)
- cachedRowObjects = new HashMap<Long, LazyObject<? extends ObjectInspector>>();
-
- /**
- * we only use lazy serde to do serialization
- */
- if (serDe == null)
- serDe = new HashMap<Long, SerDe>();
-
- /**
- * UDAF functions
- */
- if (udafsComplete == null)
- udafsComplete = new HashMap<Long, GenericUDAFEvaluator>();
-
- /**
- * UDAF functions
- */
- if (udafsPartial == null)
- udafsPartial = new HashMap<Long, GenericUDAFEvaluator>();
-
- if (parameterInspectors == null)
- parameterInspectors = new ObjectInspector[parametersOrigin.size()];
-
- if (rowInspector == null)
- rowInspector = rowSchema.toObjectInspector();
-
- // get current thread id
- long threadId = Thread.currentThread().getId();
-
- /**
- * expressions, expressions are thread local
- */
- List<ExprNodeDesc> parameters = parameterExprs.get(threadId);
- if (parameters == null) {
- parameters = new ArrayList<ExprNodeDesc>();
- for (ExprNodeDesc parameter : parametersOrigin)
- parameters.add(parameter.clone());
- parameterExprs.put(threadId, parameters);
- }
-
- /**
- * cached parameter objects
- */
- Object[] cachedParas = cachedParameters.get(threadId);
- if (cachedParas == null) {
- cachedParas = new Object[parameters.size()];
- cachedParameters.put(threadId, cachedParas);
- }
-
- /**
- * cached row object: one per thread
- */
- LazyObject<? extends ObjectInspector> cachedRowObject = cachedRowObjects
- .get(threadId);
- if (cachedRowObject == null) {
- cachedRowObject = LazyFactory.createLazyObject(rowInspector);
- cachedRowObjects.put(threadId, cachedRowObject);
- }
-
- /**
- * we only use lazy serde to do serialization
- */
- SerDe lazySer = serDe.get(threadId);
- if (lazySer == null) {
- lazySer = new LazySerDe();
- serDe.put(threadId, lazySer);
- }
-
- /**
- * evaluators
- */
- ExprNodeEvaluator[] evals = evaluators.get(threadId);
- if (evals == null) {
- evals = new ExprNodeEvaluator[parameters.size()];
- evaluators.put(threadId, evals);
- }
-
- GenericUDAFEvaluator udafPartial;
- GenericUDAFEvaluator udafComplete;
-
- // initialize object inspectors
- try {
- /**
- * evaluators, udf, object inpsectors are shared in one thread
- */
- for (int i = 0; i < evals.length; i++) {
- if (evals[i] == null) {
- evals[i] = ExprNodeEvaluatorFactory.get(parameters.get(i));
- if (parameterInspectors[i] == null) {
- parameterInspectors[i] = evals[i]
- .initialize(rowInspector);
- } else {
- evals[i].initialize(rowInspector);
- }
- }
- }
-
- udafComplete = udafsComplete.get(threadId);
- if (udafComplete == null) {
- try {
- udafComplete = FunctionRegistry.getGenericUDAFEvaluator(
- genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafsComplete.put(threadId, udafComplete);
- udafComplete.init(mode, parameterInspectors);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspector == null)
- outputInspector = udafComplete.init(mode, parameterInspectors);
-
- // initial partial gby udaf
- GenericUDAFEvaluator.Mode partialMode;
- // adjust mode for external groupby
- if (mode == GenericUDAFEvaluator.Mode.COMPLETE)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL1;
- else if (mode == GenericUDAFEvaluator.Mode.FINAL)
- partialMode = GenericUDAFEvaluator.Mode.PARTIAL2;
- else
- partialMode = mode;
- udafPartial = udafsPartial.get(threadId);
- if (udafPartial == null) {
- try {
- udafPartial = FunctionRegistry.getGenericUDAFEvaluator(
- genericUDAFName, types, distinct, false);
- } catch (HiveException e) {
- throw new AlgebricksException(e);
- }
- udafPartial.init(partialMode, parameterInspectors);
- udafsPartial.put(threadId, udafPartial);
- }
-
- // multiple stage group by, determined by the mode parameter
- if (outputInspectorPartial == null)
- outputInspectorPartial = udafPartial.init(partialMode,
- parameterInspectors);
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e);
- }
-
- return new AggregatuibFunctionSerializableEvaluator(parameters, types,
- genericUDAFName, mode, distinct, rowInspector, evals,
- parameterInspectors, cachedParas, lazySer, cachedRowObject,
- udafPartial, udafComplete, outputInspector,
- outputInspectorPartial);
- }
-
- public String toString() {
- return "aggregation function expression evaluator factory: "
- + this.genericUDAFName;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java
deleted file mode 100644
index 68bf408..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ColumnExpressionEvaluatorFactory.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ColumnExpressionEvaluator;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class ColumnExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
-
- private static final long serialVersionUID = 1L;
-
- private ExprNodeColumnDesc expr;
-
- private Schema inputSchema;
-
- public ColumnExpressionEvaluatorFactory(ILogicalExpression expression,
- Schema schema, IVariableTypeEnvironment env)
- throws AlgebricksException {
- try {
- expr = (ExprNodeColumnDesc) ExpressionTranslator.getHiveExpression(
- expression, env);
- } catch (Exception e) {
- throw new AlgebricksException(e.getMessage());
- }
- inputSchema = schema;
- }
-
- public ICopyEvaluator createEvaluator(IDataOutputProvider output)
- throws AlgebricksException {
- return new ColumnExpressionEvaluator(expr,
- inputSchema.toObjectInspector(), output);
- }
-
- public String toString() {
- return "column expression evaluator factory: " + expr.toString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java
deleted file mode 100644
index e0241a1..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ConstantExpressionEvaluatorFactory.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ConstantExpressionEvaluator;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class ConstantExpressionEvaluatorFactory implements
- ICopyEvaluatorFactory {
-
- private static final long serialVersionUID = 1L;
-
- private ExprNodeConstantDesc expr;
-
- private Schema schema;
-
- public ConstantExpressionEvaluatorFactory(ILogicalExpression expression,
- Schema inputSchema, IVariableTypeEnvironment env)
- throws AlgebricksException {
- try {
- expr = (ExprNodeConstantDesc) ExpressionTranslator
- .getHiveExpression(expression, env);
- } catch (Exception e) {
- throw new AlgebricksException(e.getMessage());
- }
- schema = inputSchema;
- }
-
- public ICopyEvaluator createEvaluator(IDataOutputProvider output)
- throws AlgebricksException {
- return new ConstantExpressionEvaluator(expr,
- schema.toObjectInspector(), output);
- }
-
- public String toString() {
- return "constant expression evaluator factory: " + expr.toString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java
deleted file mode 100644
index 4b5f906..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/FieldExpressionEvaluatorFactory.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.runtime.evaluator.FieldExpressionEvaluator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class FieldExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
- private static final long serialVersionUID = 1L;
-
- private ExprNodeFieldDesc expr;
-
- private Schema inputSchema;
-
- public FieldExpressionEvaluatorFactory(ILogicalExpression expression,
- Schema schema, IVariableTypeEnvironment env)
- throws AlgebricksException {
- try {
- expr = (ExprNodeFieldDesc) ExpressionTranslator.getHiveExpression(
- expression, env);
- } catch (Exception e) {
- throw new AlgebricksException(e.getMessage());
- }
- inputSchema = schema;
- }
-
- public ICopyEvaluator createEvaluator(IDataOutputProvider output)
- throws AlgebricksException {
- return new FieldExpressionEvaluator(expr,
- inputSchema.toObjectInspector(), output);
- }
-
- public String toString() {
- return "field access expression evaluator factory: " + expr.toString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java
deleted file mode 100644
index 387ca72..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/HiveExpressionRuntimeProvider.java
+++ /dev/null
@@ -1,192 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-
-import edu.uci.ics.hivesterix.logical.expression.ExpressionConstant;
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression.FunctionKind;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionRuntimeProvider;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.AggregateFunctionFactoryAdapter;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.ScalarEvaluatorFactoryAdapter;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.LogicalExpressionJobGenToExpressionRuntimeProviderAdapter.UnnestingFunctionFactoryAdapter;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.StatefulFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
-import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IAggregateEvaluatorFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopySerializableAggregateFunctionFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IRunningAggregateEvaluatorFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IUnnestingEvaluatorFactory;
-
-public class HiveExpressionRuntimeProvider implements
- IExpressionRuntimeProvider {
-
- public static final IExpressionRuntimeProvider INSTANCE = new HiveExpressionRuntimeProvider();
-
- @Override
- public IAggregateEvaluatorFactory createAggregateFunctionFactory(
- AggregateFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new AggregateFunctionFactoryAdapter(
- new AggregationFunctionFactory(expr, schema, env));
- }
-
- @Override
- public ICopySerializableAggregateFunctionFactory createSerializableAggregateFunctionFactory(
- AggregateFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new AggregationFunctionSerializableFactory(expr, schema, env);
- }
-
- @Override
- public IRunningAggregateEvaluatorFactory createRunningAggregateFunctionFactory(
- StatefulFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public IUnnestingEvaluatorFactory createUnnestingFunctionFactory(
- UnnestingFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new UnnestingFunctionFactoryAdapter(
- new UnnestingFunctionFactory(expr, schema, env));
- }
-
- public IScalarEvaluatorFactory createEvaluatorFactory(
- ILogicalExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- switch (expr.getExpressionTag()) {
- case VARIABLE: {
- VariableReferenceExpression v = (VariableReferenceExpression) expr;
- return new ScalarEvaluatorFactoryAdapter(
- createVariableEvaluatorFactory(v, env, inputSchemas,
- context));
- }
- case CONSTANT: {
- ConstantExpression c = (ConstantExpression) expr;
- return new ScalarEvaluatorFactoryAdapter(
- createConstantEvaluatorFactory(c, env, inputSchemas,
- context));
- }
- case FUNCTION_CALL: {
- AbstractFunctionCallExpression fun = (AbstractFunctionCallExpression) expr;
- FunctionIdentifier fid = fun.getFunctionIdentifier();
-
- if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
- return new ScalarEvaluatorFactoryAdapter(
- createFieldExpressionEvaluatorFactory(fun, env,
- inputSchemas, context));
- }
-
- if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
- return new ScalarEvaluatorFactoryAdapter(
- createNullExpressionEvaluatorFactory(fun, env,
- inputSchemas, context));
- }
-
- if (fun.getKind() == FunctionKind.SCALAR) {
- ScalarFunctionCallExpression scalar = (ScalarFunctionCallExpression) fun;
- return new ScalarEvaluatorFactoryAdapter(
- createScalarFunctionEvaluatorFactory(scalar, env,
- inputSchemas, context));
- } else {
- throw new AlgebricksException(
- "Cannot create evaluator for function " + fun
- + " of kind " + fun.getKind());
- }
- }
- default: {
- throw new IllegalStateException();
- }
- }
- }
-
- private ICopyEvaluatorFactory createVariableEvaluatorFactory(
- VariableReferenceExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new ColumnExpressionEvaluatorFactory(expr, schema, env);
- }
-
- private ICopyEvaluatorFactory createScalarFunctionEvaluatorFactory(
- AbstractFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- List<String> names = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- for (IOperatorSchema inputSchema : inputSchemas) {
- Schema schema = this.getSchema(inputSchema, env);
- names.addAll(schema.getNames());
- types.addAll(schema.getTypes());
- }
- Schema inputSchema = new Schema(names, types);
- return new ScalarFunctionExpressionEvaluatorFactory(expr, inputSchema,
- env);
- }
-
- private ICopyEvaluatorFactory createFieldExpressionEvaluatorFactory(
- AbstractFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new FieldExpressionEvaluatorFactory(expr, schema, env);
- }
-
- private ICopyEvaluatorFactory createNullExpressionEvaluatorFactory(
- AbstractFunctionCallExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new NullExpressionEvaluatorFactory(expr, schema, env);
- }
-
- private ICopyEvaluatorFactory createConstantEvaluatorFactory(
- ConstantExpression expr, IVariableTypeEnvironment env,
- IOperatorSchema[] inputSchemas, JobGenContext context)
- throws AlgebricksException {
- Schema schema = this.getSchema(inputSchemas[0], env);
- return new ConstantExpressionEvaluatorFactory(expr, schema, env);
- }
-
- private Schema getSchema(IOperatorSchema inputSchema,
- IVariableTypeEnvironment env) throws AlgebricksException {
- List<String> names = new ArrayList<String>();
- List<TypeInfo> types = new ArrayList<TypeInfo>();
- Iterator<LogicalVariable> variables = inputSchema.iterator();
- while (variables.hasNext()) {
- LogicalVariable var = variables.next();
- names.add(var.toString());
- types.add((TypeInfo) env.getVarType(var));
- }
-
- Schema schema = new Schema(names, types);
- return schema;
- }
-
-}
\ No newline at end of file
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java
deleted file mode 100644
index 8f516e8..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/NullExpressionEvaluatorFactory.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.runtime.evaluator.NullExpressionEvaluator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class NullExpressionEvaluatorFactory implements ICopyEvaluatorFactory {
-
- private static final long serialVersionUID = 1L;
-
- private ExprNodeNullDesc expr;
-
- private Schema schema;
-
- public NullExpressionEvaluatorFactory(ILogicalExpression expression,
- Schema intputSchema, IVariableTypeEnvironment env)
- throws AlgebricksException {
- try {
- expr = (ExprNodeNullDesc) ExpressionTranslator.getHiveExpression(
- expression, env);
- } catch (Exception e) {
- throw new AlgebricksException(e.getMessage());
- }
- schema = intputSchema;
- }
-
- public ICopyEvaluator createEvaluator(IDataOutputProvider output)
- throws AlgebricksException {
- return new NullExpressionEvaluator(expr, schema.toObjectInspector(),
- output);
- }
-
- public String toString() {
- return "null expression evaluator factory: " + expr.toString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java
deleted file mode 100644
index 262758e..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/ScalarFunctionExpressionEvaluatorFactory.java
+++ /dev/null
@@ -1,77 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.runtime.evaluator.FunctionExpressionEvaluator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluator;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyEvaluatorFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class ScalarFunctionExpressionEvaluatorFactory implements
- ICopyEvaluatorFactory {
-
- private static final long serialVersionUID = 1L;
-
- private transient ExprNodeGenericFuncDesc expr;
-
- private String exprSerialization;
-
- private Schema inputSchema;
-
- private transient Configuration config;
-
- public ScalarFunctionExpressionEvaluatorFactory(
- ILogicalExpression expression, Schema schema,
- IVariableTypeEnvironment env) throws AlgebricksException {
- try {
- expr = (ExprNodeGenericFuncDesc) ExpressionTranslator
- .getHiveExpression(expression, env);
-
- exprSerialization = Utilities.serializeExpression(expr);
-
- } catch (Exception e) {
- e.printStackTrace();
- throw new AlgebricksException(e.getMessage());
- }
- inputSchema = schema;
- }
-
- public synchronized ICopyEvaluator createEvaluator(
- IDataOutputProvider output) throws AlgebricksException {
- if (expr == null) {
- configClassLoader();
- expr = (ExprNodeGenericFuncDesc) Utilities.deserializeExpression(
- exprSerialization, config);
- }
-
- ExprNodeGenericFuncDesc funcDesc = (ExprNodeGenericFuncDesc) expr
- .clone();
- return new FunctionExpressionEvaluator(funcDesc,
- inputSchema.toObjectInspector(), output);
- }
-
- private void configClassLoader() {
- config = new Configuration();
- ClassLoader loader = this.getClass().getClassLoader();
- config.setClassLoader(loader);
- Thread.currentThread().setContextClassLoader(loader);
- }
-
- public String toString() {
- if (expr == null) {
- configClassLoader();
- expr = (ExprNodeGenericFuncDesc) Utilities.deserializeExpression(
- exprSerialization, new Configuration());
- }
-
- return "function expression evaluator factory: " + expr.getExprString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java
deleted file mode 100644
index 1d77737..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/evaluator/UnnestingFunctionFactory.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.evaluator;
-
-import org.apache.hadoop.hive.ql.plan.UDTFDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.evaluator.ExpressionTranslator;
-import edu.uci.ics.hivesterix.runtime.evaluator.UDTFFunctionEvaluator;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunction;
-import edu.uci.ics.hyracks.algebricks.runtime.base.ICopyUnnestingFunctionFactory;
-import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-
-public class UnnestingFunctionFactory implements ICopyUnnestingFunctionFactory {
-
- private static final long serialVersionUID = 1L;
-
- private UDTFDesc expr;
-
- private Schema inputSchema;
-
- private int[] columns;
-
- public UnnestingFunctionFactory(ILogicalExpression expression,
- Schema schema, IVariableTypeEnvironment env)
- throws AlgebricksException {
- try {
- expr = (UDTFDesc) ExpressionTranslator.getHiveExpression(
- expression, env);
- } catch (Exception e) {
- throw new AlgebricksException(e.getMessage());
- }
- inputSchema = schema;
- }
-
- @Override
- public ICopyUnnestingFunction createUnnestingFunction(
- IDataOutputProvider provider) throws AlgebricksException {
- return new UDTFFunctionEvaluator(expr, inputSchema, columns,
- provider.getDataOutput());
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java
deleted file mode 100644
index fc302e1..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveDoubleBinaryHashFunctionFactory.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveDoubleBinaryHashFunctionFactory implements
- IBinaryHashFunctionFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveDoubleBinaryHashFunctionFactory INSTANCE = new HiveDoubleBinaryHashFunctionFactory();
-
- private HiveDoubleBinaryHashFunctionFactory() {
- }
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction() {
- // TODO Auto-generated method stub
- return new IBinaryHashFunction() {
- private Double value;
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- value = Double.longBitsToDouble(LazyUtils.byteArrayToLong(
- bytes, offset));
- return value.hashCode();
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java
deleted file mode 100644
index e1a9994..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveIntegerBinaryHashFunctionFactory.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveIntegerBinaryHashFunctionFactory implements
- IBinaryHashFunctionFactory {
- private static final long serialVersionUID = 1L;
-
- public static IBinaryHashFunctionFactory INSTANCE = new HiveIntegerBinaryHashFunctionFactory();
-
- private HiveIntegerBinaryHashFunctionFactory() {
- }
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction() {
-
- return new IBinaryHashFunction() {
- private VInt value = new VInt();
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- LazyUtils.readVInt(bytes, offset, value);
- if (value.length != length)
- throw new IllegalArgumentException(
- "length mismatch in int hash function actual: "
- + length + " expected " + value.length);
- return value.value;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java
deleted file mode 100644
index 6f7c6f2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveLongBinaryHashFunctionFactory.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveLongBinaryHashFunctionFactory implements
- IBinaryHashFunctionFactory {
- private static final long serialVersionUID = 1L;
-
- public static IBinaryHashFunctionFactory INSTANCE = new HiveLongBinaryHashFunctionFactory();
-
- private HiveLongBinaryHashFunctionFactory() {
- }
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction() {
-
- return new IBinaryHashFunction() {
- private VLong value = new VLong();
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- LazyUtils.readVLong(bytes, offset, value);
- return (int) value.value;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java
deleted file mode 100644
index e03dde0..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveRawBinaryHashFunctionFactory.java
+++ /dev/null
@@ -1,32 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
-
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveRawBinaryHashFunctionFactory implements
- IBinaryHashFunctionFactory {
- private static final long serialVersionUID = 1L;
-
- public static IBinaryHashFunctionFactory INSTANCE = new HiveRawBinaryHashFunctionFactory();
-
- private HiveRawBinaryHashFunctionFactory() {
-
- }
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction() {
-
- return new IBinaryHashFunction() {
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- int value = 1;
- int end = offset + length;
- for (int i = offset; i < end; i++)
- value = value * 31 + (int) bytes[i];
- return value;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java
deleted file mode 100644
index 055c077..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/HiveStingBinaryHashFunctionFactory.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveStingBinaryHashFunctionFactory implements
- IBinaryHashFunctionFactory {
- private static final long serialVersionUID = 1L;
-
- public static HiveStingBinaryHashFunctionFactory INSTANCE = new HiveStingBinaryHashFunctionFactory();
-
- private HiveStingBinaryHashFunctionFactory() {
- }
-
- @Override
- public IBinaryHashFunction createBinaryHashFunction() {
- // TODO Auto-generated method stub
- return new IBinaryHashFunction() {
- private VInt len = new VInt();
-
- @Override
- public int hash(byte[] bytes, int offset, int length) {
- LazyUtils.readVInt(bytes, offset, len);
- if (len.value + len.length != length)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (len.value + len.length) + " but get "
- + length);
- return hashBytes(bytes, offset + len.length, length
- - len.length);
- }
-
- public int hashBytes(byte[] bytes, int offset, int length) {
- int value = 1;
- int end = offset + length;
- for (int i = offset; i < end; i++)
- value = value * 31 + (int) bytes[i];
- return value;
- }
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java
deleted file mode 100644
index 5f03962..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleAscNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,25 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveDoubleAscNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
-
- return new INormalizedKeyComputer() {
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- int header = LazyUtils.byteArrayToInt(bytes, start);
- long unsignedValue = (long) header;
- return (int) ((unsignedValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java
deleted file mode 100644
index e4587a2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveDoubleDescNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveDoubleDescNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
- private final INormalizedKeyComputerFactory ascNormalizedKeyComputerFactory = new HiveDoubleAscNormalizedKeyComputerFactory();
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
- return new INormalizedKeyComputer() {
- private INormalizedKeyComputer nmkComputer = ascNormalizedKeyComputerFactory
- .createNormalizedKeyComputer();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- int nk = nmkComputer.normalize(bytes, start, length);
- return (int) ((long) Integer.MAX_VALUE - (long) (nk - Integer.MIN_VALUE));
- }
-
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java
deleted file mode 100644
index 2ff390a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerAscNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveIntegerAscNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
-
- return new INormalizedKeyComputer() {
- private VInt vint = new VInt();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- LazyUtils.readVInt(bytes, start, vint);
- if (vint.length != length)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + vint.length + " expected " + length);
- long unsignedValue = (long) vint.value;
- return (int) ((unsignedValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java
deleted file mode 100644
index 8eff1f8..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveIntegerDescNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,31 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveIntegerDescNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
-
- return new INormalizedKeyComputer() {
- private VInt vint = new VInt();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- LazyUtils.readVInt(bytes, start, vint);
- if (vint.length != length)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + vint.length + " expected " + length);
- long unsignedValue = (long) vint.value;
- return (int) ((long) 0xffffffff - unsignedValue);
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java
deleted file mode 100644
index 768eec2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongAscNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,65 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveLongAscNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
-
- return new INormalizedKeyComputer() {
- private static final int POSTIVE_LONG_MASK = (3 << 30);
- private static final int NON_NEGATIVE_INT_MASK = (2 << 30);
- private static final int NEGATIVE_LONG_MASK = (0 << 30);
- private VLong vlong = new VLong();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- LazyUtils.readVLong(bytes, start, vlong);
- if (vlong.length != length)
- throw new IllegalArgumentException(
- "length mismatch in int comparator function actual: "
- + vlong.length + " expected " + length);
- long value = (long) vlong.value;
- int highValue = (int) (value >> 32);
- if (highValue > 0) {
- /**
- * larger than Integer.MAX
- */
- int highNmk = getKey(highValue);
- highNmk >>= 2;
- highNmk |= POSTIVE_LONG_MASK;
- return highNmk;
- } else if (highValue == 0) {
- /**
- * smaller than Integer.MAX but >=0
- */
- int lowNmk = (int) value;
- lowNmk >>= 2;
- lowNmk |= NON_NEGATIVE_INT_MASK;
- return lowNmk;
- } else {
- /**
- * less than 0; TODO: have not optimized for that
- */
- int highNmk = getKey(highValue);
- highNmk >>= 2;
- highNmk |= NEGATIVE_LONG_MASK;
- return highNmk;
- }
- }
-
- private int getKey(int value) {
- long unsignedFirstValue = (long) value;
- int nmk = (int) ((unsignedFirstValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
- return nmk;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java
deleted file mode 100644
index 20ae56a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveLongDescNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,27 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveLongDescNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
- private final INormalizedKeyComputerFactory ascNormalizedKeyComputerFactory = new HiveIntegerAscNormalizedKeyComputerFactory();
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
- return new INormalizedKeyComputer() {
- private INormalizedKeyComputer nmkComputer = ascNormalizedKeyComputerFactory
- .createNormalizedKeyComputer();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- int nk = nmkComputer.normalize(bytes, start, length);
- return (int) ((long) Integer.MAX_VALUE - (long) (nk - Integer.MIN_VALUE));
- }
-
- };
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java
deleted file mode 100644
index b16ccba..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringAscNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,44 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
-
-public class HiveStringAscNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
-
- return new INormalizedKeyComputer() {
- private VInt len = new VInt();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- LazyUtils.readVInt(bytes, start, len);
-
- if (len.value + len.length != length)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (len.value + len.length) + " but get "
- + length);
- int nk = 0;
- int offset = start + len.length;
- for (int i = 0; i < 2; ++i) {
- nk <<= 16;
- if (i < len.value) {
- char character = UTF8StringPointable.charAt(bytes,
- offset);
- nk += ((int) character) & 0xffff;
- offset += UTF8StringPointable.charSize(bytes, offset);
- }
- }
- return nk;
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java
deleted file mode 100644
index e8978c6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/normalize/HiveStringDescNormalizedKeyComputerFactory.java
+++ /dev/null
@@ -1,40 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.normalize;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
-
-public class HiveStringDescNormalizedKeyComputerFactory implements
- INormalizedKeyComputerFactory {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public INormalizedKeyComputer createNormalizedKeyComputer() {
- return new INormalizedKeyComputer() {
- private VInt len = new VInt();
-
- @Override
- public int normalize(byte[] bytes, int start, int length) {
- LazyUtils.readVInt(bytes, start, len);
- if (len.value + len.length != length)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (len.value + len.length) + " but get "
- + length);
- int nk = 0;
- int offset = start + len.length;
- for (int i = 0; i < 2; ++i) {
- nk <<= 16;
- if (i < len.value) {
- nk += ((int) UTF8StringPointable.charAt(bytes, offset)) & 0xffff;
- offset += UTF8StringPointable.charSize(bytes, offset);
- }
- }
- return (int) ((long) 0xffffffff - (long) nk);
- }
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java
deleted file mode 100644
index 91d08c6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/nullwriter/HiveNullWriterFactory.java
+++ /dev/null
@@ -1,28 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.factory.nullwriter;
-
-import java.io.DataOutput;
-
-import edu.uci.ics.hyracks.api.dataflow.value.INullWriter;
-import edu.uci.ics.hyracks.api.dataflow.value.INullWriterFactory;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-
-public class HiveNullWriterFactory implements INullWriterFactory {
-
- private static final long serialVersionUID = 1L;
-
- public static HiveNullWriterFactory INSTANCE = new HiveNullWriterFactory();
-
- @Override
- public INullWriter createNullWriter() {
- return new HiveNullWriter();
- }
-}
-
-class HiveNullWriter implements INullWriter {
-
- @Override
- public void writeNull(DataOutput out) throws HyracksDataException {
- // do nothing
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java
deleted file mode 100644
index 3d2b141..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspector.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.inspector;
-
-import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspector;
-
-public class HiveBinaryBooleanInspector implements IBinaryBooleanInspector {
-
- HiveBinaryBooleanInspector() {
- }
-
- @Override
- public boolean getBooleanValue(byte[] bytes, int offset, int length) {
- if (length == 0)
- return false;
- if (length != 1)
- throw new IllegalStateException("boolean field error: with length "
- + length);
- return bytes[0] == 1;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java
deleted file mode 100644
index 86afbee..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryBooleanInspectorFactory.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.inspector;
-
-import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspector;
-import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspectorFactory;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-
-public class HiveBinaryBooleanInspectorFactory implements
- IBinaryBooleanInspectorFactory {
- private static final long serialVersionUID = 1L;
- public static HiveBinaryBooleanInspectorFactory INSTANCE = new HiveBinaryBooleanInspectorFactory();
-
- private HiveBinaryBooleanInspectorFactory() {
-
- }
-
- @Override
- public IBinaryBooleanInspector createBinaryBooleanInspector(
- IHyracksTaskContext arg0) {
- return new HiveBinaryBooleanInspector();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java
deleted file mode 100644
index e82e501..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspector.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.inspector;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspector;
-
-public class HiveBinaryIntegerInspector implements IBinaryIntegerInspector {
- private VInt value = new VInt();
-
- HiveBinaryIntegerInspector() {
- }
-
- @Override
- public int getIntegerValue(byte[] bytes, int offset, int length) {
- LazyUtils.readVInt(bytes, offset, value);
- if (value.length != length)
- throw new IllegalArgumentException(
- "length mismatch in int hash function actual: " + length
- + " expected " + value.length);
- return value.value;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java
deleted file mode 100644
index b44e610..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/inspector/HiveBinaryIntegerInspectorFactory.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.inspector;
-
-import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspector;
-import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspectorFactory;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-
-public class HiveBinaryIntegerInspectorFactory implements
- IBinaryIntegerInspectorFactory {
- private static final long serialVersionUID = 1L;
- public static HiveBinaryIntegerInspectorFactory INSTANCE = new HiveBinaryIntegerInspectorFactory();
-
- private HiveBinaryIntegerInspectorFactory() {
-
- }
-
- @Override
- public IBinaryIntegerInspector createBinaryIntegerInspector(
- IHyracksTaskContext arg0) {
- return new HiveBinaryIntegerInspector();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java
deleted file mode 100644
index 8f559e2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveConnectorPolicyAssignmentPolicy.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.PipeliningConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedBlockingConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedPipeliningConnectorPolicy;
-import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedReceiveSideMaterializedBlockingConnectorPolicy;
-import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningConnectorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
-
-public class HiveConnectorPolicyAssignmentPolicy implements
- IConnectorPolicyAssignmentPolicy {
- public enum Policy {
- PIPELINING, SEND_SIDE_MAT_PIPELINING, SEND_SIDE_MAT_BLOCKING, SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING;
- };
-
- private static final long serialVersionUID = 1L;
-
- private final IConnectorPolicy pipeliningPolicy = new PipeliningConnectorPolicy();
- private final IConnectorPolicy sendSideMatPipeliningPolicy = new SendSideMaterializedPipeliningConnectorPolicy();
- private final IConnectorPolicy sendSideMatBlockingPolicy = new SendSideMaterializedBlockingConnectorPolicy();
- private final IConnectorPolicy sendSideMatReceiveSideMatBlockingPolicy = new SendSideMaterializedReceiveSideMaterializedBlockingConnectorPolicy();
- private final Policy policy;
-
- public HiveConnectorPolicyAssignmentPolicy(Policy policy) {
- this.policy = policy;
- }
-
- @Override
- public IConnectorPolicy getConnectorPolicyAssignment(
- IConnectorDescriptor c, int nProducers, int nConsumers,
- int[] fanouts) {
- if (c instanceof MToNPartitioningMergingConnectorDescriptor) {
- // avoid deadlocks
- switch (policy) {
- case PIPELINING:
- case SEND_SIDE_MAT_PIPELINING:
- return sendSideMatPipeliningPolicy;
- case SEND_SIDE_MAT_BLOCKING:
- return sendSideMatBlockingPolicy;
- case SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING:
- return sendSideMatReceiveSideMatBlockingPolicy;
- default:
- return sendSideMatPipeliningPolicy;
- }
- } else if (c instanceof MToNPartitioningConnectorDescriptor) {
- // support different repartitioning policies
- switch (policy) {
- case PIPELINING:
- return pipeliningPolicy;
- case SEND_SIDE_MAT_PIPELINING:
- return sendSideMatPipeliningPolicy;
- case SEND_SIDE_MAT_BLOCKING:
- return sendSideMatBlockingPolicy;
- case SEND_SIDE_MAT_RECEIVE_SIDE_MAT_BLOCKING:
- return sendSideMatReceiveSideMatBlockingPolicy;
- default:
- return pipeliningPolicy;
- }
- } else {
- // pipelining for other connectors
- return pipeliningPolicy;
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java
deleted file mode 100644
index e4fbca5..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSink.java
+++ /dev/null
@@ -1,32 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
-import edu.uci.ics.hyracks.algebricks.core.algebra.properties.IPartitioningProperty;
-import edu.uci.ics.hyracks.algebricks.core.algebra.properties.RandomPartitioningProperty;
-
-public class HiveDataSink implements IDataSink {
-
- private Object[] schema;
-
- private Object fsOperator;
-
- public HiveDataSink(Object sink, Object[] sourceSchema) {
- schema = sourceSchema;
- fsOperator = sink;
- }
-
- @Override
- public Object getId() {
- return fsOperator;
- }
-
- @Override
- public Object[] getSchemaTypes() {
- return schema;
- }
-
- public IPartitioningProperty getPartitioningProperty() {
- return new RandomPartitioningProperty(new HiveDomain());
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java
deleted file mode 100644
index edff056..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDataSource.java
+++ /dev/null
@@ -1,48 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSourcePropertiesProvider;
-import edu.uci.ics.hyracks.algebricks.core.algebra.properties.FunctionalDependency;
-
-public class HiveDataSource<P> implements IDataSource<P> {
-
- private P source;
-
- private Object[] schema;
-
- public HiveDataSource(P dataSource, Object[] sourceSchema) {
- source = dataSource;
- schema = sourceSchema;
- }
-
- @Override
- public P getId() {
- return source;
- }
-
- @Override
- public Object[] getSchemaTypes() {
- return schema;
- }
-
- @Override
- public void computeFDs(List<LogicalVariable> scanVariables,
- List<FunctionalDependency> fdList) {
- }
-
- @Override
- public IDataSourcePropertiesProvider getPropertiesProvider() {
- return new HiveDataSourcePartitioningProvider();
- }
-
- @Override
- public String toString() {
- PartitionDesc desc = (PartitionDesc) source;
- return desc.getTableName();
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java
deleted file mode 100644
index 0af253a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveDomain.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.properties.INodeDomain;
-
-public class HiveDomain implements INodeDomain {
-
- @Override
- public boolean sameAs(INodeDomain domain) {
- return true;
- }
-
- @Override
- public Integer cardinality() {
- return 0;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java
deleted file mode 100644
index 5782703..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveMetaDataProvider.java
+++ /dev/null
@@ -1,149 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import java.util.HashMap;
-import java.util.List;
-
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-
-import edu.uci.ics.hivesterix.logical.expression.HiveFunctionInfo;
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
-import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSink;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSourceIndex;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
-import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
-import edu.uci.ics.hyracks.algebricks.data.IPrinterFactory;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
-import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-
-@SuppressWarnings("rawtypes")
-public class HiveMetaDataProvider<S, T> implements IMetadataProvider<S, T> {
-
- private Operator fileSink;
- private Schema outputSchema;
- private HashMap<S, IDataSource<S>> dataSourceMap;
-
- public HiveMetaDataProvider(Operator fsOp, Schema oi,
- HashMap<S, IDataSource<S>> map) {
- fileSink = fsOp;
- outputSchema = oi;
- dataSourceMap = map;
- }
-
- @Override
- public IDataSourceIndex<T, S> findDataSourceIndex(T indexId, S dataSourceId)
- throws AlgebricksException {
- return null;
- }
-
- @Override
- public IDataSource<S> findDataSource(S id) throws AlgebricksException {
- return dataSourceMap.get(id);
- }
-
- @Override
- public boolean scannerOperatorIsLeaf(IDataSource<S> dataSource) {
- return true;
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getScannerRuntime(
- IDataSource<S> dataSource, List<LogicalVariable> scanVariables,
- List<LogicalVariable> projectVariables, boolean projectPushed,
- IOperatorSchema opSchema, IVariableTypeEnvironment typeEnv,
- JobGenContext context, JobSpecification jobSpec)
- throws AlgebricksException {
-
- S desc = dataSource.getId();
- HiveScanRuntimeGenerator generator = new HiveScanRuntimeGenerator(
- (PartitionDesc) desc);
- return generator.getRuntimeOperatorAndConstraint(dataSource,
- scanVariables, projectVariables, projectPushed, context,
- jobSpec);
- }
-
- @Override
- public Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> getWriteFileRuntime(
- IDataSink sink, int[] printColumns,
- IPrinterFactory[] printerFactories, RecordDescriptor inputDesc) {
-
- HiveWriteRuntimeGenerator generator = new HiveWriteRuntimeGenerator(
- (FileSinkOperator) fileSink, outputSchema);
- return generator.getWriterRuntime(inputDesc);
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getDeleteRuntime(
- IDataSource<S> arg0, IOperatorSchema arg1,
- List<LogicalVariable> arg2, LogicalVariable arg3,
- RecordDescriptor arg4, JobGenContext arg5, JobSpecification arg6)
- throws AlgebricksException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getInsertRuntime(
- IDataSource<S> arg0, IOperatorSchema arg1,
- List<LogicalVariable> arg2, LogicalVariable arg3,
- RecordDescriptor arg4, JobGenContext arg5, JobSpecification arg6)
- throws AlgebricksException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getWriteResultRuntime(
- IDataSource<S> arg0, IOperatorSchema arg1,
- List<LogicalVariable> arg2, LogicalVariable arg3,
- JobGenContext arg4, JobSpecification arg5)
- throws AlgebricksException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public IFunctionInfo lookupFunction(FunctionIdentifier arg0) {
- return new HiveFunctionInfo(arg0, null);
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexInsertRuntime(
- IDataSourceIndex<T, S> dataSource,
- IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas,
- IVariableTypeEnvironment typeEnv,
- List<LogicalVariable> primaryKeys,
- List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr,
- RecordDescriptor recordDesc, JobGenContext context,
- JobSpecification spec) throws AlgebricksException {
- // TODO Auto-generated method stub
- return null;
- }
-
- @Override
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getIndexDeleteRuntime(
- IDataSourceIndex<T, S> dataSource,
- IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas,
- IVariableTypeEnvironment typeEnv,
- List<LogicalVariable> primaryKeys,
- List<LogicalVariable> secondaryKeys, ILogicalExpression filterExpr,
- RecordDescriptor recordDesc, JobGenContext context,
- JobSpecification spec) throws AlgebricksException {
- // TODO Auto-generated method stub
- return null;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java
deleted file mode 100644
index 83382f0..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveOperatorSchema.java
+++ /dev/null
@@ -1,84 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
-
-public class HiveOperatorSchema implements IOperatorSchema {
-
- private final Map<LogicalVariable, Integer> varMap;
-
- private final List<LogicalVariable> varList;
-
- public HiveOperatorSchema() {
- varMap = new HashMap<LogicalVariable, Integer>();
- varList = new ArrayList<LogicalVariable>();
- }
-
- @Override
- public void addAllVariables(IOperatorSchema source) {
- for (LogicalVariable v : source) {
- varMap.put(v, varList.size());
- varList.add(v);
- }
- }
-
- @Override
- public void addAllNewVariables(IOperatorSchema source) {
- for (LogicalVariable v : source) {
- if (varMap.get(v) == null) {
- varMap.put(v, varList.size());
- varList.add(v);
- }
- }
- }
-
- @Override
- public int addVariable(LogicalVariable var) {
- int idx = varList.size();
- varMap.put(var, idx);
- varList.add(var);
- return idx;
- }
-
- @Override
- public void clear() {
- varMap.clear();
- varList.clear();
- }
-
- @Override
- public int findVariable(LogicalVariable var) {
- Integer i = varMap.get(var);
- if (i == null) {
- return -1;
- }
- return i;
- }
-
- @Override
- public int getSize() {
- return varList.size();
- }
-
- @Override
- public LogicalVariable getVariable(int index) {
- return varList.get(index);
- }
-
- @Override
- public Iterator<LogicalVariable> iterator() {
- return varList.iterator();
- }
-
- @Override
- public String toString() {
- return varMap.toString();
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java
deleted file mode 100644
index 9c8aee4..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveScanRuntimeGenerator.java
+++ /dev/null
@@ -1,117 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.runtime.operator.filescan.HiveFileScanOperatorDescriptor;
-import edu.uci.ics.hivesterix.runtime.operator.filescan.HiveFileSplitProvider;
-import edu.uci.ics.hivesterix.runtime.operator.filescan.HiveTupleParserFactory;
-import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
-import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IDataSource;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
-import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
-import edu.uci.ics.hyracks.algebricks.data.ISerializerDeserializerProvider;
-import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
-import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParserFactory;
-
-@SuppressWarnings({ "rawtypes", "deprecation" })
-public class HiveScanRuntimeGenerator {
-
- private PartitionDesc fileDesc;
-
- private transient Path filePath;
-
- private String filePathName;
-
- private Properties properties;
-
- public HiveScanRuntimeGenerator(PartitionDesc path) {
- fileDesc = path;
- properties = fileDesc.getProperties();
-
- String inputPath = (String) properties.getProperty("location");
-
- if (inputPath.startsWith("file:")) {
- // Windows
- String[] strs = inputPath.split(":");
- filePathName = strs[strs.length - 1];
- } else {
- // Linux
- filePathName = inputPath;
- }
-
- filePath = new Path(filePathName);
- }
-
- public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> getRuntimeOperatorAndConstraint(
- IDataSource dataSource, List<LogicalVariable> scanVariables,
- List<LogicalVariable> projectVariables, boolean projectPushed,
- JobGenContext context, JobSpecification jobSpec)
- throws AlgebricksException {
- // get the correct delimiter from Hive metastore or other data
- // structures
- IOperatorSchema propagatedSchema = new HiveOperatorSchema();
-
- List<LogicalVariable> outputVariables = projectPushed ? projectVariables
- : scanVariables;
- for (LogicalVariable var : outputVariables)
- propagatedSchema.addVariable(var);
-
- int[] outputColumnsOffset = new int[scanVariables.size()];
- int i = 0;
- for (LogicalVariable var : scanVariables)
- if (outputVariables.contains(var)) {
- int offset = outputVariables.indexOf(var);
- outputColumnsOffset[i++] = offset;
- } else
- outputColumnsOffset[i++] = -1;
-
- Object[] schemaTypes = dataSource.getSchemaTypes();
- // get record descriptor
- RecordDescriptor recDescriptor = mkRecordDescriptor(propagatedSchema,
- schemaTypes, context);
-
- // setup the run time operator
- JobConf conf = ConfUtil.getJobConf(fileDesc.getInputFileFormatClass(),
- filePath);
- int clusterSize = ConfUtil.getNCs().length;
- IFileSplitProvider fsprovider = new HiveFileSplitProvider(conf,
- filePathName, clusterSize);
- ITupleParserFactory tupleParserFactory = new HiveTupleParserFactory(
- fileDesc, conf, outputColumnsOffset);
- HiveFileScanOperatorDescriptor opDesc = new HiveFileScanOperatorDescriptor(
- jobSpec, fsprovider, tupleParserFactory, recDescriptor);
-
- return new Pair<IOperatorDescriptor, AlgebricksPartitionConstraint>(
- opDesc, opDesc.getPartitionConstraint());
- }
-
- private static RecordDescriptor mkRecordDescriptor(
- IOperatorSchema opSchema, Object[] types, JobGenContext context)
- throws AlgebricksException {
- ISerializerDeserializer[] fields = new ISerializerDeserializer[opSchema
- .getSize()];
- ISerializerDeserializerProvider sdp = context
- .getSerializerDeserializerProvider();
- int size = opSchema.getSize();
- for (int i = 0; i < size; i++) {
- Object t = types[i];
- fields[i] = sdp.getSerializerDeserializer(t);
- i++;
- }
- return new RecordDescriptor(fields);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java
deleted file mode 100644
index d372868..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/jobgen/HiveWriteRuntimeGenerator.java
+++ /dev/null
@@ -1,40 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.jobgen;
-
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.runtime.operator.filewrite.HivePushRuntimeFactory;
-import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-
-@SuppressWarnings("deprecation")
-public class HiveWriteRuntimeGenerator {
- private FileSinkOperator fileSink;
-
- private Schema inputSchema;
-
- public HiveWriteRuntimeGenerator(FileSinkOperator fsOp, Schema oi) {
- fileSink = fsOp;
- inputSchema = oi;
- }
-
- /**
- * get the write runtime
- *
- * @param inputDesc
- * @return
- */
- public Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> getWriterRuntime(
- RecordDescriptor inputDesc) {
- JobConf conf = ConfUtil.getJobConf();
- IPushRuntimeFactory factory = new HivePushRuntimeFactory(inputDesc,
- conf, fileSink, inputSchema);
- Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint> pair = new Pair<IPushRuntimeFactory, AlgebricksPartitionConstraint>(
- factory, null);
- return pair;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveFileSplitProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveFileSplitProvider.java
deleted file mode 100644
index 2f988f8..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveFileSplitProvider.java
+++ /dev/null
@@ -1,19 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import edu.uci.ics.hyracks.dataflow.std.file.FileSplit;
-import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-
-public abstract class AbstractHiveFileSplitProvider implements
- IFileSplitProvider {
- private static final long serialVersionUID = 1L;
-
- @Override
- public FileSplit[] getFileSplits() {
- // TODO Auto-generated method stub
- return null;
- }
-
- @SuppressWarnings("deprecation")
- public abstract org.apache.hadoop.mapred.FileSplit[] getFileSplitArray();
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveTupleParser.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveTupleParser.java
deleted file mode 100644
index a8addeb..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/AbstractHiveTupleParser.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.InputStream;
-
-import org.apache.hadoop.mapred.FileSplit;
-
-import edu.uci.ics.hyracks.api.comm.IFrameWriter;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParser;
-
-@SuppressWarnings("deprecation")
-public abstract class AbstractHiveTupleParser implements ITupleParser {
-
- @Override
- public void parse(InputStream in, IFrameWriter writer)
- throws HyracksDataException {
- // empty implementation
- }
-
- /**
- * method for parsing HDFS file split
- *
- * @param split
- * @param writer
- */
- abstract public void parse(FileSplit split, IFrameWriter writer)
- throws HyracksDataException;
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileScanOperatorDescriptor.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileScanOperatorDescriptor.java
deleted file mode 100644
index 9e62c73..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileScanOperatorDescriptor.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Copyright 2009-2010 by The Regents of the University of California
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * you may obtain a copy of the License from
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.IOException;
-import java.net.InetAddress;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.Random;
-
-import org.apache.hadoop.mapred.FileSplit;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
-import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.api.job.JobSpecification;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
-import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable;
-import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParser;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParserFactory;
-
-@SuppressWarnings("deprecation")
-public class HiveFileScanOperatorDescriptor extends
- AbstractSingleActivityOperatorDescriptor {
- private static final long serialVersionUID = 1L;
-
- /**
- * tuple parser factory
- */
- private final ITupleParserFactory tupleParserFactory;
-
- /**
- * Hive file split
- */
- private Partition[] parts;
-
- /**
- * IFileSplitProvider
- */
- private IFileSplitProvider fileSplitProvider;
-
- /**
- * constrains in the form of host DNS names
- */
- private String[] constraintsByHostNames;
-
- /**
- * ip-to-node controller mapping
- */
- private Map<String, List<String>> ncMapping;
-
- /**
- * an array of NCs
- */
- private String[] NCs;
-
- /**
- *
- * @param spec
- * @param fsProvider
- */
- public HiveFileScanOperatorDescriptor(JobSpecification spec,
- IFileSplitProvider fsProvider,
- ITupleParserFactory tupleParserFactory, RecordDescriptor rDesc) {
- super(spec, 0, 1);
- this.tupleParserFactory = tupleParserFactory;
- recordDescriptors[0] = rDesc;
- fileSplitProvider = fsProvider;
- }
-
- /**
- * set partition constraint at the first time it is called the number of
- * partitions is obtained from HDFS name node
- */
- public AlgebricksAbsolutePartitionConstraint getPartitionConstraint()
- throws AlgebricksException {
- FileSplit[] returnedSplits = ((AbstractHiveFileSplitProvider) fileSplitProvider)
- .getFileSplitArray();
- Random random = new Random(System.currentTimeMillis());
- ncMapping = ConfUtil.getNCMapping();
- NCs = ConfUtil.getNCs();
-
- int size = 0;
- for (FileSplit split : returnedSplits)
- if (split != null)
- size++;
-
- FileSplit[] splits = new FileSplit[size];
- for (int i = 0; i < returnedSplits.length; i++)
- if (returnedSplits[i] != null)
- splits[i] = returnedSplits[i];
-
- System.out.println("number of splits: " + splits.length);
- constraintsByHostNames = new String[splits.length];
- for (int i = 0; i < splits.length; i++) {
- try {
- String[] loc = splits[i].getLocations();
- Collections.shuffle(Arrays.asList(loc), random);
- if (loc.length > 0) {
- InetAddress[] allIps = InetAddress.getAllByName(loc[0]);
- for (InetAddress ip : allIps) {
- if (ncMapping.get(ip.getHostAddress()) != null) {
- List<String> ncs = ncMapping.get(ip
- .getHostAddress());
- int pos = random.nextInt(ncs.size());
- constraintsByHostNames[i] = ncs.get(pos);
- } else {
- int pos = random.nextInt(NCs.length);
- constraintsByHostNames[i] = NCs[pos];
- }
- }
- } else {
- int pos = random.nextInt(NCs.length);
- constraintsByHostNames[i] = NCs[pos];
- if (splits[i].getLength() > 0)
- throw new IllegalStateException(
- "non local scanner non locations!!");
- }
- } catch (IOException e) {
- throw new AlgebricksException(e);
- }
- }
-
- parts = new Partition[splits.length];
- for (int i = 0; i < splits.length; i++) {
- parts[i] = new Partition(splits[i]);
- }
- return new AlgebricksAbsolutePartitionConstraint(constraintsByHostNames);
- }
-
- @Override
- public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
- IRecordDescriptorProvider recordDescProvider, int partition,
- int nPartitions) {
-
- final ITupleParser tp = tupleParserFactory.createTupleParser(ctx);
- final int partitionId = partition;
-
- return new AbstractUnaryOutputSourceOperatorNodePushable() {
-
- @Override
- public void initialize() throws HyracksDataException {
- writer.open();
- FileSplit split = parts[partitionId].toFileSplit();
- if (split == null)
- throw new HyracksDataException("partition " + partitionId
- + " is null!");
- ((AbstractHiveTupleParser) tp).parse(split, writer);
- writer.close();
- }
- };
- }
-}
\ No newline at end of file
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileSplitProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileSplitProvider.java
deleted file mode 100644
index d92d353..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveFileSplitProvider.java
+++ /dev/null
@@ -1,115 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.UUID;
-
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.eclipse.jetty.util.log.Log;
-
-@SuppressWarnings({ "deprecation", "rawtypes" })
-public class HiveFileSplitProvider extends AbstractHiveFileSplitProvider {
- private static final long serialVersionUID = 1L;
-
- private transient InputFormat format;
- private transient JobConf conf;
- private String confContent;
- final private int nPartition;
- private transient FileSplit[] splits;
-
- public HiveFileSplitProvider(JobConf conf, String filePath, int nPartition) {
- format = conf.getInputFormat();
- this.conf = conf;
- this.nPartition = nPartition;
- writeConfContent();
- }
-
- private void writeConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(
- new File(fileName)));
- conf.writeXml(out);
- out.close();
-
- DataInputStream in = new DataInputStream(new FileInputStream(
- fileName));
- StringBuffer buffer = new StringBuffer();
- String line;
- while ((line = in.readLine()) != null) {
- buffer.append(line + "\n");
- }
- in.close();
- confContent = buffer.toString();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- private void readConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- PrintWriter out = new PrintWriter((new OutputStreamWriter(
- new FileOutputStream(new File(fileName)))));
- out.write(confContent);
- out.close();
- conf = new JobConf(fileName);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- @Override
- /**
- * get the HDFS file split
- */
- public FileSplit[] getFileSplitArray() {
- readConfContent();
- conf.setClassLoader(this.getClass().getClassLoader());
- format = conf.getInputFormat();
- // int splitSize = conf.getInt("mapred.min.split.size", 0);
-
- if (splits == null) {
- try {
- splits = (org.apache.hadoop.mapred.FileSplit[]) format
- .getSplits(conf, nPartition);
- System.out.println("hdfs split number: " + splits.length);
- } catch (IOException e) {
- String inputPath = conf.get("mapred.input.dir");
- String hdfsURL = conf.get("fs.default.name");
- String alternatePath = inputPath.replaceAll(hdfsURL, "file:");
- conf.set("mapred.input.dir", alternatePath);
- try {
- splits = (org.apache.hadoop.mapred.FileSplit[]) format
- .getSplits(conf, nPartition);
- System.out.println("hdfs split number: " + splits.length);
- } catch (IOException e1) {
- e1.printStackTrace();
- Log.debug(e1.getMessage());
- return null;
- }
- }
- }
- return splits;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParser.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParser.java
deleted file mode 100644
index 7681bd1..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParser.java
+++ /dev/null
@@ -1,233 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.List;
-import java.util.Properties;
-
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputFormat;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.util.ReflectionUtils;
-
-import edu.uci.ics.hivesterix.serde.parser.IHiveParser;
-import edu.uci.ics.hivesterix.serde.parser.TextToBinaryTupleParser;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.api.comm.IFrameWriter;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
-import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
-
-@SuppressWarnings({ "rawtypes", "deprecation", "unchecked" })
-public class HiveTupleParser extends AbstractHiveTupleParser {
-
- private int[] outputColumnsOffset;
- /**
- * class of input format
- */
- private InputFormat inputFormat;
-
- /**
- * serialization/deserialization object
- */
- private SerDe serDe;
-
- /**
- * the input row object inspector
- */
- private ObjectInspector objectInspector;
-
- /**
- * the hadoop job conf
- */
- private JobConf job;
-
- /**
- * Hyrax context to control resource allocation
- */
- private final IHyracksTaskContext ctx;
-
- /**
- * lazy serde: format flow in between operators
- */
- private final SerDe outputSerDe;
-
- /**
- * the parser from hive data to binary data
- */
- private IHiveParser parser = null;
-
- /**
- * parser for any hive input format
- *
- * @param inputFormatClass
- * @param serDeClass
- * @param tbl
- * @param conf
- * @throws AlgebricksException
- */
- public HiveTupleParser(String inputFormatClass, String serDeClass,
- String outputSerDeClass, Properties tbl, JobConf conf,
- final IHyracksTaskContext ctx, int[] outputColumnsOffset)
- throws AlgebricksException {
- try {
- conf.setClassLoader(this.getClass().getClassLoader());
-
- inputFormat = (InputFormat) ReflectionUtils.newInstance(
- Class.forName(inputFormatClass), conf);
- job = conf;
-
- // initialize the input serde
- serDe = (SerDe) ReflectionUtils.newInstance(
- Class.forName(serDeClass), job);
- serDe.initialize(job, tbl);
-
- // initialize the output serde
- outputSerDe = (SerDe) ReflectionUtils.newInstance(
- Class.forName(outputSerDeClass), job);
- outputSerDe.initialize(job, tbl);
-
- // object inspector of the row
- objectInspector = serDe.getObjectInspector();
-
- // hyracks context
- this.ctx = ctx;
- this.outputColumnsOffset = outputColumnsOffset;
-
- if (objectInspector instanceof LazySimpleStructObjectInspector) {
- LazySimpleStructObjectInspector rowInspector = (LazySimpleStructObjectInspector) objectInspector;
- List<? extends StructField> fieldRefs = rowInspector
- .getAllStructFieldRefs();
- boolean lightWeightParsable = true;
- for (StructField fieldRef : fieldRefs) {
- Category category = fieldRef.getFieldObjectInspector()
- .getCategory();
- if (!(category == Category.PRIMITIVE)) {
- lightWeightParsable = false;
- break;
- }
- }
- if (lightWeightParsable)
- parser = new TextToBinaryTupleParser(
- this.outputColumnsOffset, this.objectInspector);
- }
- } catch (Exception e) {
- throw new AlgebricksException(e);
- }
- }
-
- /**
- * parse a input HDFS file split, the result is send to the writer
- * one-frame-a-time
- *
- * @param split
- * the HDFS file split
- * @param writer
- * the writer
- * @throws HyracksDataException
- * if there is sth. wrong in the ser/de
- */
- @Override
- public void parse(FileSplit split, IFrameWriter writer)
- throws HyracksDataException {
- try {
- StructObjectInspector structInspector = (StructObjectInspector) objectInspector;
-
- // create the reader, key, and value
- RecordReader reader = inputFormat.getRecordReader(split, job,
- Reporter.NULL);
- Object key = reader.createKey();
- Object value = reader.createValue();
-
- // allocate a new frame
- ByteBuffer frame = ctx.allocateFrame();
- FrameTupleAppender appender = new FrameTupleAppender(
- ctx.getFrameSize());
- appender.reset(frame, true);
-
- List<? extends StructField> fieldRefs = structInspector
- .getAllStructFieldRefs();
- int size = 0;
- for (int i = 0; i < outputColumnsOffset.length; i++)
- if (outputColumnsOffset[i] >= 0)
- size++;
-
- ArrayTupleBuilder tb = new ArrayTupleBuilder(size);
- DataOutput dos = tb.getDataOutput();
- StructField[] outputFieldRefs = new StructField[size];
- Object[] outputFields = new Object[size];
- for (int i = 0; i < outputColumnsOffset.length; i++)
- if (outputColumnsOffset[i] >= 0)
- outputFieldRefs[outputColumnsOffset[i]] = fieldRefs.get(i);
-
- while (reader.next(key, value)) {
- // reuse the tuple builder
- tb.reset();
- if (parser != null) {
- Text text = (Text) value;
- parser.parse(text.getBytes(), 0, text.getLength(), tb);
- } else {
- Object row = serDe.deserialize((Writable) value);
- // write fields to the tuple builder one by one
- int i = 0;
- for (StructField fieldRef : fieldRefs) {
- if (outputColumnsOffset[i] >= 0)
- outputFields[outputColumnsOffset[i]] = structInspector
- .getStructFieldData(row, fieldRef);
- i++;
- }
-
- i = 0;
- for (Object field : outputFields) {
- BytesWritable fieldWritable = (BytesWritable) outputSerDe
- .serialize(field, outputFieldRefs[i]
- .getFieldObjectInspector());
- dos.write(fieldWritable.getBytes(), 0,
- fieldWritable.getSize());
- tb.addFieldEndOffset();
- i++;
- }
- }
-
- if (!appender.append(tb.getFieldEndOffsets(),
- tb.getByteArray(), 0, tb.getSize())) {
- if (appender.getTupleCount() <= 0)
- throw new IllegalStateException(
- "zero tuples in a frame!");
- FrameUtils.flushFrame(frame, writer);
- appender.reset(frame, true);
- if (!appender.append(tb.getFieldEndOffsets(),
- tb.getByteArray(), 0, tb.getSize())) {
- throw new IllegalStateException();
- }
- }
- }
- reader.close();
- System.gc();
-
- // flush the last frame
- if (appender.getTupleCount() > 0) {
- FrameUtils.flushFrame(frame, writer);
- }
- } catch (IOException e) {
- throw new HyracksDataException(e);
- } catch (SerDeException e) {
- throw new HyracksDataException(e);
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParserFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParserFactory.java
deleted file mode 100644
index 69aa881..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParserFactory.java
+++ /dev/null
@@ -1,111 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.Properties;
-import java.util.UUID;
-
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParser;
-import edu.uci.ics.hyracks.dataflow.std.file.ITupleParserFactory;
-
-@SuppressWarnings("deprecation")
-public class HiveTupleParserFactory implements ITupleParserFactory {
-
- private static final long serialVersionUID = 1L;
-
- private int[] outputColumns;
-
- private String outputSerDeClass = LazySerDe.class.getName();
-
- private String inputSerDeClass;
-
- private transient JobConf conf;
-
- private Properties tbl;
-
- private String confContent;
-
- private String inputFormatClass;
-
- public HiveTupleParserFactory(PartitionDesc desc, JobConf conf,
- int[] outputColumns) {
- this.conf = conf;
- tbl = desc.getProperties();
- inputFormatClass = (String) tbl.getProperty("file.inputformat");
- inputSerDeClass = (String) tbl.getProperty("serialization.lib");
- this.outputColumns = outputColumns;
-
- writeConfContent();
- }
-
- @Override
- public ITupleParser createTupleParser(IHyracksTaskContext ctx) {
- readConfContent();
- try {
- return new HiveTupleParser(inputFormatClass, inputSerDeClass,
- outputSerDeClass, tbl, conf, ctx, outputColumns);
- } catch (Exception e) {
- e.printStackTrace();
- return null;
- }
- }
-
- private void writeConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(
- new File(fileName)));
- conf.writeXml(out);
- out.close();
-
- DataInputStream in = new DataInputStream(new FileInputStream(
- fileName));
- StringBuffer buffer = new StringBuffer();
- String line;
- while ((line = in.readLine()) != null) {
- buffer.append(line + "\n");
- }
- in.close();
- confContent = buffer.toString();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- private void readConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- PrintWriter out = new PrintWriter((new OutputStreamWriter(
- new FileOutputStream(new File(fileName)))));
- out.write(confContent);
- out.close();
-
- conf = new JobConf(fileName);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/Partition.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/Partition.java
deleted file mode 100644
index 1b3dcf2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/Partition.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filescan;
-
-import java.io.IOException;
-import java.io.Serializable;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.FileSplit;
-
-@SuppressWarnings("deprecation")
-public class Partition implements Serializable {
- private static final long serialVersionUID = 1L;
-
- private String uri;
- private long offset;
- private long length;
- private String[] locations;
-
- public Partition() {
- }
-
- public Partition(FileSplit file) {
- uri = file.getPath().toUri().toString();
- offset = file.getStart();
- length = file.getLength();
- try {
- locations = file.getLocations();
- } catch (IOException e) {
- throw new IllegalStateException(e);
- }
- }
-
- public FileSplit toFileSplit() {
- return new FileSplit(new Path(uri), offset, length, locations);
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
deleted file mode 100644
index 05e79ea..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HiveFileWritePushRuntime.java
+++ /dev/null
@@ -1,153 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filewrite;
-
-import java.nio.ByteBuffer;
-
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.OperatorFactory;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
-import edu.uci.ics.hyracks.api.comm.IFrameWriter;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.FrameTupleReference;
-
-@SuppressWarnings("deprecation")
-public class HiveFileWritePushRuntime implements IPushRuntime {
-
- /**
- * frame tuple accessor to access byte buffer
- */
- private final FrameTupleAccessor accessor;
-
- /**
- * input object inspector
- */
- private final ObjectInspector inputInspector;
-
- /**
- * cachedInput
- */
- private final LazyColumnar cachedInput;
-
- /**
- * File sink operator of Hive
- */
- private final FileSinkDesc fileSink;
-
- /**
- * job configuration, which contain name node and other configuration
- * information
- */
- private JobConf conf;
-
- /**
- * input object inspector
- */
- private final Schema inputSchema;
-
- /**
- * a copy of hive schema representation
- */
- private RowSchema rowSchema;
-
- /**
- * the Hive file sink operator
- */
- private FileSinkOperator fsOp;
-
- /**
- * cached tuple object reference
- */
- private FrameTupleReference tuple = new FrameTupleReference();
-
- /**
- * @param spec
- * @param fsProvider
- */
- public HiveFileWritePushRuntime(IHyracksTaskContext context,
- RecordDescriptor inputRecordDesc, JobConf job, FileSinkDesc fs,
- RowSchema schema, Schema oi) {
- fileSink = fs;
- fileSink.setGatherStats(false);
-
- rowSchema = schema;
- conf = job;
- inputSchema = oi;
-
- accessor = new FrameTupleAccessor(context.getFrameSize(),
- inputRecordDesc);
- inputInspector = inputSchema.toObjectInspector();
- cachedInput = new LazyColumnar(
- (LazyColumnarObjectInspector) inputInspector);
- }
-
- @Override
- public void open() throws HyracksDataException {
- fsOp = (FileSinkOperator) OperatorFactory.get(fileSink, rowSchema);
- fsOp.setChildOperators(null);
- fsOp.setParentOperators(null);
- conf.setClassLoader(this.getClass().getClassLoader());
-
- ObjectInspector[] inspectors = new ObjectInspector[1];
- inspectors[0] = inputInspector;
- try {
- fsOp.initialize(conf, inspectors);
- fsOp.setExecContext(null);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- @Override
- public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
- accessor.reset(buffer);
- int n = accessor.getTupleCount();
- try {
- for (int i = 0; i < n; ++i) {
- tuple.reset(accessor, i);
- cachedInput.init(tuple);
- fsOp.process(cachedInput, 0);
- }
- } catch (HiveException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void close() throws HyracksDataException {
- try {
- Thread.currentThread().setContextClassLoader(
- this.getClass().getClassLoader());
- fsOp.closeOp(false);
- } catch (HiveException e) {
- throw new HyracksDataException(e);
- }
- }
-
- @Override
- public void setFrameWriter(int index, IFrameWriter writer,
- RecordDescriptor recordDesc) {
- throw new IllegalStateException();
- }
-
- @Override
- public void setInputRecordDescriptor(int index,
- RecordDescriptor recordDescriptor) {
- }
-
- @Override
- public void fail() throws HyracksDataException {
-
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java
deleted file mode 100644
index 43e90fa..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filewrite/HivePushRuntimeFactory.java
+++ /dev/null
@@ -1,113 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.operator.filewrite;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.OutputStreamWriter;
-import java.io.PrintWriter;
-import java.util.UUID;
-
-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
-import org.apache.hadoop.hive.ql.exec.RowSchema;
-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
-import org.apache.hadoop.mapred.JobConf;
-
-import edu.uci.ics.hivesterix.logical.expression.Schema;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntime;
-import edu.uci.ics.hyracks.algebricks.runtime.base.IPushRuntimeFactory;
-import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
-import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
-
-@SuppressWarnings("deprecation")
-public class HivePushRuntimeFactory implements IPushRuntimeFactory {
-
- private static final long serialVersionUID = 1L;
-
- private final RecordDescriptor inputRecordDesc;
- private transient JobConf conf;
- private final FileSinkDesc fileSink;
- private final RowSchema outSchema;
- private final Schema schema;
-
- /**
- * the content of the configuration
- */
- private String confContent;
-
- public HivePushRuntimeFactory(RecordDescriptor inputRecordDesc,
- JobConf conf, FileSinkOperator fsp, Schema sch) {
- this.inputRecordDesc = inputRecordDesc;
- this.conf = conf;
- this.fileSink = fsp.getConf();
- outSchema = fsp.getSchema();
- this.schema = sch;
-
- writeConfContent();
- }
-
- @Override
- public String toString() {
- return "file write";
- }
-
- @Override
- public IPushRuntime createPushRuntime(IHyracksTaskContext context)
- throws AlgebricksException {
- if (conf == null)
- readConfContent();
-
- return new HiveFileWritePushRuntime(context, inputRecordDesc, conf,
- fileSink, outSchema, schema);
- }
-
- private void readConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- PrintWriter out = new PrintWriter((new OutputStreamWriter(
- new FileOutputStream(new File(fileName)))));
- out.write(confContent);
- out.close();
- conf = new JobConf(fileName);
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
- private void writeConfContent() {
- File dir = new File("hadoop-conf-tmp");
- if (!dir.exists()) {
- dir.mkdir();
- }
-
- String fileName = "hadoop-conf-tmp/" + UUID.randomUUID()
- + System.currentTimeMillis() + ".xml";
- try {
- DataOutputStream out = new DataOutputStream(new FileOutputStream(
- new File(fileName)));
- conf.writeXml(out);
- out.close();
-
- DataInputStream in = new DataInputStream(new FileInputStream(
- fileName));
- StringBuffer buffer = new StringBuffer();
- String line;
- while ((line = in.readLine()) != null) {
- buffer.append(line + "\n");
- }
- in.close();
- confContent = buffer.toString();
- } catch (Exception e) {
- e.printStackTrace();
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java
deleted file mode 100644
index 5a2e98c..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryComparatorFactoryProvider.java
+++ /dev/null
@@ -1,77 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.provider;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveByteBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveByteBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveDoubleBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveDoubleBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveFloatBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveFloatBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveIntegerBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveIntegerBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveLongBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveLongBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveShortBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveShortBinaryDescComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveStringBinaryAscComparatorFactory;
-import edu.uci.ics.hivesterix.runtime.factory.comparator.HiveStringBinaryDescComparatorFactory;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
-import edu.uci.ics.hyracks.algebricks.data.IBinaryComparatorFactoryProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
-
-public class HiveBinaryComparatorFactoryProvider implements
- IBinaryComparatorFactoryProvider {
-
- public static final HiveBinaryComparatorFactoryProvider INSTANCE = new HiveBinaryComparatorFactoryProvider();
-
- private HiveBinaryComparatorFactoryProvider() {
- }
-
- @Override
- public IBinaryComparatorFactory getBinaryComparatorFactory(Object type,
- boolean ascending) throws AlgebricksException {
- if (type.equals(TypeInfoFactory.intTypeInfo)) {
- if (ascending)
- return HiveIntegerBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveIntegerBinaryDescComparatorFactory.INSTANCE;
-
- } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
- if (ascending)
- return HiveLongBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveLongBinaryDescComparatorFactory.INSTANCE;
-
- } else if (type.equals(TypeInfoFactory.floatTypeInfo)) {
- if (ascending)
- return HiveFloatBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveFloatBinaryDescComparatorFactory.INSTANCE;
-
- } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
- if (ascending)
- return HiveDoubleBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveDoubleBinaryDescComparatorFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.shortTypeInfo)) {
- if (ascending)
- return HiveShortBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveShortBinaryDescComparatorFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.stringTypeInfo)) {
- if (ascending)
- return HiveStringBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveStringBinaryDescComparatorFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.byteTypeInfo)
- || type.equals(TypeInfoFactory.booleanTypeInfo)) {
- if (ascending)
- return HiveByteBinaryAscComparatorFactory.INSTANCE;
- else
- return HiveByteBinaryDescComparatorFactory.INSTANCE;
- } else
- throw new NotImplementedException();
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java
deleted file mode 100644
index 371d45b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFactoryProvider.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.provider;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveDoubleBinaryHashFunctionFactory;
-import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveIntegerBinaryHashFunctionFactory;
-import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveLongBinaryHashFunctionFactory;
-import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveRawBinaryHashFunctionFactory;
-import edu.uci.ics.hivesterix.runtime.factory.hashfunction.HiveStingBinaryHashFunctionFactory;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
-
-public class HiveBinaryHashFunctionFactoryProvider implements
- IBinaryHashFunctionFactoryProvider {
-
- public static final HiveBinaryHashFunctionFactoryProvider INSTANCE = new HiveBinaryHashFunctionFactoryProvider();
-
- private HiveBinaryHashFunctionFactoryProvider() {
- }
-
- @Override
- public IBinaryHashFunctionFactory getBinaryHashFunctionFactory(Object type)
- throws AlgebricksException {
- if (type.equals(TypeInfoFactory.intTypeInfo)) {
- return HiveIntegerBinaryHashFunctionFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
- return HiveLongBinaryHashFunctionFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.stringTypeInfo)) {
- return HiveStingBinaryHashFunctionFactory.INSTANCE;
- } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
- return HiveDoubleBinaryHashFunctionFactory.INSTANCE;
- } else {
- return HiveRawBinaryHashFunctionFactory.INSTANCE;
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java
deleted file mode 100644
index 9e3a8ae..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveNormalizedKeyComputerFactoryProvider.java
+++ /dev/null
@@ -1,53 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.provider;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveDoubleAscNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveDoubleDescNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveIntegerAscNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveIntegerDescNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveLongAscNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveLongDescNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveStringAscNormalizedKeyComputerFactory;
-import edu.uci.ics.hivesterix.runtime.factory.normalize.HiveStringDescNormalizedKeyComputerFactory;
-import edu.uci.ics.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
-
-public class HiveNormalizedKeyComputerFactoryProvider implements
- INormalizedKeyComputerFactoryProvider {
-
- public static final HiveNormalizedKeyComputerFactoryProvider INSTANCE = new HiveNormalizedKeyComputerFactoryProvider();
-
- private HiveNormalizedKeyComputerFactoryProvider() {
- }
-
- @Override
- public INormalizedKeyComputerFactory getNormalizedKeyComputerFactory(
- Object type, boolean ascending) {
- if (ascending) {
- if (type.equals(TypeInfoFactory.stringTypeInfo)) {
- return new HiveStringAscNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.intTypeInfo)) {
- return new HiveIntegerAscNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
- return new HiveLongAscNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
- return new HiveDoubleAscNormalizedKeyComputerFactory();
- } else {
- return null;
- }
- } else {
- if (type.equals(TypeInfoFactory.stringTypeInfo)) {
- return new HiveStringDescNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.intTypeInfo)) {
- return new HiveIntegerDescNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.longTypeInfo)) {
- return new HiveLongDescNormalizedKeyComputerFactory();
- } else if (type.equals(TypeInfoFactory.doubleTypeInfo)) {
- return new HiveDoubleDescNormalizedKeyComputerFactory();
- } else {
- return null;
- }
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java
deleted file mode 100644
index 7938de8..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveSerializerDeserializerProvider.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.provider;
-
-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.data.ISerializerDeserializerProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
-
-public class HiveSerializerDeserializerProvider implements
- ISerializerDeserializerProvider {
-
- public static final HiveSerializerDeserializerProvider INSTANCE = new HiveSerializerDeserializerProvider();
-
- private HiveSerializerDeserializerProvider() {
- }
-
- @SuppressWarnings("rawtypes")
- @Override
- public ISerializerDeserializer getSerializerDeserializer(Object type)
- throws AlgebricksException {
- // return ARecordSerializerDeserializer.SCHEMALESS_INSTANCE;
- return null;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java
deleted file mode 100644
index 2059128..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveTypeTraitProvider.java
+++ /dev/null
@@ -1,33 +0,0 @@
-package edu.uci.ics.hivesterix.runtime.provider;
-
-import java.io.Serializable;
-
-import edu.uci.ics.hyracks.algebricks.data.ITypeTraitProvider;
-import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
-
-public class HiveTypeTraitProvider implements ITypeTraitProvider, Serializable {
- private static final long serialVersionUID = 1L;
- public static HiveTypeTraitProvider INSTANCE = new HiveTypeTraitProvider();
-
- private HiveTypeTraitProvider() {
-
- }
-
- @Override
- public ITypeTraits getTypeTrait(Object arg0) {
- return new ITypeTraits() {
- private static final long serialVersionUID = 1L;
-
- @Override
- public int getFixedLength() {
- return -1;
- }
-
- @Override
- public boolean isFixedLength() {
- return false;
- }
-
- };
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java
deleted file mode 100644
index 821c03d..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyArray.java
+++ /dev/null
@@ -1,236 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyListObjectInspector;
-
-/**
- * LazyArray is serialized as follows: start A b b b b b b end bytes[] ->
- * |--------|---|---|---|---| ... |---|---|
- *
- * Section A is the null-bytes. Suppose the list has N elements, then there are
- * (N+7)/8 bytes used as null-bytes. Each bit corresponds to an element and it
- * indicates whether that element is null (0) or not null (1).
- *
- * After A, all b(s) represent the elements of the list. Each of them is again a
- * LazyObject.
- *
- */
-
-public class LazyArray extends LazyNonPrimitive<LazyListObjectInspector> {
-
- /**
- * Whether the data is already parsed or not.
- */
- boolean parsed = false;
- /**
- * The length of the array. Only valid when the data is parsed.
- */
- int arraySize = 0;
-
- /**
- * The start positions and lengths of array elements. Only valid when the
- * data is parsed.
- */
- int[] elementStart;
- int[] elementLength;
-
- /**
- * Whether an element is initialized or not.
- */
- boolean[] elementInited;
-
- /**
- * Whether an element is null or not. Because length is 0 does not means the
- * field is null. In particular, a 0-length string is not null.
- */
- boolean[] elementIsNull;
-
- /**
- * The elements of the array. Note that we call arrayElements[i].init(bytes,
- * begin, length) only when that element is accessed.
- */
- @SuppressWarnings("rawtypes")
- LazyObject[] arrayElements;
-
- /**
- * Construct a LazyArray object with the ObjectInspector.
- *
- * @param oi
- * the oi representing the type of this LazyArray
- */
- protected LazyArray(LazyListObjectInspector oi) {
- super(oi);
- }
-
- /**
- * Set the row data for this LazyArray.
- *
- * @see LazyObject#init(ByteArrayRef, int, int)
- */
- @Override
- public void init(byte[] bytes, int start, int length) {
- super.init(bytes, start, length);
- parsed = false;
- }
-
- /**
- * Enlarge the size of arrays storing information for the elements inside
- * the array.
- */
- private void adjustArraySize(int newSize) {
- if (elementStart == null || elementStart.length < newSize) {
- elementStart = new int[newSize];
- elementLength = new int[newSize];
- elementInited = new boolean[newSize];
- elementIsNull = new boolean[newSize];
- arrayElements = new LazyObject[newSize];
- }
- }
-
- VInt vInt = new LazyUtils.VInt();
- RecordInfo recordInfo = new LazyUtils.RecordInfo();
-
- /**
- * Parse the bytes and fill elementStart, elementLength, elementInited and
- * elementIsNull.
- */
- private void parse() {
-
- // get the vlong that represents the map size
- LazyUtils.readVInt(bytes, start, vInt);
- arraySize = vInt.value;
- if (0 == arraySize) {
- parsed = true;
- return;
- }
-
- // adjust arrays
- adjustArraySize(arraySize);
- // find out the null-bytes
- int arryByteStart = start + vInt.length;
- int nullByteCur = arryByteStart;
- int nullByteEnd = arryByteStart + (arraySize + 7) / 8;
- // the begin the real elements
- int lastElementByteEnd = nullByteEnd;
- // the list element object inspector
- ObjectInspector listEleObjectInspector = ((ListObjectInspector) oi)
- .getListElementObjectInspector();
- // parsing elements one by one
- for (int i = 0; i < arraySize; i++) {
- elementIsNull[i] = true;
- if ((bytes[nullByteCur] & (1 << (i % 8))) != 0) {
- elementIsNull[i] = false;
- LazyUtils.checkObjectByteInfo(listEleObjectInspector, bytes,
- lastElementByteEnd, recordInfo);
- elementStart[i] = lastElementByteEnd + recordInfo.elementOffset;
- elementLength[i] = recordInfo.elementSize;
- lastElementByteEnd = elementStart[i] + elementLength[i];
- }
- // move onto the next null byte
- if (7 == (i % 8)) {
- nullByteCur++;
- }
- }
-
- Arrays.fill(elementInited, 0, arraySize, false);
- parsed = true;
- }
-
- /**
- * Returns the actual primitive object at the index position inside the
- * array represented by this LazyObject.
- */
- public Object getListElementObject(int index) {
- if (!parsed) {
- parse();
- }
- if (index < 0 || index >= arraySize) {
- return null;
- }
- return uncheckedGetElement(index);
- }
-
- /**
- * Get the element without checking out-of-bound index.
- *
- * @param index
- * index to the array element
- */
- private Object uncheckedGetElement(int index) {
-
- if (elementIsNull[index]) {
- return null;
- } else {
- if (!elementInited[index]) {
- elementInited[index] = true;
- if (arrayElements[index] == null) {
- arrayElements[index] = LazyFactory.createLazyObject((oi)
- .getListElementObjectInspector());
- }
- arrayElements[index].init(bytes, elementStart[index],
- elementLength[index]);
- }
- }
- return arrayElements[index].getObject();
- }
-
- /**
- * Returns the array size.
- */
- public int getListLength() {
- if (!parsed) {
- parse();
- }
- return arraySize;
- }
-
- /**
- * cachedList is reused every time getList is called. Different
- * LazyBianryArray instances cannot share the same cachedList.
- */
- ArrayList<Object> cachedList;
-
- /**
- * Returns the List of actual primitive objects. Returns null for null
- * array.
- */
- public List<Object> getList() {
- if (!parsed) {
- parse();
- }
- if (cachedList == null) {
- cachedList = new ArrayList<Object>(arraySize);
- } else {
- cachedList.clear();
- }
- for (int index = 0; index < arraySize; index++) {
- cachedList.add(uncheckedGetElement(index));
- }
- return cachedList;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java
deleted file mode 100644
index 83b6254..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyBoolean.java
+++ /dev/null
@@ -1,68 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.io.BooleanWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
-
-/**
- * LazyObject for storing a value of boolean.
- *
- * <p>
- * Part of the code is adapted from Apache Harmony Project.
- *
- * As with the specification, this implementation relied on code laid out in <a
- * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
- * Delight, (Addison Wesley, 2002)</a> as well as <a
- * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
- * </p>
- *
- */
-public class LazyBoolean extends
- LazyPrimitive<LazyBooleanObjectInspector, BooleanWritable> {
-
- public LazyBoolean(LazyBooleanObjectInspector oi) {
- super(oi);
- data = new BooleanWritable();
- }
-
- public LazyBoolean(LazyBoolean copy) {
- super(copy);
- data = new BooleanWritable(copy.data.get());
- }
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- // a temporal hack
- assert (1 == length);
- byte val = bytes[start];
- if (val == 0) {
- data.set(false);
- } else if (val == 1) {
- data.set(true);
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java
deleted file mode 100644
index 264015b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyByte.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.io.ByteWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyByteObjectInspector;
-
-/**
- * LazyObject for storing a value of Byte.
- *
- * <p>
- * Part of the code is adapted from Apache Harmony Project.
- *
- * As with the specification, this implementation relied on code laid out in <a
- * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
- * Delight, (Addison Wesley, 2002)</a> as well as <a
- * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
- * </p>
- *
- */
-public class LazyByte extends
- LazyPrimitive<LazyByteObjectInspector, ByteWritable> {
-
- public LazyByte(LazyByteObjectInspector oi) {
- super(oi);
- data = new ByteWritable();
- }
-
- public LazyByte(LazyByte copy) {
- super(copy);
- data = new ByteWritable(copy.data.get());
- }
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- assert (1 == length);
- data.set(bytes[start]);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java
deleted file mode 100644
index a25ae49..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyColumnar.java
+++ /dev/null
@@ -1,211 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-/**
- * LazyObject for storing a struct. The field of a struct can be primitive or
- * non-primitive.
- *
- * LazyStruct does not deal with the case of a NULL struct. That is handled by
- * the parent LazyObject.
- */
-@SuppressWarnings("rawtypes")
-public class LazyColumnar extends LazyNonPrimitive<LazyColumnarObjectInspector> {
-
- /**
- * IFrameTupleReference: the backend of the struct
- */
- IFrameTupleReference tuple;
-
- /**
- * Whether the data is already parsed or not.
- */
- boolean reset;
-
- /**
- * The fields of the struct.
- */
- LazyObject[] fields;
-
- /**
- * Whether init() has been called on the field or not.
- */
- boolean[] fieldVisited;
-
- /**
- * whether it is the first time initialization
- */
- boolean start = true;
-
- /**
- * Construct a LazyStruct object with the ObjectInspector.
- */
- public LazyColumnar(LazyColumnarObjectInspector oi) {
- super(oi);
- }
-
- /**
- * Set the row data for this LazyStruct.
- *
- * @see LazyObject#init(ByteArrayRef, int, int)
- */
- @Override
- public void init(byte[] bytes, int start, int length) {
- super.init(bytes, start, length);
- reset = false;
- }
-
- /**
- * Parse the byte[] and fill each field.
- */
- private void parse() {
-
- if (start) {
- // initialize field array and reusable objects
- List<? extends StructField> fieldRefs = ((StructObjectInspector) oi)
- .getAllStructFieldRefs();
-
- fields = new LazyObject[fieldRefs.size()];
- for (int i = 0; i < fields.length; i++) {
- fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i)
- .getFieldObjectInspector());
- }
- fieldVisited = new boolean[fields.length];
- start = false;
- }
-
- Arrays.fill(fieldVisited, false);
- reset = true;
- }
-
- /**
- * Get one field out of the struct.
- *
- * If the field is a primitive field, return the actual object. Otherwise
- * return the LazyObject. This is because PrimitiveObjectInspector does not
- * have control over the object used by the user - the user simply directly
- * use the Object instead of going through Object
- * PrimitiveObjectInspector.get(Object).
- *
- * @param fieldID
- * The field ID
- * @return The field as a LazyObject
- */
- public Object getField(int fieldID) {
- if (!reset) {
- parse();
- }
- return uncheckedGetField(fieldID);
- }
-
- /**
- * Get the field out of the row without checking parsed. This is called by
- * both getField and getFieldsAsList.
- *
- * @param fieldID
- * The id of the field starting from 0.
- * @param nullSequence
- * The sequence representing NULL value.
- * @return The value of the field
- */
- private Object uncheckedGetField(int fieldID) {
- // get the buffer
- byte[] buffer = tuple.getFieldData(fieldID);
- // get the offset of the field
- int s1 = tuple.getFieldStart(fieldID);
- int l1 = tuple.getFieldLength(fieldID);
-
- if (!fieldVisited[fieldID]) {
- fieldVisited[fieldID] = true;
- fields[fieldID].init(buffer, s1, l1);
- }
- // if (fields[fieldID].getObject() == null) {
- // throw new IllegalStateException("illegal field " + fieldID);
- // }
- return fields[fieldID].getObject();
- }
-
- ArrayList<Object> cachedList;
-
- /**
- * Get the values of the fields as an ArrayList.
- *
- * @return The values of the fields as an ArrayList.
- */
- public ArrayList<Object> getFieldsAsList() {
- if (!reset) {
- parse();
- }
- if (cachedList == null) {
- cachedList = new ArrayList<Object>();
- } else {
- cachedList.clear();
- }
- for (int i = 0; i < fields.length; i++) {
- cachedList.add(uncheckedGetField(i));
- }
- return cachedList;
- }
-
- @Override
- public Object getObject() {
- return this;
- }
-
- protected boolean getParsed() {
- return reset;
- }
-
- protected void setParsed(boolean parsed) {
- this.reset = parsed;
- }
-
- protected LazyObject[] getFields() {
- return fields;
- }
-
- protected void setFields(LazyObject[] fields) {
- this.fields = fields;
- }
-
- protected boolean[] getFieldInited() {
- return fieldVisited;
- }
-
- protected void setFieldInited(boolean[] fieldInited) {
- this.fieldVisited = fieldInited;
- }
-
- /**
- * rebind a frametuplereference to the struct
- */
- public void init(IFrameTupleReference r) {
- this.tuple = r;
- reset = false;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java
deleted file mode 100644
index e7593e4..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyFactory.java
+++ /dev/null
@@ -1,97 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyColumnarObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyListObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyMapObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyByteObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyDoubleObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyFloatObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyIntObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyLongObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyShortObjectInspector;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyStringObjectInspector;
-
-/**
- * LazyFactory.
- *
- */
-public final class LazyFactory {
-
- /**
- * Create a lazy binary primitive class given the type name.
- */
- public static LazyPrimitive<?, ?> createLazyPrimitiveClass(
- PrimitiveObjectInspector oi) {
- PrimitiveCategory p = oi.getPrimitiveCategory();
- switch (p) {
- case BOOLEAN:
- return new LazyBoolean((LazyBooleanObjectInspector) oi);
- case BYTE:
- return new LazyByte((LazyByteObjectInspector) oi);
- case SHORT:
- return new LazyShort((LazyShortObjectInspector) oi);
- case INT:
- return new LazyInteger((LazyIntObjectInspector) oi);
- case LONG:
- return new LazyLong((LazyLongObjectInspector) oi);
- case FLOAT:
- return new LazyFloat((LazyFloatObjectInspector) oi);
- case DOUBLE:
- return new LazyDouble((LazyDoubleObjectInspector) oi);
- case STRING:
- return new LazyString((LazyStringObjectInspector) oi);
- default:
- throw new RuntimeException("Internal error: no LazyObject for " + p);
- }
- }
-
- /**
- * Create a hierarchical LazyObject based on the given typeInfo.
- */
- public static LazyObject<? extends ObjectInspector> createLazyObject(
- ObjectInspector oi) {
- ObjectInspector.Category c = oi.getCategory();
- switch (c) {
- case PRIMITIVE:
- return createLazyPrimitiveClass((PrimitiveObjectInspector) oi);
- case MAP:
- return new LazyMap((LazyMapObjectInspector) oi);
- case LIST:
- return new LazyArray((LazyListObjectInspector) oi);
- case STRUCT: // check whether it is a top-level struct
- if (oi instanceof LazyStructObjectInspector)
- return new LazyStruct((LazyStructObjectInspector) oi);
- else
- return new LazyColumnar((LazyColumnarObjectInspector) oi);
- default:
- throw new RuntimeException("Hive LazySerDe Internal error.");
- }
- }
-
- private LazyFactory() {
- // prevent instantiation
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java
deleted file mode 100644
index c908c40..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyInteger.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.io.IntWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyIntObjectInspector;
-
-/**
- * LazyObject for storing a value of Integer.
- *
- * <p>
- * Part of the code is adapted from Apache Harmony Project.
- *
- * As with the specification, this implementation relied on code laid out in <a
- * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
- * Delight, (Addison Wesley, 2002)</a> as well as <a
- * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
- * </p>
- *
- */
-public class LazyInteger extends
- LazyPrimitive<LazyIntObjectInspector, IntWritable> {
-
- public LazyInteger(LazyIntObjectInspector oi) {
- super(oi);
- data = new IntWritable();
- }
-
- public LazyInteger(LazyInteger copy) {
- super(copy);
- data = new IntWritable(copy.data.get());
- }
-
- /**
- * The reusable vInt for decoding the integer.
- */
- VInt vInt = new LazyUtils.VInt();
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- LazyUtils.readVInt(bytes, start, vInt);
- assert (length == vInt.length);
- if (length != vInt.length)
- throw new IllegalStateException(
- "parse int: length mismatch, expected " + vInt.length
- + " but get " + length);
- data.set(vInt.value);
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java
deleted file mode 100644
index 38097e6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyLong.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.io.LongWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VLong;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyLongObjectInspector;
-
-/**
- * LazyObject for storing a value of Long.
- *
- * <p>
- * Part of the code is adapted from Apache Harmony Project.
- *
- * As with the specification, this implementation relied on code laid out in <a
- * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
- * Delight, (Addison Wesley, 2002)</a> as well as <a
- * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
- * </p>
- *
- */
-public class LazyLong extends
- LazyPrimitive<LazyLongObjectInspector, LongWritable> {
-
- public LazyLong(LazyLongObjectInspector oi) {
- super(oi);
- data = new LongWritable();
- }
-
- public LazyLong(LazyLong copy) {
- super(copy);
- data = new LongWritable(copy.data.get());
- }
-
- /**
- * The reusable vLong for decoding the long.
- */
- VLong vLong = new LazyUtils.VLong();
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- LazyUtils.readVLong(bytes, start, vLong);
- assert (length == vLong.length);
- if (length != vLong.length)
- throw new IllegalStateException("parse long: length mismatch");
- data.set(vLong.value);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java
deleted file mode 100644
index 56bc41b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyMap.java
+++ /dev/null
@@ -1,337 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.util.Arrays;
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
-import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyMapObjectInspector;
-
-/**
- * LazyMap is serialized as follows: start A b c b c b c end bytes[] ->
- * |--------|---|---|---|---| ... |---|---|
- *
- * Section A is the null-bytes. Suppose the map has N key-value pairs, then
- * there are (N*2+7)/8 bytes used as null-bytes. Each bit corresponds to a key
- * or a value and it indicates whether that key or value is null (0) or not null
- * (1).
- *
- * After A, all the bytes are actual serialized data of the map, which are
- * key-value pairs. b represent the keys and c represent the values. Each of
- * them is again a LazyObject.
- *
- */
-
-@SuppressWarnings("rawtypes")
-public class LazyMap extends LazyNonPrimitive<LazyMapObjectInspector> {
-
- private static Log LOG = LogFactory.getLog(LazyMap.class.getName());
-
- /**
- * Whether the data is already parsed or not.
- */
- boolean parsed;
-
- /**
- * The size of the map. Only valid when the data is parsed. -1 when the map
- * is NULL.
- */
- int mapSize = 0;
-
- /**
- * The beginning position and length of key[i] and value[i]. Only valid when
- * the data is parsed.
- */
- int[] keyStart;
- int[] keyLength;
- int[] valueStart;
- int[] valueLength;
- /**
- * Whether valueObjects[i]/keyObjects[i] is initialized or not.
- */
- boolean[] keyInited;
- boolean[] valueInited;
-
- /**
- * Whether valueObjects[i]/keyObjects[i] is null or not This could not be
- * inferred from the length of the object. In particular, a 0-length string
- * is not null.
- */
- boolean[] keyIsNull;
- boolean[] valueIsNull;
-
- /**
- * The keys are stored in an array of LazyPrimitives.
- */
- LazyPrimitive<?, ?>[] keyObjects;
- /**
- * The values are stored in an array of LazyObjects. value[index] will start
- * from KeyEnd[index] + 1, and ends before KeyStart[index+1] - 1.
- */
- LazyObject[] valueObjects;
-
- protected LazyMap(LazyMapObjectInspector oi) {
- super(oi);
- }
-
- /**
- * Set the row data for this LazyMap.
- *
- * @see LazyObject#init(ByteArrayRef, int, int)
- */
- @Override
- public void init(byte[] bytes, int start, int length) {
- super.init(bytes, start, length);
- parsed = false;
- }
-
- /**
- * Adjust the size of arrays: keyStart, keyLength valueStart, valueLength
- * keyInited, keyIsNull valueInited, valueIsNull.
- */
- protected void adjustArraySize(int newSize) {
- if (keyStart == null || keyStart.length < newSize) {
- keyStart = new int[newSize];
- keyLength = new int[newSize];
- valueStart = new int[newSize];
- valueLength = new int[newSize];
- keyInited = new boolean[newSize];
- keyIsNull = new boolean[newSize];
- valueInited = new boolean[newSize];
- valueIsNull = new boolean[newSize];
- keyObjects = new LazyPrimitive<?, ?>[newSize];
- valueObjects = new LazyObject[newSize];
- }
- }
-
- boolean nullMapKey = false;
- VInt vInt = new LazyUtils.VInt();
- RecordInfo recordInfo = new LazyUtils.RecordInfo();
-
- /**
- * Parse the byte[] and fill keyStart, keyLength, keyIsNull valueStart,
- * valueLength and valueIsNull.
- */
- private void parse() {
-
- // get the VInt that represents the map size
- LazyUtils.readVInt(bytes, start, vInt);
- mapSize = vInt.value;
- if (0 == mapSize) {
- parsed = true;
- return;
- }
-
- // adjust arrays
- adjustArraySize(mapSize);
-
- // find out the null-bytes
- int mapByteStart = start + vInt.length;
- int nullByteCur = mapByteStart;
- int nullByteEnd = mapByteStart + (mapSize * 2 + 7) / 8;
- int lastElementByteEnd = nullByteEnd;
-
- // parsing the keys and values one by one
- for (int i = 0; i < mapSize; i++) {
- // parse a key
- keyIsNull[i] = true;
- if ((bytes[nullByteCur] & (1 << ((i * 2) % 8))) != 0) {
- keyIsNull[i] = false;
- LazyUtils.checkObjectByteInfo(
- ((MapObjectInspector) oi).getMapKeyObjectInspector(),
- bytes, lastElementByteEnd, recordInfo);
- keyStart[i] = lastElementByteEnd + recordInfo.elementOffset;
- keyLength[i] = recordInfo.elementSize;
- lastElementByteEnd = keyStart[i] + keyLength[i];
- } else if (!nullMapKey) {
- nullMapKey = true;
- LOG.warn("Null map key encountered! Ignoring similar problems.");
- }
-
- // parse a value
- valueIsNull[i] = true;
- if ((bytes[nullByteCur] & (1 << ((i * 2 + 1) % 8))) != 0) {
- valueIsNull[i] = false;
- LazyUtils.checkObjectByteInfo(
- ((MapObjectInspector) oi).getMapValueObjectInspector(),
- bytes, lastElementByteEnd, recordInfo);
- valueStart[i] = lastElementByteEnd + recordInfo.elementOffset;
- valueLength[i] = recordInfo.elementSize;
- lastElementByteEnd = valueStart[i] + valueLength[i];
- }
-
- // move onto the next null byte
- if (3 == (i % 4)) {
- nullByteCur++;
- }
- }
-
- Arrays.fill(keyInited, 0, mapSize, false);
- Arrays.fill(valueInited, 0, mapSize, false);
- parsed = true;
- }
-
- /**
- * Get the value object with the index without checking parsed.
- *
- * @param index
- * The index into the array starting from 0
- */
- private LazyObject uncheckedGetValue(int index) {
- if (valueIsNull[index]) {
- return null;
- }
- if (!valueInited[index]) {
- valueInited[index] = true;
- if (valueObjects[index] == null) {
- valueObjects[index] = LazyFactory
- .createLazyObject(((MapObjectInspector) oi)
- .getMapValueObjectInspector());
- }
- valueObjects[index].init(bytes, valueStart[index],
- valueLength[index]);
- }
- return valueObjects[index];
- }
-
- /**
- * Get the value in the map for the key.
- *
- * If there are multiple matches (which is possible in the serialized
- * format), only the first one is returned.
- *
- * The most efficient way to get the value for the key is to serialize the
- * key and then try to find it in the array. We do linear search because in
- * most cases, user only wants to get one or two values out of the map, and
- * the cost of building up a HashMap is substantially higher.
- *
- * @param key
- * The key object that we are looking for.
- * @return The corresponding value object, or NULL if not found
- */
- public Object getMapValueElement(Object key) {
- if (!parsed) {
- parse();
- }
- // search for the key
- for (int i = 0; i < mapSize; i++) {
- LazyPrimitive<?, ?> lazyKeyI = uncheckedGetKey(i);
- if (lazyKeyI == null) {
- continue;
- }
- // getWritableObject() will convert LazyPrimitive to actual
- // primitive
- // writable objects.
- Object keyI = lazyKeyI.getWritableObject();
- if (keyI == null) {
- continue;
- }
- if (keyI.equals(key)) {
- // Got a match, return the value
- LazyObject v = uncheckedGetValue(i);
- return v == null ? v : v.getObject();
- }
- }
- return null;
- }
-
- /**
- * Get the key object with the index without checking parsed.
- *
- * @param index
- * The index into the array starting from 0
- */
- private LazyPrimitive<?, ?> uncheckedGetKey(int index) {
- if (keyIsNull[index]) {
- return null;
- }
- if (!keyInited[index]) {
- keyInited[index] = true;
- if (keyObjects[index] == null) {
- // Keys are always primitive
- keyObjects[index] = LazyFactory
- .createLazyPrimitiveClass((PrimitiveObjectInspector) ((MapObjectInspector) oi)
- .getMapKeyObjectInspector());
- }
- keyObjects[index].init(bytes, keyStart[index], keyLength[index]);
- }
- return keyObjects[index];
- }
-
- /**
- * cachedMap is reused for different calls to getMap(). But each LazyMap has
- * a separate cachedMap so we won't overwrite the data by accident.
- */
- LinkedHashMap<Object, Object> cachedMap;
-
- /**
- * Return the map object representing this LazyMap. Note that the keyObjects
- * will be Writable primitive objects.
- *
- * @return the map object
- */
- public Map<Object, Object> getMap() {
- if (!parsed) {
- parse();
- }
- if (cachedMap == null) {
- // Use LinkedHashMap to provide deterministic order
- cachedMap = new LinkedHashMap<Object, Object>();
- } else {
- cachedMap.clear();
- }
-
- // go through each element of the map
- for (int i = 0; i < mapSize; i++) {
- LazyPrimitive<?, ?> lazyKey = uncheckedGetKey(i);
- if (lazyKey == null) {
- continue;
- }
- Object key = lazyKey.getObject();
- // do not overwrite if there are duplicate keys
- if (key != null && !cachedMap.containsKey(key)) {
- LazyObject lazyValue = uncheckedGetValue(i);
- Object value = (lazyValue == null ? null : lazyValue
- .getObject());
- cachedMap.put(key, value);
- }
- }
- return cachedMap;
- }
-
- /**
- * Get the size of the map represented by this LazyMap.
- *
- * @return The size of the map
- */
- public int getMapSize() {
- if (!parsed) {
- parse();
- }
- return mapSize;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java
deleted file mode 100644
index b151f2d..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyNonPrimitive.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-/**
- * LazyPrimitive stores a primitive Object in a LazyObject.
- */
-public abstract class LazyNonPrimitive<OI extends ObjectInspector> extends
- LazyObject<OI> {
-
- protected byte[] bytes;
- protected int start;
- protected int length;
-
- /**
- * Create a LazyNonPrimitive object with the specified ObjectInspector.
- *
- * @param oi
- * The ObjectInspector would have to have a hierarchy of
- * LazyObjectInspectors with the leaf nodes being
- * WritableObjectInspectors. It's used both for accessing the
- * type hierarchy of the complex object, as well as getting meta
- * information (separator, nullSequence, etc) when parsing the
- * lazy object.
- */
- protected LazyNonPrimitive(OI oi) {
- super(oi);
- bytes = null;
- start = 0;
- length = 0;
- }
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (bytes == null) {
- throw new RuntimeException("bytes cannot be null!");
- }
- this.bytes = bytes;
- this.start = start;
- this.length = length;
- assert start >= 0;
- assert start + length <= bytes.length;
- }
-
- @Override
- public Object getObject() {
- return this;
- }
-
- @Override
- public int hashCode() {
- return LazyUtils.hashBytes(bytes, start, length);
- }
-
- @Override
- public void init(IFrameTupleReference tuple) {
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java
deleted file mode 100644
index 9aaaa88..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyObject.java
+++ /dev/null
@@ -1,78 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-/**
- * LazyObject stores an object in a range of bytes in a byte[].
- *
- * A LazyObject can represent any primitive object or hierarchical object like
- * array, map or struct.
- */
-public abstract class LazyObject<OI extends ObjectInspector> {
-
- OI oi;
-
- /**
- * Create a LazyObject.
- *
- * @param oi
- * Derived classes can access meta information about this Lazy
- * Object (e.g, separator, nullSequence, escaper) from it.
- */
- protected LazyObject(OI oi) {
- this.oi = oi;
- }
-
- /**
- * Set the data for this LazyObject. We take ByteArrayRef instead of byte[]
- * so that we will be able to drop the reference to byte[] by a single
- * assignment. The ByteArrayRef object can be reused across multiple rows.
- *
- * @param bytes
- * The wrapper of the byte[].
- * @param start
- * The start position inside the bytes.
- * @param length
- * The length of the data, starting from "start"
- * @see ByteArrayRef
- */
- public abstract void init(byte[] bytes, int start, int length);
-
- public abstract void init(IFrameTupleReference tuple);
-
- /**
- * If the LazyObject is a primitive Object, then deserialize it and return
- * the actual primitive Object. Otherwise (array, map, struct), return this.
- */
- public abstract Object getObject();
-
- @Override
- public abstract int hashCode();
-
- protected OI getInspector() {
- return oi;
- }
-
- protected void setInspector(OI oi) {
- this.oi = oi;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java
deleted file mode 100644
index 888e5b2..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyPrimitive.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.io.Writable;
-
-import edu.uci.ics.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
-
-/**
- * LazyPrimitive stores a primitive Object in a LazyObject.
- */
-public abstract class LazyPrimitive<OI extends ObjectInspector, T extends Writable>
- extends LazyObject<OI> {
-
- LazyPrimitive(OI oi) {
- super(oi);
- }
-
- LazyPrimitive(LazyPrimitive<OI, T> copy) {
- super(copy.oi);
- isNull = copy.isNull;
- }
-
- T data;
- boolean isNull = false;
-
- /**
- * Returns the primitive object represented by this LazyObject. This is
- * useful because it can make sure we have "null" for null objects.
- */
- @Override
- public Object getObject() {
- return isNull ? null : this;
- }
-
- public T getWritableObject() {
- return isNull ? null : data;
- }
-
- @Override
- public String toString() {
- return isNull ? "null" : data.toString();
- }
-
- @Override
- public int hashCode() {
- return isNull ? 0 : data.hashCode();
- }
-
- @Override
- public void init(IFrameTupleReference tuple) {
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
deleted file mode 100644
index 4d0dff6..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazySerDe.java
+++ /dev/null
@@ -1,477 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde.Constants;
-import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.serde2.ByteStream.Output;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-
-/**
- * The LazySerDe class combines the lazy property of LazySimpleSerDe class and
- * the binary property of BinarySortable class. Lazy means a field is not
- * deserialized until required. Binary means a field is serialized in binary
- * compact format.
- */
-public class LazySerDe implements SerDe {
-
- public static final Log LOG = LogFactory.getLog(LazySerDe.class.getName());
-
- public LazySerDe() {
- }
-
- List<String> columnNames;
- List<TypeInfo> columnTypes;
-
- TypeInfo rowTypeInfo;
- ObjectInspector cachedObjectInspector;
-
- // The object for storing row data
- LazyColumnar cachedLazyStruct;
-
- /**
- * Initialize the SerDe with configuration and table information.
- */
- @Override
- public void initialize(Configuration conf, Properties tbl)
- throws SerDeException {
- // Get column names and types
- String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
- String columnTypeProperty = tbl
- .getProperty(Constants.LIST_COLUMN_TYPES);
- if (columnNameProperty.length() == 0) {
- columnNames = new ArrayList<String>();
- } else {
- columnNames = Arrays.asList(columnNameProperty.split(","));
- }
- if (columnTypeProperty.length() == 0) {
- columnTypes = new ArrayList<TypeInfo>();
- } else {
- columnTypes = TypeInfoUtils
- .getTypeInfosFromTypeString(columnTypeProperty);
- }
- assert (columnNames.size() == columnTypes.size());
- // Create row related objects
- rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames,
- columnTypes);
- // Create the object inspector and the lazy binary struct object
- cachedObjectInspector = LazyUtils.getLazyObjectInspectorFromTypeInfo(
- rowTypeInfo, true);
- cachedLazyStruct = (LazyColumnar) LazyFactory
- .createLazyObject(cachedObjectInspector);
- // output debug info
- LOG.debug("LazySerDe initialized with: columnNames=" + columnNames
- + " columnTypes=" + columnTypes);
- }
-
- /**
- * Returns the ObjectInspector for the row.
- */
- @Override
- public ObjectInspector getObjectInspector() throws SerDeException {
- return cachedObjectInspector;
- }
-
- /**
- * Returns the Writable Class after serialization.
- */
- @Override
- public Class<? extends Writable> getSerializedClass() {
- return BytesWritable.class;
- }
-
- // The wrapper for byte array
- ByteArrayRef byteArrayRef;
-
- /**
- * Deserialize a table record to a Lazy struct.
- */
- @SuppressWarnings("deprecation")
- @Override
- public Object deserialize(Writable field) throws SerDeException {
- if (byteArrayRef == null) {
- byteArrayRef = new ByteArrayRef();
- }
- if (field instanceof BytesWritable) {
- BytesWritable b = (BytesWritable) field;
- if (b.getSize() == 0) {
- return null;
- }
- // For backward-compatibility with hadoop 0.17
- byteArrayRef.setData(b.get());
- cachedLazyStruct.init(byteArrayRef.getData(), 0, b.getSize());
- } else if (field instanceof Text) {
- Text t = (Text) field;
- if (t.getLength() == 0) {
- return null;
- }
- byteArrayRef.setData(t.getBytes());
- cachedLazyStruct.init(byteArrayRef.getData(), 0, t.getLength());
- } else {
- throw new SerDeException(getClass().toString()
- + ": expects either BytesWritable or Text object!");
- }
- return cachedLazyStruct;
- }
-
- /**
- * The reusable output buffer and serialize byte buffer.
- */
- BytesWritable serializeBytesWritable = new BytesWritable();
- ByteStream.Output serializeByteStream = new ByteStream.Output();
-
- /**
- * Serialize an object to a byte buffer in a binary compact way.
- */
- @Override
- public Writable serialize(Object obj, ObjectInspector objInspector)
- throws SerDeException {
- // make sure it is a struct record or not
- serializeByteStream.reset();
-
- if (objInspector.getCategory() != Category.STRUCT) {
- // serialize the primitive object
- serialize(serializeByteStream, obj, objInspector);
- } else {
- // serialize the row as a struct
- serializeStruct(serializeByteStream, obj,
- (StructObjectInspector) objInspector);
- }
- // return the serialized bytes
- serializeBytesWritable.set(serializeByteStream.getData(), 0,
- serializeByteStream.getCount());
- return serializeBytesWritable;
- }
-
- boolean nullMapKey = false;
-
- /**
- * Serialize a struct object without writing the byte size. This function is
- * shared by both row serialization and struct serialization.
- *
- * @param byteStream
- * the byte stream storing the serialization data
- * @param obj
- * the struct object to serialize
- * @param objInspector
- * the struct object inspector
- */
- private void serializeStruct(Output byteStream, Object obj,
- StructObjectInspector soi) {
- // do nothing for null struct
- if (null == obj) {
- return;
- }
- /*
- * Interleave serializing one null byte and 8 struct fields in each
- * round, in order to support data deserialization with different table
- * schemas
- */
- List<? extends StructField> fields = soi.getAllStructFieldRefs();
- int size = fields.size();
- int lasti = 0;
- byte nullByte = 0;
- for (int i = 0; i < size; i++) {
- // set bit to 1 if a field is not null
- if (null != soi.getStructFieldData(obj, fields.get(i))) {
- nullByte |= 1 << (i % 8);
- }
- // write the null byte every eight elements or
- // if this is the last element and serialize the
- // corresponding 8 struct fields at the same time
- if (7 == i % 8 || i == size - 1) {
- serializeByteStream.write(nullByte);
- for (int j = lasti; j <= i; j++) {
- serialize(serializeByteStream, soi.getStructFieldData(obj,
- fields.get(j)), fields.get(j)
- .getFieldObjectInspector());
- }
- lasti = i + 1;
- nullByte = 0;
- }
- }
- }
-
- /**
- * A recursive function that serialize an object to a byte buffer based on
- * its object inspector.
- *
- * @param byteStream
- * the byte stream storing the serialization data
- * @param obj
- * the object to serialize
- * @param objInspector
- * the object inspector
- */
- private void serialize(Output byteStream, Object obj,
- ObjectInspector objInspector) {
-
- // do nothing for null object
- if (null == obj) {
- return;
- }
-
- switch (objInspector.getCategory()) {
- case PRIMITIVE: {
- PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
- switch (poi.getPrimitiveCategory()) {
- case VOID: {
- return;
- }
- case BOOLEAN: {
- boolean v = ((BooleanObjectInspector) poi).get(obj);
- byteStream.write((byte) (v ? 1 : 0));
- return;
- }
- case BYTE: {
- ByteObjectInspector boi = (ByteObjectInspector) poi;
- byte v = boi.get(obj);
- byteStream.write(v);
- return;
- }
- case SHORT: {
- ShortObjectInspector spoi = (ShortObjectInspector) poi;
- short v = spoi.get(obj);
- byteStream.write((byte) (v >> 8));
- byteStream.write((byte) (v));
- return;
- }
- case INT: {
- IntObjectInspector ioi = (IntObjectInspector) poi;
- int v = ioi.get(obj);
- LazyUtils.writeVInt(byteStream, v);
- return;
- }
- case LONG: {
- LongObjectInspector loi = (LongObjectInspector) poi;
- long v = loi.get(obj);
- LazyUtils.writeVLong(byteStream, v);
- return;
- }
- case FLOAT: {
- FloatObjectInspector foi = (FloatObjectInspector) poi;
- int v = Float.floatToIntBits(foi.get(obj));
- byteStream.write((byte) (v >> 24));
- byteStream.write((byte) (v >> 16));
- byteStream.write((byte) (v >> 8));
- byteStream.write((byte) (v));
- return;
- }
- case DOUBLE: {
- DoubleObjectInspector doi = (DoubleObjectInspector) poi;
- long v = Double.doubleToLongBits(doi.get(obj));
- byteStream.write((byte) (v >> 56));
- byteStream.write((byte) (v >> 48));
- byteStream.write((byte) (v >> 40));
- byteStream.write((byte) (v >> 32));
- byteStream.write((byte) (v >> 24));
- byteStream.write((byte) (v >> 16));
- byteStream.write((byte) (v >> 8));
- byteStream.write((byte) (v));
- return;
- }
- case STRING: {
- StringObjectInspector soi = (StringObjectInspector) poi;
- Text t = soi.getPrimitiveWritableObject(obj);
- /* write byte size of the string which is a vint */
- int length = t.getLength();
- LazyUtils.writeVInt(byteStream, length);
- /* write string itself */
- byte[] data = t.getBytes();
- byteStream.write(data, 0, length);
- return;
- }
- default: {
- throw new RuntimeException("Unrecognized type: "
- + poi.getPrimitiveCategory());
- }
- }
- }
- case LIST: {
- ListObjectInspector loi = (ListObjectInspector) objInspector;
- ObjectInspector eoi = loi.getListElementObjectInspector();
-
- // 1/ reserve spaces for the byte size of the list
- // which is a integer and takes four bytes
- int byteSizeStart = byteStream.getCount();
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- int listStart = byteStream.getCount();
-
- // 2/ write the size of the list as a VInt
- int size = loi.getListLength(obj);
- LazyUtils.writeVInt(byteStream, size);
-
- // 3/ write the null bytes
- byte nullByte = 0;
- for (int eid = 0; eid < size; eid++) {
- // set the bit to 1 if an element is not null
- if (null != loi.getListElement(obj, eid)) {
- nullByte |= 1 << (eid % 8);
- }
- // store the byte every eight elements or
- // if this is the last element
- if (7 == eid % 8 || eid == size - 1) {
- byteStream.write(nullByte);
- nullByte = 0;
- }
- }
-
- // 4/ write element by element from the list
- for (int eid = 0; eid < size; eid++) {
- serialize(byteStream, loi.getListElement(obj, eid), eoi);
- }
-
- // 5/ update the list byte size
- int listEnd = byteStream.getCount();
- int listSize = listEnd - listStart;
- byte[] bytes = byteStream.getData();
- bytes[byteSizeStart] = (byte) (listSize >> 24);
- bytes[byteSizeStart + 1] = (byte) (listSize >> 16);
- bytes[byteSizeStart + 2] = (byte) (listSize >> 8);
- bytes[byteSizeStart + 3] = (byte) (listSize);
-
- return;
- }
- case MAP: {
- MapObjectInspector moi = (MapObjectInspector) objInspector;
- ObjectInspector koi = moi.getMapKeyObjectInspector();
- ObjectInspector voi = moi.getMapValueObjectInspector();
- Map<?, ?> map = moi.getMap(obj);
-
- // 1/ reserve spaces for the byte size of the map
- // which is a integer and takes four bytes
- int byteSizeStart = byteStream.getCount();
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- int mapStart = byteStream.getCount();
-
- // 2/ write the size of the map which is a VInt
- int size = map.size();
- LazyUtils.writeVInt(byteStream, size);
-
- // 3/ write the null bytes
- int b = 0;
- byte nullByte = 0;
- for (Map.Entry<?, ?> entry : map.entrySet()) {
- // set the bit to 1 if a key is not null
- if (null != entry.getKey()) {
- nullByte |= 1 << (b % 8);
- } else if (!nullMapKey) {
- nullMapKey = true;
- LOG.warn("Null map key encountered! Ignoring similar problems.");
- }
- b++;
- // set the bit to 1 if a value is not null
- if (null != entry.getValue()) {
- nullByte |= 1 << (b % 8);
- }
- b++;
- // write the byte to stream every 4 key-value pairs
- // or if this is the last key-value pair
- if (0 == b % 8 || b == size * 2) {
- byteStream.write(nullByte);
- nullByte = 0;
- }
- }
-
- // 4/ write key-value pairs one by one
- for (Map.Entry<?, ?> entry : map.entrySet()) {
- serialize(byteStream, entry.getKey(), koi);
- serialize(byteStream, entry.getValue(), voi);
- }
-
- // 5/ update the byte size of the map
- int mapEnd = byteStream.getCount();
- int mapSize = mapEnd - mapStart;
- byte[] bytes = byteStream.getData();
- bytes[byteSizeStart] = (byte) (mapSize >> 24);
- bytes[byteSizeStart + 1] = (byte) (mapSize >> 16);
- bytes[byteSizeStart + 2] = (byte) (mapSize >> 8);
- bytes[byteSizeStart + 3] = (byte) (mapSize);
-
- return;
- }
- case STRUCT: {
- // 1/ reserve spaces for the byte size of the struct
- // which is a integer and takes four bytes
- int byteSizeStart = byteStream.getCount();
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- byteStream.write((byte) 0);
- int structStart = byteStream.getCount();
-
- // 2/ serialize the struct
- serializeStruct(byteStream, obj,
- (StructObjectInspector) objInspector);
-
- // 3/ update the byte size of the struct
- int structEnd = byteStream.getCount();
- int structSize = structEnd - structStart;
- byte[] bytes = byteStream.getData();
- bytes[byteSizeStart] = (byte) (structSize >> 24);
- bytes[byteSizeStart + 1] = (byte) (structSize >> 16);
- bytes[byteSizeStart + 2] = (byte) (structSize >> 8);
- bytes[byteSizeStart + 3] = (byte) (structSize);
-
- return;
- }
- default: {
- throw new RuntimeException("Unrecognized type: "
- + objInspector.getCategory());
- }
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java
deleted file mode 100644
index 7484b72..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyShort.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.hive.serde2.io.ShortWritable;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyShortObjectInspector;
-
-/**
- * LazyObject for storing a value of Short.
- *
- * <p>
- * Part of the code is adapted from Apache Harmony Project.
- *
- * As with the specification, this implementation relied on code laid out in <a
- * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
- * Delight, (Addison Wesley, 2002)</a> as well as <a
- * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
- * </p>
- *
- */
-public class LazyShort extends
- LazyPrimitive<LazyShortObjectInspector, ShortWritable> {
-
- public LazyShort(LazyShortObjectInspector oi) {
- super(oi);
- data = new ShortWritable();
- }
-
- public LazyShort(LazyShort copy) {
- super(copy);
- data = new ShortWritable(copy.data.get());
- }
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- assert (2 == length);
- data.set(LazyUtils.byteArrayToShort(bytes, start));
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java
deleted file mode 100644
index c13533b..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyString.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import org.apache.hadoop.io.Text;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.VInt;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.LazyStringObjectInspector;
-
-/**
- * LazyObject for storing a value of String.
- */
-public class LazyString extends LazyPrimitive<LazyStringObjectInspector, Text> {
-
- public LazyString(LazyStringObjectInspector oi) {
- super(oi);
- data = new Text();
- }
-
- public LazyString(LazyString copy) {
- super(copy);
- data = new Text(copy.data);
- }
-
- VInt vInt = new LazyUtils.VInt();
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- if (length == 0) {
- isNull = true;
- return;
- } else
- isNull = false;
-
- // get the byte length of the string
- LazyUtils.readVInt(bytes, start, vInt);
- if (vInt.value + vInt.length != length)
- throw new IllegalStateException(
- "parse string: length mismatch, expected "
- + (vInt.value + vInt.length) + " but get " + length);
- assert (length - vInt.length > -1);
- data.set(bytes, start + vInt.length, length - vInt.length);
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java
deleted file mode 100644
index 61cc335..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyStruct.java
+++ /dev/null
@@ -1,244 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils.RecordInfo;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyStructObjectInspector;
-
-/**
- * LazyStruct is serialized as follows: start A B A B A B end bytes[] ->
- * |-----|---------|--- ... ---|-----|---------|
- *
- * Section A is one null-byte, corresponding to eight struct fields in Section
- * B. Each bit indicates whether the corresponding field is null (0) or not null
- * (1). Each field is a LazyObject.
- *
- * Following B, there is another section A and B. This pattern repeats until the
- * all struct fields are serialized.
- */
-public class LazyStruct extends LazyNonPrimitive<LazyStructObjectInspector> {
-
- private static Log LOG = LogFactory.getLog(LazyStruct.class.getName());
-
- /**
- * Whether the data is already parsed or not.
- */
- boolean parsed;
-
- /**
- * The fields of the struct.
- */
- @SuppressWarnings("rawtypes")
- LazyObject[] fields;
-
- /**
- * Whether a field is initialized or not.
- */
- boolean[] fieldInited;
-
- /**
- * Whether a field is null or not. Because length is 0 does not means the
- * field is null. In particular, a 0-length string is not null.
- */
- boolean[] fieldIsNull;
-
- /**
- * The start positions and lengths of struct fields. Only valid when the
- * data is parsed.
- */
- int[] fieldStart;
- int[] fieldLength;
-
- /**
- * Construct a LazyStruct object with an ObjectInspector.
- */
- protected LazyStruct(LazyStructObjectInspector oi) {
- super(oi);
- }
-
- @Override
- public void init(byte[] bytes, int start, int length) {
- super.init(bytes, start, length);
- parsed = false;
- }
-
- RecordInfo recordInfo = new LazyUtils.RecordInfo();
- boolean missingFieldWarned = false;
- boolean extraFieldWarned = false;
-
- /**
- * Parse the byte[] and fill fieldStart, fieldLength, fieldInited and
- * fieldIsNull.
- */
- private void parse() {
-
- List<? extends StructField> fieldRefs = ((StructObjectInspector) oi)
- .getAllStructFieldRefs();
-
- if (fields == null) {
- fields = new LazyObject[fieldRefs.size()];
- for (int i = 0; i < fields.length; i++) {
- ObjectInspector insp = fieldRefs.get(i)
- .getFieldObjectInspector();
- fields[i] = insp == null ? null : LazyFactory
- .createLazyObject(insp);
- }
- fieldInited = new boolean[fields.length];
- fieldIsNull = new boolean[fields.length];
- fieldStart = new int[fields.length];
- fieldLength = new int[fields.length];
- }
-
- /**
- * Please note that one null byte is followed by eight fields, then more
- * null byte and fields.
- */
-
- int fieldId = 0;
- int structByteEnd = start + length;
-
- byte nullByte = bytes[start];
- int lastFieldByteEnd = start + 1;
- // Go through all bytes in the byte[]
- for (int i = 0; i < fields.length; i++) {
- fieldIsNull[i] = true;
- if ((nullByte & (1 << (i % 8))) != 0) {
- fieldIsNull[i] = false;
- LazyUtils.checkObjectByteInfo(fieldRefs.get(i)
- .getFieldObjectInspector(), bytes, lastFieldByteEnd,
- recordInfo);
- fieldStart[i] = lastFieldByteEnd + recordInfo.elementOffset;
- fieldLength[i] = recordInfo.elementSize;
- lastFieldByteEnd = fieldStart[i] + fieldLength[i];
- }
-
- // count how many fields are there
- if (lastFieldByteEnd <= structByteEnd) {
- fieldId++;
- }
- // next byte is a null byte if there are more bytes to go
- if (7 == (i % 8)) {
- if (lastFieldByteEnd < structByteEnd) {
- nullByte = bytes[lastFieldByteEnd];
- lastFieldByteEnd++;
- } else {
- // otherwise all null afterwards
- nullByte = 0;
- lastFieldByteEnd++;
- }
- }
- }
-
- // Extra bytes at the end?
- if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) {
- extraFieldWarned = true;
- LOG.warn("Extra bytes detected at the end of the row! Ignoring similar "
- + "problems.");
- }
-
- // Missing fields?
- if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) {
- missingFieldWarned = true;
- LOG.warn("Missing fields! Expected " + fields.length
- + " fields but " + "only got " + fieldId
- + "! Ignoring similar problems.");
- }
-
- Arrays.fill(fieldInited, false);
- parsed = true;
- }
-
- /**
- * Get one field out of the struct.
- *
- * If the field is a primitive field, return the actual object. Otherwise
- * return the LazyObject. This is because PrimitiveObjectInspector does not
- * have control over the object used by the user - the user simply directly
- * use the Object instead of going through Object
- * PrimitiveObjectInspector.get(Object).
- *
- * @param fieldID
- * The field ID
- * @return The field as a LazyObject
- */
- public Object getField(int fieldID) {
- if (!parsed) {
- parse();
- }
- return uncheckedGetField(fieldID);
- }
-
- /**
- * Get the field out of the row without checking parsed. This is called by
- * both getField and getFieldsAsList.
- *
- * @param fieldID
- * The id of the field starting from 0.
- * @return The value of the field
- */
- private Object uncheckedGetField(int fieldID) {
- // Test the length first so in most cases we avoid doing a byte[]
- // comparison.
- if (fieldIsNull[fieldID]) {
- return null;
- }
- if (!fieldInited[fieldID]) {
- fieldInited[fieldID] = true;
- fields[fieldID].init(bytes, fieldStart[fieldID],
- fieldLength[fieldID]);
- }
- return fields[fieldID].getObject();
- }
-
- ArrayList<Object> cachedList;
-
- /**
- * Get the values of the fields as an ArrayList.
- *
- * @return The values of the fields as an ArrayList.
- */
- public ArrayList<Object> getFieldsAsList() {
- if (!parsed) {
- parse();
- }
- if (cachedList == null) {
- cachedList = new ArrayList<Object>();
- } else {
- cachedList.clear();
- }
- for (int i = 0; i < fields.length; i++) {
- cachedList.add(uncheckedGetField(i));
- }
- return cachedList;
- }
-
- @Override
- public Object getObject() {
- return this;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java
deleted file mode 100644
index 2d0406c..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/LazyUtils.java
+++ /dev/null
@@ -1,529 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.hadoop.hive.serde2.ByteStream.Output;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.WritableUtils;
-
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.LazyObjectInspectorFactory;
-import edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-
-/**
- * LazyUtils.
- *
- */
-public final class LazyUtils {
-
- /**
- * Convert the byte array to an int starting from the given offset. Refer to
- * code by aeden on DZone Snippets:
- *
- * @param b
- * the byte array
- * @param offset
- * the array offset
- * @return the integer
- */
- public static int byteArrayToInt(byte[] b, int offset) {
- int value = 0;
- for (int i = 0; i < 4; i++) {
- int shift = (4 - 1 - i) * 8;
- value += (b[i + offset] & 0x000000FF) << shift;
- }
- return value;
- }
-
- /**
- * Convert the byte array to a long starting from the given offset.
- *
- * @param b
- * the byte array
- * @param offset
- * the array offset
- * @return the long
- */
- public static long byteArrayToLong(byte[] b, int offset) {
- long value = 0;
- for (int i = 0; i < 8; i++) {
- int shift = (8 - 1 - i) * 8;
- value += ((long) (b[i + offset] & 0x00000000000000FF)) << shift;
- }
- return value;
- }
-
- /**
- * Convert the byte array to a short starting from the given offset.
- *
- * @param b
- * the byte array
- * @param offset
- * the array offset
- * @return the short
- */
- public static short byteArrayToShort(byte[] b, int offset) {
- short value = 0;
- value += (b[offset] & 0x000000FF) << 8;
- value += (b[offset + 1] & 0x000000FF);
- return value;
- }
-
- /**
- * Record is the unit that data is serialized in. A record includes two
- * parts. The first part stores the size of the element and the second part
- * stores the real element. size element record ->
- * |----|-------------------------|
- *
- * A RecordInfo stores two information of a record, the size of the "size"
- * part which is the element offset and the size of the element part which
- * is element size.
- */
- public static class RecordInfo {
- public RecordInfo() {
- elementOffset = 0;
- elementSize = 0;
- }
-
- public byte elementOffset;
- public int elementSize;
-
- @Override
- public String toString() {
- return "(" + elementOffset + ", " + elementSize + ")";
- }
- }
-
- static VInt vInt = new LazyUtils.VInt();
-
- /**
- * Check a particular field and set its size and offset in bytes based on
- * the field type and the bytes arrays.
- *
- * For void, boolean, byte, short, int, long, float and double, there is no
- * offset and the size is fixed. For string, map, list, struct, the first
- * four bytes are used to store the size. So the offset is 4 and the size is
- * computed by concating the first four bytes together. The first four bytes
- * are defined with respect to the offset in the bytes arrays.
- *
- * @param objectInspector
- * object inspector of the field
- * @param bytes
- * bytes arrays store the table row
- * @param offset
- * offset of this field
- * @param recordInfo
- * modify this byteinfo object and return it
- */
- public static void checkObjectByteInfo(ObjectInspector objectInspector,
- byte[] bytes, int offset, RecordInfo recordInfo) {
- Category category = objectInspector.getCategory();
- switch (category) {
- case PRIMITIVE:
- PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector)
- .getPrimitiveCategory();
- switch (primitiveCategory) {
- case VOID:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = 0;
- break;
- case BOOLEAN:
- case BYTE:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = 1;
- break;
- case SHORT:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = 2;
- break;
- case FLOAT:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = 4;
- break;
- case DOUBLE:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = 8;
- break;
- case INT:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = WritableUtils
- .decodeVIntSize(bytes[offset]);
- break;
- case LONG:
- recordInfo.elementOffset = 0;
- recordInfo.elementSize = WritableUtils
- .decodeVIntSize(bytes[offset]);
- break;
- case STRING:
- // using vint instead of 4 bytes
- LazyUtils.readVInt(bytes, offset, vInt);
- recordInfo.elementOffset = vInt.length;
- recordInfo.elementSize = vInt.value;
- break;
- default: {
- throw new RuntimeException("Unrecognized primitive type: "
- + primitiveCategory);
- }
- }
- break;
- case LIST:
- case MAP:
- case STRUCT:
- recordInfo.elementOffset = 4;
- recordInfo.elementSize = LazyUtils.byteArrayToInt(bytes, offset);
- break;
- default: {
- throw new RuntimeException("Unrecognized non-primitive type: "
- + category);
- }
- }
- }
-
- /**
- * A zero-compressed encoded long.
- */
- public static class VLong {
- public VLong() {
- value = 0;
- length = 0;
- }
-
- public long value;
- public byte length;
- };
-
- /**
- * Reads a zero-compressed encoded long from a byte array and returns it.
- *
- * @param bytes
- * the byte array
- * @param offset
- * offset of the array to read from
- * @param vlong
- * storing the deserialized long and its size in byte
- */
- public static void readVLong(byte[] bytes, int offset, VLong vlong) {
- byte firstByte = bytes[offset];
- vlong.length = (byte) WritableUtils.decodeVIntSize(firstByte);
- if (vlong.length == 1) {
- vlong.value = firstByte;
- return;
- }
- long i = 0;
- for (int idx = 0; idx < vlong.length - 1; idx++) {
- byte b = bytes[offset + 1 + idx];
- i = i << 8;
- i = i | (b & 0xFF);
- }
- vlong.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i);
- }
-
- /**
- * A zero-compressed encoded integer.
- */
- public static class VInt implements Serializable {
- private static final long serialVersionUID = 1L;
-
- public VInt() {
- value = 0;
- length = 0;
- }
-
- public int value;
- public byte length;
- };
-
- /**
- * Reads a zero-compressed encoded int from a byte array and returns it.
- *
- * @param bytes
- * the byte array
- * @param offset
- * offset of the array to read from
- * @param vInt
- * storing the deserialized int and its size in byte
- */
- public static void readVInt(byte[] bytes, int offset, VInt vInt) {
- byte firstByte = bytes[offset];
- vInt.length = (byte) WritableUtils.decodeVIntSize(firstByte);
- if (vInt.length == 1) {
- vInt.value = firstByte;
- return;
- }
- int i = 0;
- for (int idx = 0; idx < vInt.length - 1; idx++) {
- byte b = bytes[offset + 1 + idx];
- i = i << 8;
- i = i | (b & 0xFF);
- }
- vInt.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1) : i);
- }
-
- /**
- * Writes a zero-compressed encoded int to a byte array.
- *
- * @param byteStream
- * the byte array/stream
- * @param i
- * the int
- */
- public static void writeVInt(Output byteStream, int i) {
- writeVLong(byteStream, i);
- }
-
- /**
- * Write a zero-compressed encoded long to a byte array.
- *
- * @param byteStream
- * the byte array/stream
- * @param l
- * the long
- */
- public static void writeVLong(Output byteStream, long l) {
- if (l >= -112 && l <= 127) {
- byteStream.write((byte) l);
- return;
- }
-
- int len = -112;
- if (l < 0) {
- l ^= -1L; // take one's complement'
- len = -120;
- }
-
- long tmp = l;
- while (tmp != 0) {
- tmp = tmp >> 8;
- len--;
- }
-
- byteStream.write((byte) len);
-
- len = (len < -120) ? -(len + 120) : -(len + 112);
-
- for (int idx = len; idx != 0; idx--) {
- int shiftbits = (idx - 1) * 8;
- long mask = 0xFFL << shiftbits;
- byteStream.write((byte) ((l & mask) >> shiftbits));
- }
- }
-
- static Map<TypeInfo, ObjectInspector> cachedLazyObjectInspector = new ConcurrentHashMap<TypeInfo, ObjectInspector>();
-
- /**
- * Returns the lazy binary object inspector that can be used to inspect an
- * lazy binary object of that typeInfo
- *
- * For primitive types, we use the standard writable object inspector.
- */
- public static ObjectInspector getLazyObjectInspectorFromTypeInfo(
- TypeInfo typeInfo, boolean topLevel) {
- if (typeInfo == null)
- throw new IllegalStateException("illegal type null ");
- ObjectInspector result = cachedLazyObjectInspector.get(typeInfo);
- if (result == null) {
- switch (typeInfo.getCategory()) {
- case PRIMITIVE: {
- result = PrimitiveObjectInspectorFactory
- .getPrimitiveLazyObjectInspector(((PrimitiveTypeInfo) typeInfo)
- .getPrimitiveCategory());
- break;
- }
- case LIST: {
- ObjectInspector elementObjectInspector = getLazyObjectInspectorFromTypeInfo(
- ((ListTypeInfo) typeInfo).getListElementTypeInfo(),
- false);
- result = LazyObjectInspectorFactory
- .getLazyListObjectInspector(elementObjectInspector);
- break;
- }
- case MAP: {
- MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
- ObjectInspector keyObjectInspector = getLazyObjectInspectorFromTypeInfo(
- mapTypeInfo.getMapKeyTypeInfo(), false);
- ObjectInspector valueObjectInspector = getLazyObjectInspectorFromTypeInfo(
- mapTypeInfo.getMapValueTypeInfo(), false);
- result = LazyObjectInspectorFactory.getLazyMapObjectInspector(
- keyObjectInspector, valueObjectInspector);
- break;
- }
- case STRUCT: {
- StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
- List<String> fieldNames = structTypeInfo
- .getAllStructFieldNames();
- List<TypeInfo> fieldTypeInfos = structTypeInfo
- .getAllStructFieldTypeInfos();
- List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(
- fieldTypeInfos.size());
-
- for (int i = 0; i < fieldTypeInfos.size(); i++) {
- fieldObjectInspectors
- .add(getLazyObjectInspectorFromTypeInfo(
- fieldTypeInfos.get(i), false));
- }
-
- // if it is top level then create columnar
- if (topLevel)
- result = LazyObjectInspectorFactory
- .getLazyColumnarObjectInspector(fieldNames,
- fieldObjectInspectors);
- // if it is not top level then create struct
- else
- result = LazyObjectInspectorFactory
- .getLazyStructObjectInspector(fieldNames,
- fieldObjectInspectors);
-
- break;
- }
- default: {
- result = null;
- }
- }
- cachedLazyObjectInspector.put(typeInfo, result);
- }
- return result;
- }
-
- /**
- * get top-level lazy object inspector
- *
- * @param fieldNames
- * @param fieldTypeInfos
- * @return
- */
- public static ObjectInspector getLazyObjectInspector(
- List<String> fieldNames, List<TypeInfo> fieldTypeInfos) {
- List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(
- fieldTypeInfos.size());
- for (int i = 0; i < fieldTypeInfos.size(); i++) {
- fieldObjectInspectors.add(getLazyObjectInspectorFromTypeInfo(
- fieldTypeInfos.get(i), false));
- }
-
- return LazyObjectInspectorFactory.getLazyColumnarObjectInspector(
- fieldNames, fieldObjectInspectors);
- }
-
- private LazyUtils() {
- // prevent instantiation
- }
-
- /**
- * Returns -1 if the first byte sequence is lexicographically less than the
- * second; returns +1 if the second byte sequence is lexicographically less
- * than the first; otherwise return 0.
- */
- public static int compare(byte[] b1, int start1, int length1, byte[] b2,
- int start2, int length2) {
-
- int min = Math.min(length1, length2);
-
- for (int i = 0; i < min; i++) {
- if (b1[start1 + i] == b2[start2 + i]) {
- continue;
- }
- if (b1[start1 + i] < b2[start2 + i]) {
- return -1;
- } else {
- return 1;
- }
- }
-
- if (length1 < length2) {
- return -1;
- }
- if (length1 > length2) {
- return 1;
- }
- return 0;
- }
-
- public static int hashBytes(byte[] data, int start, int len) {
- int hash = 1;
- for (int i = start; i < len; i++) {
- hash = (31 * hash) + data[i];
- }
- return hash;
- }
-
- /**
- * Writes a zero-compressed encoded int to a byte array.
- *
- * @param byteStream
- * the byte array/stream
- * @param i
- * the int
- */
- public static void writeVInt(DataOutput byteStream, int i)
- throws IOException {
- writeVLong(byteStream, i);
- }
-
- /**
- * Write a zero-compressed encoded long to a byte array.
- *
- * @param byteStream
- * the byte array/stream
- * @param l
- * the long
- */
- public static void writeVLong(DataOutput byteStream, long l)
- throws IOException {
- if (l >= -112 && l <= 127) {
- byteStream.write((byte) l);
- return;
- }
-
- int len = -112;
- if (l < 0) {
- l ^= -1L; // take one's complement'
- len = -120;
- }
-
- long tmp = l;
- while (tmp != 0) {
- tmp = tmp >> 8;
- len--;
- }
-
- byteStream.write((byte) len);
-
- len = (len < -120) ? -(len + 120) : -(len + 112);
-
- for (int idx = len; idx != 0; idx--) {
- int shiftbits = (idx - 1) * 8;
- long mask = 0xFFL << shiftbits;
- byteStream.write((byte) ((l & mask) >> shiftbits));
- }
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java
deleted file mode 100644
index b20f185..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyColumnarObjectInspector.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
-
-import java.io.Serializable;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyColumnar;
-
-/**
- * ObjectInspector for LazyColumnar.
- *
- * @see LazyColumnar
- */
-public class LazyColumnarObjectInspector extends StandardStructObjectInspector
- implements Serializable {
-
- private static final long serialVersionUID = 1L;
-
- public LazyColumnarObjectInspector(List<String> structFieldNames,
- List<ObjectInspector> structFieldObjectInspectors) {
- super(structFieldNames, structFieldObjectInspectors);
- }
-
- public LazyColumnarObjectInspector(List<StructField> fields) {
- super(fields);
- }
-
- @Override
- public Object getStructFieldData(Object data, StructField fieldRef) {
- if (data == null) {
- return null;
- }
- LazyColumnar struct = (LazyColumnar) data;
- MyField f = (MyField) fieldRef;
-
- int fieldID = f.getFieldID();
- assert (fieldID >= 0 && fieldID < fields.size());
-
- Object column = struct.getField(fieldID);
- return column;
- }
-
- @Override
- public List<Object> getStructFieldsDataAsList(Object data) {
- if (data == null) {
- return null;
- }
- LazyColumnar struct = (LazyColumnar) data;
- return struct.getFieldsAsList();
- }
-
- public String toString() {
- String str = "";
- for (MyField f : fields) {
- str += f.getFieldName() + ":"
- + f.getFieldObjectInspector().getTypeName() + " ";
- }
- return str;
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java
deleted file mode 100644
index 439b130..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyObjectInspectorFactory.java
+++ /dev/null
@@ -1,93 +0,0 @@
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-
-/**
- * ObjectInspectorFactory is the primary way to create new ObjectInspector
- * instances.
- *
- * SerDe classes should call the static functions in this library to create an
- * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
- *
- * The reason of having caches here is that ObjectInspectors do not have an
- * internal state - so ObjectInspectors with the same construction parameters
- * should result in exactly the same ObjectInspector.
- */
-
-public final class LazyObjectInspectorFactory {
-
- static ConcurrentHashMap<ArrayList<Object>, LazyColumnarObjectInspector> cachedLazyColumnarObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyColumnarObjectInspector>();
-
- static ConcurrentHashMap<ArrayList<Object>, LazyStructObjectInspector> cachedLazyStructObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyStructObjectInspector>();
-
- static ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector> cachedLazyListObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyListObjectInspector>();
-
- static ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector> cachedLazyMapObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyMapObjectInspector>();
-
- public static LazyColumnarObjectInspector getLazyColumnarObjectInspector(
- List<String> structFieldNames,
- List<ObjectInspector> structFieldObjectInspectors) {
- ArrayList<Object> signature = new ArrayList<Object>();
- signature.add(structFieldNames);
- signature.add(structFieldObjectInspectors);
- LazyColumnarObjectInspector result = cachedLazyColumnarObjectInspector
- .get(signature);
- if (result == null) {
- result = new LazyColumnarObjectInspector(structFieldNames,
- structFieldObjectInspectors);
- cachedLazyColumnarObjectInspector.put(signature, result);
- }
- return result;
- }
-
- public static LazyStructObjectInspector getLazyStructObjectInspector(
- List<String> structFieldNames,
- List<ObjectInspector> structFieldObjectInspectors) {
- ArrayList<Object> signature = new ArrayList<Object>();
- signature.add(structFieldNames);
- signature.add(structFieldObjectInspectors);
- LazyStructObjectInspector result = cachedLazyStructObjectInspector
- .get(signature);
- if (result == null) {
- result = new LazyStructObjectInspector(structFieldNames,
- structFieldObjectInspectors);
- cachedLazyStructObjectInspector.put(signature, result);
- }
- return result;
- }
-
- public static LazyListObjectInspector getLazyListObjectInspector(
- ObjectInspector listElementInspector) {
- ArrayList<Object> signature = new ArrayList<Object>();
- signature.add(listElementInspector);
- LazyListObjectInspector result = cachedLazyListObjectInspector
- .get(signature);
- if (result == null) {
- result = new LazyListObjectInspector(listElementInspector);
- cachedLazyListObjectInspector.put(signature, result);
- }
- return result;
- }
-
- public static LazyMapObjectInspector getLazyMapObjectInspector(
- ObjectInspector keyInspector, ObjectInspector valueInspector) {
- ArrayList<Object> signature = new ArrayList<Object>();
- signature.add(keyInspector);
- signature.add(valueInspector);
- LazyMapObjectInspector result = cachedLazyMapObjectInspector
- .get(signature);
- if (result == null) {
- result = new LazyMapObjectInspector(keyInspector, valueInspector);
- cachedLazyMapObjectInspector.put(signature, result);
- }
- return result;
- }
-
- private LazyObjectInspectorFactory() {
- // prevent instantiation
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java
deleted file mode 100644
index 1a50233..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/LazyStructObjectInspector.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector;
-
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyStruct;
-
-/**
- * ObjectInspector for LazyStruct.
- *
- * @see LazyStruct
- */
-public class LazyStructObjectInspector extends StandardStructObjectInspector {
-
- protected LazyStructObjectInspector(List<String> structFieldNames,
- List<ObjectInspector> structFieldObjectInspectors) {
- super(structFieldNames, structFieldObjectInspectors);
- }
-
- protected LazyStructObjectInspector(List<StructField> fields) {
- super(fields);
- }
-
- @Override
- public Object getStructFieldData(Object data, StructField fieldRef) {
- if (data == null) {
- return null;
- }
- LazyStruct struct = (LazyStruct) data;
- MyField f = (MyField) fieldRef;
-
- int fieldID = f.getFieldID();
- assert (fieldID >= 0 && fieldID < fields.size());
-
- return struct.getField(fieldID);
- }
-
- @Override
- public List<Object> getStructFieldsDataAsList(Object data) {
- if (data == null) {
- return null;
- }
- LazyStruct struct = (LazyStruct) data;
- return struct.getFieldsAsList();
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
deleted file mode 100644
index 134dc5a..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
-
-import java.util.ArrayList;
-import java.util.concurrent.ConcurrentHashMap;
-
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-
-/**
- * LazyPrimitiveObjectInspectorFactory is the primary way to create new
- * ObjectInspector instances.
- *
- * SerDe classes should call the static functions in this library to create an
- * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
- *
- * The reason of having caches here is that ObjectInspector is because
- * ObjectInspectors do not have an internal state - so ObjectInspectors with the
- * same construction parameters should result in exactly the same
- * ObjectInspector.
- */
-public final class LazyPrimitiveObjectInspectorFactory {
-
- public static final LazyBooleanObjectInspector LAZY_BOOLEAN_OBJECT_INSPECTOR = new LazyBooleanObjectInspector();
- public static final LazyByteObjectInspector LAZY_BYTE_OBJECT_INSPECTOR = new LazyByteObjectInspector();
- public static final LazyShortObjectInspector LAZY_SHORT_OBJECT_INSPECTOR = new LazyShortObjectInspector();
- public static final LazyIntObjectInspector LAZY_INT_OBJECT_INSPECTOR = new LazyIntObjectInspector();
- public static final LazyLongObjectInspector LAZY_LONG_OBJECT_INSPECTOR = new LazyLongObjectInspector();
- public static final LazyFloatObjectInspector LAZY_FLOAT_OBJECT_INSPECTOR = new LazyFloatObjectInspector();
- public static final LazyDoubleObjectInspector LAZY_DOUBLE_OBJECT_INSPECTOR = new LazyDoubleObjectInspector();
- public static final LazyVoidObjectInspector LAZY_VOID_OBJECT_INSPECTOR = new LazyVoidObjectInspector();
-
- static ConcurrentHashMap<ArrayList<Object>, LazyStringObjectInspector> cachedLazyStringObjectInspector = new ConcurrentHashMap<ArrayList<Object>, LazyStringObjectInspector>();
-
- public static LazyStringObjectInspector getLazyStringObjectInspector(
- boolean escaped, byte escapeChar) {
- ArrayList<Object> signature = new ArrayList<Object>();
- signature.add(Boolean.valueOf(escaped));
- signature.add(Byte.valueOf(escapeChar));
- LazyStringObjectInspector result = cachedLazyStringObjectInspector
- .get(signature);
- if (result == null) {
- result = new LazyStringObjectInspector(escaped, escapeChar);
- cachedLazyStringObjectInspector.put(signature, result);
- }
- return result;
- }
-
- public static AbstractPrimitiveLazyObjectInspector<?> getLazyObjectInspector(
- PrimitiveCategory primitiveCategory, boolean escaped,
- byte escapeChar) {
-
- switch (primitiveCategory) {
- case BOOLEAN:
- return LAZY_BOOLEAN_OBJECT_INSPECTOR;
- case BYTE:
- return LAZY_BYTE_OBJECT_INSPECTOR;
- case SHORT:
- return LAZY_SHORT_OBJECT_INSPECTOR;
- case INT:
- return LAZY_INT_OBJECT_INSPECTOR;
- case LONG:
- return LAZY_LONG_OBJECT_INSPECTOR;
- case FLOAT:
- return LAZY_FLOAT_OBJECT_INSPECTOR;
- case DOUBLE:
- return LAZY_DOUBLE_OBJECT_INSPECTOR;
- case STRING:
- return getLazyStringObjectInspector(escaped, escapeChar);
- case VOID:
- return LAZY_VOID_OBJECT_INSPECTOR;
- default:
- throw new RuntimeException(
- "Internal error: Cannot find ObjectInspector " + " for "
- + primitiveCategory);
- }
- }
-
- private LazyPrimitiveObjectInspectorFactory() {
- // prevent instantiation
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java
deleted file mode 100644
index 5832f34..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/LazyStringObjectInspector.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
-
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
-import org.apache.hadoop.io.Text;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyString;
-
-/**
- * A WritableStringObjectInspector inspects a Text Object.
- */
-public class LazyStringObjectInspector extends
- AbstractPrimitiveLazyObjectInspector<Text> implements
- StringObjectInspector {
-
- boolean escaped;
- byte escapeChar;
-
- LazyStringObjectInspector(boolean escaped, byte escapeChar) {
- super(PrimitiveObjectInspectorUtils.stringTypeEntry);
- this.escaped = escaped;
- this.escapeChar = escapeChar;
- }
-
- @Override
- public Object copyObject(Object o) {
- return o == null ? null : new LazyString((LazyString) o);
- }
-
- @Override
- public Text getPrimitiveWritableObject(Object o) {
- return o == null ? null : ((LazyString) o).getWritableObject();
- }
-
- @Override
- public String getPrimitiveJavaObject(Object o) {
- return o == null ? null : ((LazyString) o).getWritableObject()
- .toString();
- }
-
- public boolean isEscaped() {
- return escaped;
- }
-
- public byte getEscapeChar() {
- return escapeChar;
- }
-
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
deleted file mode 100644
index e70bdb9..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/lazy/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
+++ /dev/null
@@ -1,89 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.serde.lazy.objectinspector.primitive;
-
-import java.util.HashMap;
-
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
-
-/**
- * PrimitiveObjectInspectorFactory is the primary way to create new
- * PrimitiveObjectInspector instances.
- *
- * The reason of having caches here is that ObjectInspector is because
- * ObjectInspectors do not have an internal state - so ObjectInspectors with the
- * same construction parameters should result in exactly the same
- * ObjectInspector.
- */
-public final class PrimitiveObjectInspectorFactory {
-
- public static final LazyBooleanObjectInspector LazyBooleanObjectInspector = new LazyBooleanObjectInspector();
- public static final LazyByteObjectInspector LazyByteObjectInspector = new LazyByteObjectInspector();
- public static final LazyShortObjectInspector LazyShortObjectInspector = new LazyShortObjectInspector();
- public static final LazyIntObjectInspector LazyIntObjectInspector = new LazyIntObjectInspector();
- public static final LazyLongObjectInspector LazyLongObjectInspector = new LazyLongObjectInspector();
- public static final LazyFloatObjectInspector LazyFloatObjectInspector = new LazyFloatObjectInspector();
- public static final LazyDoubleObjectInspector LazyDoubleObjectInspector = new LazyDoubleObjectInspector();
- public static final LazyStringObjectInspector LazyStringObjectInspector = new LazyStringObjectInspector(
- false, (byte) '\\');
- public static final LazyVoidObjectInspector LazyVoidObjectInspector = new LazyVoidObjectInspector();
-
- private static HashMap<PrimitiveCategory, AbstractPrimitiveLazyObjectInspector<?>> cachedPrimitiveLazyInspectorCache = new HashMap<PrimitiveCategory, AbstractPrimitiveLazyObjectInspector<?>>();
-
- static {
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.BOOLEAN,
- LazyBooleanObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.BYTE,
- LazyByteObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.SHORT,
- LazyShortObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.INT,
- LazyIntObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.LONG,
- LazyLongObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.FLOAT,
- LazyFloatObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.DOUBLE,
- LazyDoubleObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.STRING,
- LazyStringObjectInspector);
- cachedPrimitiveLazyInspectorCache.put(PrimitiveCategory.VOID,
- LazyVoidObjectInspector);
- }
-
- /**
- * Returns the PrimitiveWritableObjectInspector for the PrimitiveCategory.
- *
- * @param primitiveCategory
- */
- public static AbstractPrimitiveLazyObjectInspector<?> getPrimitiveLazyObjectInspector(
- PrimitiveCategory primitiveCategory) {
- AbstractPrimitiveLazyObjectInspector<?> result = cachedPrimitiveLazyInspectorCache
- .get(primitiveCategory);
- if (result == null) {
- throw new RuntimeException(
- "Internal error: Cannot find ObjectInspector " + " for "
- + primitiveCategory);
- }
- return result;
- }
-
- private PrimitiveObjectInspectorFactory() {
- // prevent instantiation
- }
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java
deleted file mode 100644
index aeea68f..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/IHiveParser.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package edu.uci.ics.hivesterix.serde.parser;
-
-import java.io.IOException;
-
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-
-public interface IHiveParser {
- /**
- * parse one hive rwo into
- *
- * @param row
- * @param objectInspector
- * @param tb
- */
- public void parse(byte[] data, int start, int length, ArrayTupleBuilder tb)
- throws IOException;
-}
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java
deleted file mode 100644
index 3aeb058..0000000
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/serde/parser/TextToBinaryTupleParser.java
+++ /dev/null
@@ -1,184 +0,0 @@
-package edu.uci.ics.hivesterix.serde.parser;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
-import org.apache.hadoop.hive.serde2.lazy.LazyLong;
-import org.apache.hadoop.hive.serde2.lazy.LazyShort;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.io.Text;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazyUtils;
-import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
-import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
-
-public class TextToBinaryTupleParser implements IHiveParser {
- private int[] invertedIndex;
- private int[] fieldEnds;
- private int lastNecessaryFieldIndex;
- private LazySimpleStructObjectInspector inputObjectInspector;
- private List<? extends StructField> fieldRefs;
-
- public TextToBinaryTupleParser(int[] outputColumnsOffset,
- ObjectInspector structInspector) {
- int size = 0;
- for (int i = 0; i < outputColumnsOffset.length; i++)
- if (outputColumnsOffset[i] >= 0)
- size++;
- invertedIndex = new int[size];
- for (int i = 0; i < outputColumnsOffset.length; i++)
- if (outputColumnsOffset[i] >= 0) {
- invertedIndex[outputColumnsOffset[i]] = i;
- lastNecessaryFieldIndex = i;
- }
- fieldEnds = new int[outputColumnsOffset.length];
- for (int i = 0; i < fieldEnds.length; i++)
- fieldEnds[i] = 0;
- inputObjectInspector = (LazySimpleStructObjectInspector) structInspector;
- fieldRefs = inputObjectInspector.getAllStructFieldRefs();
- }
-
- @Override
- public void parse(byte[] bytes, int start, int length, ArrayTupleBuilder tb)
- throws IOException {
- byte separator = inputObjectInspector.getSeparator();
- boolean lastColumnTakesRest = inputObjectInspector
- .getLastColumnTakesRest();
- boolean isEscaped = inputObjectInspector.isEscaped();
- byte escapeChar = inputObjectInspector.getEscapeChar();
- DataOutput output = tb.getDataOutput();
-
- int structByteEnd = start + length - 1;
- int fieldId = 0;
- int fieldByteEnd = start;
-
- // Go through all bytes in the byte[]
- while (fieldByteEnd <= structByteEnd
- && fieldId <= lastNecessaryFieldIndex) {
- if (fieldByteEnd == structByteEnd
- || bytes[fieldByteEnd] == separator) {
- // Reached the end of a field?
- if (lastColumnTakesRest && fieldId == fieldEnds.length - 1) {
- fieldByteEnd = structByteEnd;
- }
- fieldEnds[fieldId] = fieldByteEnd;
- if (fieldId == fieldEnds.length - 1
- || fieldByteEnd == structByteEnd) {
- // for the case of null fields
- for (int i = fieldId; i < fieldEnds.length; i++) {
- fieldEnds[i] = fieldByteEnd;
- }
- break;
- }
- fieldByteEnd++;
- fieldId++;
- } else {
- if (isEscaped && bytes[fieldByteEnd] == escapeChar
- && fieldByteEnd + 1 < structByteEnd) {
- // ignore the char after escape_char
- fieldByteEnd += 2;
- } else {
- fieldByteEnd++;
- }
- }
- }
-
- for (int i = 0; i < invertedIndex.length; i++) {
- int index = invertedIndex[i];
- StructField fieldRef = fieldRefs.get(index);
- ObjectInspector inspector = fieldRef.getFieldObjectInspector();
- Category category = inspector.getCategory();
- int fieldStart = index == 0 ? 0 : fieldEnds[index - 1] + 1;
- int fieldEnd = fieldEnds[index];
- if (bytes[fieldEnd] == separator)
- fieldEnd--;
- int fieldLen = fieldEnd - fieldStart + 1;
- switch (category) {
- case PRIMITIVE:
- PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector;
- switch (poi.getPrimitiveCategory()) {
- case VOID: {
- break;
- }
- case BOOLEAN: {
- output.write(bytes[fieldStart]);
- break;
- }
- case BYTE: {
- output.write(bytes[fieldStart]);
- break;
- }
- case SHORT: {
- short v = LazyShort.parseShort(bytes, fieldStart, fieldLen);
- output.write((byte) (v >> 8));
- output.write((byte) (v));
- break;
- }
- case INT: {
- int v = LazyInteger.parseInt(bytes, fieldStart, fieldLen);
- LazyUtils.writeVInt(output, v);
- break;
- }
- case LONG: {
- long v = LazyLong.parseLong(bytes, fieldStart, fieldLen);
- LazyUtils.writeVLong(output, v);
- break;
- }
- case FLOAT: {
- float value = Float.parseFloat(Text.decode(bytes,
- fieldStart, fieldLen));
- int v = Float.floatToIntBits(value);
- output.write((byte) (v >> 24));
- output.write((byte) (v >> 16));
- output.write((byte) (v >> 8));
- output.write((byte) (v));
- break;
- }
- case DOUBLE: {
- try {
- double value = Double.parseDouble(Text.decode(bytes,
- fieldStart, fieldLen));
- long v = Double.doubleToLongBits(value);
- output.write((byte) (v >> 56));
- output.write((byte) (v >> 48));
- output.write((byte) (v >> 40));
- output.write((byte) (v >> 32));
- output.write((byte) (v >> 24));
- output.write((byte) (v >> 16));
- output.write((byte) (v >> 8));
- output.write((byte) (v));
- } catch (NumberFormatException e) {
- throw e;
- }
- break;
- }
- case STRING: {
- LazyUtils.writeVInt(output, fieldLen);
- output.write(bytes, fieldStart, fieldLen);
- break;
- }
- default: {
- throw new RuntimeException("Unrecognized type: "
- + poi.getPrimitiveCategory());
- }
- }
- break;
- case STRUCT:
- throw new NotImplementedException("Unrecognized type: struct ");
- case LIST:
- throw new NotImplementedException("Unrecognized type: struct ");
- case MAP:
- throw new NotImplementedException("Unrecognized type: struct ");
- case UNION:
- throw new NotImplementedException("Unrecognized type: struct ");
- }
- tb.addFieldEndOffset();
- }
- }
-}
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/Driver.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/Driver.java
deleted file mode 100644
index 57e2cc0..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/Driver.java
+++ /dev/null
@@ -1,1441 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql;
-
-import java.io.DataInput;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Queue;
-import java.util.Set;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.JavaUtils;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.MetaStoreUtils;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.metastore.api.Schema;
-import org.apache.hadoop.hive.ql.exec.ConditionalTask;
-import org.apache.hadoop.hive.ql.exec.ExecDriver;
-import org.apache.hadoop.hive.ql.exec.FetchTask;
-import org.apache.hadoop.hive.ql.exec.MapRedTask;
-import org.apache.hadoop.hive.ql.exec.Operator;
-import org.apache.hadoop.hive.ql.exec.StatsTask;
-import org.apache.hadoop.hive.ql.exec.TableScanOperator;
-import org.apache.hadoop.hive.ql.exec.Task;
-import org.apache.hadoop.hive.ql.exec.TaskFactory;
-import org.apache.hadoop.hive.ql.exec.TaskResult;
-import org.apache.hadoop.hive.ql.exec.TaskRunner;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
-import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
-import org.apache.hadoop.hive.ql.hooks.Hook;
-import org.apache.hadoop.hive.ql.hooks.HookContext;
-import org.apache.hadoop.hive.ql.hooks.PostExecute;
-import org.apache.hadoop.hive.ql.hooks.PreExecute;
-import org.apache.hadoop.hive.ql.hooks.ReadEntity;
-import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockManagerCtx;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
-import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
-import org.apache.hadoop.hive.ql.lockmgr.LockException;
-import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
-import org.apache.hadoop.hive.ql.metadata.DummyPartition;
-import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.HiveUtils;
-import org.apache.hadoop.hive.ql.metadata.Partition;
-import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
-import org.apache.hadoop.hive.ql.parse.ASTNode;
-import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
-import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.ErrorMsg;
-import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
-import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
-import org.apache.hadoop.hive.ql.parse.ParseContext;
-import org.apache.hadoop.hive.ql.parse.ParseDriver;
-import org.apache.hadoop.hive.ql.parse.ParseException;
-import org.apache.hadoop.hive.ql.parse.ParseUtils;
-import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
-import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
-import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
-import org.apache.hadoop.hive.ql.plan.ConditionalResolver;
-import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles;
-import org.apache.hadoop.hive.ql.plan.HiveOperation;
-import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.processors.CommandProcessor;
-import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
-import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.mapred.ClusterStatus;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.util.ReflectionUtils;
-
-import edu.uci.ics.hivesterix.runtime.exec.HyracksExecutionEngine;
-import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;
-
-@SuppressWarnings({ "deprecation", "unused" })
-public class Driver implements CommandProcessor {
-
- static final private Log LOG = LogFactory.getLog(Driver.class.getName());
- static final private LogHelper console = new LogHelper(LOG);
-
- // hive-sterix
- private IExecutionEngine engine;
- private boolean hivesterix = false;
-
- private int maxRows = 100;
- ByteStream.Output bos = new ByteStream.Output();
-
- private HiveConf conf;
- private DataInput resStream;
- private Context ctx;
- private QueryPlan plan;
- private Schema schema;
- private HiveLockManager hiveLockMgr;
-
- private String errorMessage;
- private String SQLState;
-
- // A limit on the number of threads that can be launched
- private int maxthreads;
- private final int sleeptime = 2000;
-
- protected int tryCount = Integer.MAX_VALUE;
-
- private int checkLockManager() {
- boolean supportConcurrency = conf
- .getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
- if (supportConcurrency && (hiveLockMgr == null)) {
- try {
- setLockManager();
- } catch (SemanticException e) {
- errorMessage = "FAILED: Error in semantic analysis: "
- + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(
- errorMessage,
- "\n"
- + org.apache.hadoop.util.StringUtils
- .stringifyException(e));
- return (12);
- }
- }
- return (0);
- }
-
- private void setLockManager() throws SemanticException {
- boolean supportConcurrency = conf
- .getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
- if (supportConcurrency) {
- String lockMgr = conf.getVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER);
- if ((lockMgr == null) || (lockMgr.isEmpty())) {
- throw new SemanticException(
- ErrorMsg.LOCKMGR_NOT_SPECIFIED.getMsg());
- }
-
- try {
- hiveLockMgr = (HiveLockManager) ReflectionUtils.newInstance(
- conf.getClassByName(lockMgr), conf);
- hiveLockMgr.setContext(new HiveLockManagerCtx(conf));
- } catch (Exception e) {
- throw new SemanticException(
- ErrorMsg.LOCKMGR_NOT_INITIALIZED.getMsg()
- + e.getMessage());
- }
- }
- }
-
- public void init() {
- Operator.resetId();
- }
-
- /**
- * Return the status information about the Map-Reduce cluster
- */
- public ClusterStatus getClusterStatus() throws Exception {
- ClusterStatus cs;
- try {
- JobConf job = new JobConf(conf, ExecDriver.class);
- JobClient jc = new JobClient(job);
- cs = jc.getClusterStatus();
- } catch (Exception e) {
- e.printStackTrace();
- throw e;
- }
- LOG.info("Returning cluster status: " + cs.toString());
- return cs;
- }
-
- public Schema getSchema() {
- return schema;
- }
-
- /**
- * Get a Schema with fields represented with native Hive types
- */
- public static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) {
- Schema schema = null;
-
- // If we have a plan, prefer its logical result schema if it's
- // available; otherwise, try digging out a fetch task; failing that,
- // give up.
- if (sem == null) {
- // can't get any info without a plan
- } else if (sem.getResultSchema() != null) {
- List<FieldSchema> lst = sem.getResultSchema();
- schema = new Schema(lst, null);
- } else if (sem.getFetchTask() != null) {
- FetchTask ft = sem.getFetchTask();
- TableDesc td = ft.getTblDesc();
- // partitioned tables don't have tableDesc set on the FetchTask.
- // Instead
- // they have a list of PartitionDesc objects, each with a table
- // desc.
- // Let's
- // try to fetch the desc for the first partition and use it's
- // deserializer.
- if (td == null && ft.getWork() != null
- && ft.getWork().getPartDesc() != null) {
- if (ft.getWork().getPartDesc().size() > 0) {
- td = ft.getWork().getPartDesc().get(0).getTableDesc();
- }
- }
-
- if (td == null) {
- LOG.info("No returning schema.");
- } else {
- String tableName = "result";
- List<FieldSchema> lst = null;
- try {
- lst = MetaStoreUtils.getFieldsFromDeserializer(tableName,
- td.getDeserializer());
- } catch (Exception e) {
- LOG.warn("Error getting schema: "
- + org.apache.hadoop.util.StringUtils
- .stringifyException(e));
- }
- if (lst != null) {
- schema = new Schema(lst, null);
- }
- }
- }
- if (schema == null) {
- schema = new Schema();
- }
- LOG.info("Returning Hive schema: " + schema);
- return schema;
- }
-
- /**
- * Get a Schema with fields represented with Thrift DDL types
- */
- public Schema getThriftSchema() throws Exception {
- Schema schema;
- try {
- schema = getSchema();
- if (schema != null) {
- List<FieldSchema> lst = schema.getFieldSchemas();
- // Go over the schema and convert type to thrift type
- if (lst != null) {
- for (FieldSchema f : lst) {
- f.setType(MetaStoreUtils.typeToThriftType(f.getType()));
- }
- }
- }
- } catch (Exception e) {
- e.printStackTrace();
- throw e;
- }
- LOG.info("Returning Thrift schema: " + schema);
- return schema;
- }
-
- /**
- * Return the maximum number of rows returned by getResults
- */
- public int getMaxRows() {
- return maxRows;
- }
-
- /**
- * Set the maximum number of rows returned by getResults
- */
- public void setMaxRows(int maxRows) {
- this.maxRows = maxRows;
- }
-
- public boolean hasReduceTasks(List<Task<? extends Serializable>> tasks) {
- if (tasks == null) {
- return false;
- }
-
- boolean hasReduce = false;
- for (Task<? extends Serializable> task : tasks) {
- if (task.hasReduce()) {
- return true;
- }
-
- hasReduce = (hasReduce || hasReduceTasks(task.getChildTasks()));
- }
- return hasReduce;
- }
-
- /**
- * for backwards compatibility with current tests
- */
- public Driver(HiveConf conf) {
- this.conf = conf;
-
- // hivesterix
- engine = new HyracksExecutionEngine(conf);
- }
-
- public Driver() {
- if (SessionState.get() != null) {
- conf = SessionState.get().getConf();
- }
-
- // hivesterix
- engine = new HyracksExecutionEngine(conf);
- }
-
- // hivesterix: plan printer
- public Driver(HiveConf conf, PrintWriter planPrinter) {
- this.conf = conf;
- engine = new HyracksExecutionEngine(conf, planPrinter);
- }
-
- public void clear() {
- this.hivesterix = false;
- }
-
- /**
- * Compile a new query. Any currently-planned query associated with this
- * Driver is discarded.
- *
- * @param command
- * The SQL query to compile.
- */
- public int compile(String command) {
- if (plan != null) {
- close();
- plan = null;
- }
-
- TaskFactory.resetId();
-
- try {
- command = new VariableSubstitution().substitute(conf, command);
- ctx = new Context(conf);
-
- ParseDriver pd = new ParseDriver();
- ASTNode tree = pd.parse(command, ctx);
- tree = ParseUtils.findRootNonNullToken(tree);
-
- BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
- List<AbstractSemanticAnalyzerHook> saHooks = getSemanticAnalyzerHooks();
-
- // Do semantic analysis and plan generation
- if (saHooks != null) {
- HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
- hookCtx.setConf(conf);
- for (AbstractSemanticAnalyzerHook hook : saHooks) {
- tree = hook.preAnalyze(hookCtx, tree);
- }
- sem.analyze(tree, ctx);
- for (AbstractSemanticAnalyzerHook hook : saHooks) {
- hook.postAnalyze(hookCtx, sem.getRootTasks());
- }
- } else {
- sem.analyze(tree, ctx);
- }
-
- LOG.info("Semantic Analysis Completed");
-
- // validate the plan
- sem.validate();
-
- plan = new QueryPlan(command, sem);
- // initialize FetchTask right here
- if (plan.getFetchTask() != null) {
- plan.getFetchTask().initialize(conf, plan, null);
- }
-
- // get the output schema
- schema = getSchema(sem, conf);
-
- // test Only - serialize the query plan and deserialize it
- if (sem instanceof SemanticAnalyzer
- && command.toLowerCase().indexOf("create") < 0) {
-
- Thread.currentThread().setContextClassLoader(
- this.getClass().getClassLoader());
-
- String queryPlanFileName = ctx.getLocalScratchDir(true)
- + Path.SEPARATOR_CHAR + "queryplan.xml";
- LOG.info("query plan = " + queryPlanFileName);
- queryPlanFileName = new Path(queryPlanFileName).toUri()
- .getPath();
-
- // serialize the queryPlan
- FileOutputStream fos = new FileOutputStream(queryPlanFileName);
- Utilities.serializeQueryPlan(plan, fos);
- fos.close();
-
- // deserialize the queryPlan
- FileInputStream fis = new FileInputStream(queryPlanFileName);
- QueryPlan newPlan = Utilities.deserializeQueryPlan(fis, conf);
- fis.close();
-
- // Use the deserialized plan
- plan = newPlan;
- }
-
- // initialize FetchTask right here
- if (plan.getFetchTask() != null) {
- plan.getFetchTask().initialize(conf, plan, null);
- }
-
- // do the authorization check
- if (HiveConf.getBoolVar(conf,
- HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
- try {
- // doAuthorization(sem);
- } catch (AuthorizationException authExp) {
- console.printError("Authorization failed:"
- + authExp.getMessage()
- + ". Use show grant to get more details.");
- return 403;
- }
- }
-
- // hyracks run
- if (sem instanceof SemanticAnalyzer
- && command.toLowerCase().indexOf("create") < 0) {
- hivesterix = true;
- return engine.compileJob(sem.getRootTasks());
- }
-
- return 0;
- } catch (SemanticException e) {
- errorMessage = "FAILED: Error in semantic analysis: "
- + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (10);
- } catch (ParseException e) {
- errorMessage = "FAILED: Parse Error: " + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (11);
- } catch (Exception e) {
- errorMessage = "FAILED: Hive Internal Error: "
- + Utilities.getNameMessage(e);
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage + "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (12);
- }
- }
-
- private void doAuthorization(BaseSemanticAnalyzer sem)
- throws HiveException, AuthorizationException {
- HashSet<ReadEntity> inputs = sem.getInputs();
- HashSet<WriteEntity> outputs = sem.getOutputs();
- SessionState ss = SessionState.get();
- HiveOperation op = ss.getHiveOperation();
- Hive db = sem.getDb();
- if (op != null) {
- if (op.equals(HiveOperation.CREATETABLE_AS_SELECT)
- || op.equals(HiveOperation.CREATETABLE)) {
- ss.getAuthorizer().authorize(
- db.getDatabase(db.getCurrentDatabase()),
- null,
- HiveOperation.CREATETABLE_AS_SELECT
- .getOutputRequiredPrivileges());
- } else {
- // if (op.equals(HiveOperation.IMPORT)) {
- // ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
- // if (!isa.existsTable()) {
- ss.getAuthorizer().authorize(
- db.getDatabase(db.getCurrentDatabase()),
- null,
- HiveOperation.CREATETABLE_AS_SELECT
- .getOutputRequiredPrivileges());
- // }
- // }
- }
- if (outputs != null && outputs.size() > 0) {
- for (WriteEntity write : outputs) {
-
- if (write.getType() == WriteEntity.Type.PARTITION) {
- Partition part = db.getPartition(write.getTable(),
- write.getPartition().getSpec(), false);
- if (part != null) {
- ss.getAuthorizer().authorize(write.getPartition(),
- null, op.getOutputRequiredPrivileges());
- continue;
- }
- }
-
- if (write.getTable() != null) {
- ss.getAuthorizer().authorize(write.getTable(), null,
- op.getOutputRequiredPrivileges());
- }
- }
-
- }
- }
-
- if (inputs != null && inputs.size() > 0) {
-
- Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>();
- Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>();
-
- Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
- for (ReadEntity read : inputs) {
- if (read.getPartition() != null) {
- Table tbl = read.getTable();
- String tblName = tbl.getTableName();
- if (tableUsePartLevelAuth.get(tblName) == null) {
- boolean usePartLevelPriv = (tbl.getParameters().get(
- "PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE"
- .equalsIgnoreCase(tbl.getParameters().get(
- "PARTITION_LEVEL_PRIVILEGE"))));
- if (usePartLevelPriv) {
- tableUsePartLevelAuth.put(tblName, Boolean.TRUE);
- } else {
- tableUsePartLevelAuth.put(tblName, Boolean.FALSE);
- }
- }
- }
- }
-
- if (op.equals(HiveOperation.CREATETABLE_AS_SELECT)
- || op.equals(HiveOperation.QUERY)) {
- SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
- ParseContext parseCtx = querySem.getParseContext();
- Map<TableScanOperator, Table> tsoTopMap = parseCtx
- .getTopToTable();
-
- for (Map.Entry<String, Operator<? extends Serializable>> topOpMap : querySem
- .getParseContext().getTopOps().entrySet()) {
- Operator<? extends Serializable> topOp = topOpMap
- .getValue();
- if (topOp instanceof TableScanOperator
- && tsoTopMap.containsKey(topOp)) {
- TableScanOperator tableScanOp = (TableScanOperator) topOp;
- Table tbl = tsoTopMap.get(tableScanOp);
- List<Integer> neededColumnIds = tableScanOp
- .getNeededColumnIDs();
- List<FieldSchema> columns = tbl.getCols();
- List<String> cols = new ArrayList<String>();
- if (neededColumnIds != null
- && neededColumnIds.size() > 0) {
- for (int i = 0; i < neededColumnIds.size(); i++) {
- cols.add(columns.get(neededColumnIds.get(i))
- .getName());
- }
- } else {
- for (int i = 0; i < columns.size(); i++) {
- cols.add(columns.get(i).getName());
- }
- }
- if (tbl.isPartitioned()
- && tableUsePartLevelAuth
- .get(tbl.getTableName())) {
- String alias_id = topOpMap.getKey();
- PrunedPartitionList partsList = PartitionPruner
- .prune(parseCtx.getTopToTable().get(topOp),
- parseCtx.getOpToPartPruner().get(
- topOp), parseCtx.getConf(),
- alias_id,
- parseCtx.getPrunedPartitions());
- Set<Partition> parts = new HashSet<Partition>();
- parts.addAll(partsList.getConfirmedPartns());
- parts.addAll(partsList.getUnknownPartns());
- for (Partition part : parts) {
- List<String> existingCols = part2Cols.get(part);
- if (existingCols == null) {
- existingCols = new ArrayList<String>();
- }
- existingCols.addAll(cols);
- part2Cols.put(part, existingCols);
- }
- } else {
- List<String> existingCols = tab2Cols.get(tbl);
- if (existingCols == null) {
- existingCols = new ArrayList<String>();
- }
- existingCols.addAll(cols);
- tab2Cols.put(tbl, existingCols);
- }
- }
- }
- }
-
- // cache the results for table authorization
- Set<String> tableAuthChecked = new HashSet<String>();
- for (ReadEntity read : inputs) {
- Table tbl = null;
- if (read.getPartition() != null) {
- tbl = read.getPartition().getTable();
- // use partition level authorization
- if (tableUsePartLevelAuth.get(tbl.getTableName())) {
- List<String> cols = part2Cols.get(read.getPartition());
- if (cols != null && cols.size() > 0) {
- ss.getAuthorizer().authorize(
- read.getPartition().getTable(),
- read.getPartition(), cols,
- op.getInputRequiredPrivileges(), null);
- } else {
- ss.getAuthorizer().authorize(read.getPartition(),
- op.getInputRequiredPrivileges(), null);
- }
- continue;
- }
- } else if (read.getTable() != null) {
- tbl = read.getTable();
- }
-
- // if we reach here, it means it needs to do a table
- // authorization
- // check, and the table authorization may already happened
- // because of other
- // partitions
- if (tbl != null
- && !tableAuthChecked.contains(tbl.getTableName())) {
- List<String> cols = tab2Cols.get(tbl);
- if (cols != null && cols.size() > 0) {
- ss.getAuthorizer().authorize(tbl, null, cols,
- op.getInputRequiredPrivileges(), null);
- } else {
- ss.getAuthorizer().authorize(tbl,
- op.getInputRequiredPrivileges(), null);
- }
- tableAuthChecked.add(tbl.getTableName());
- }
- }
-
- }
- }
-
- /**
- * @return The current query plan associated with this Driver, if any.
- */
- public QueryPlan getPlan() {
- return plan;
- }
-
- /**
- * @param t
- * The table to be locked
- * @param p
- * The partition to be locked
- * @param mode
- * The mode of the lock (SHARED/EXCLUSIVE) Get the list of
- * objects to be locked. If a partition needs to be locked (in
- * any mode), all its parents should also be locked in SHARED
- * mode.
- **/
- private List<HiveLockObj> getLockObjects(Table t, Partition p,
- HiveLockMode mode) throws SemanticException {
- List<HiveLockObj> locks = new LinkedList<HiveLockObj>();
-
- HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(),
- String.valueOf(System.currentTimeMillis()), "IMPLICIT");
-
- if (t != null) {
- locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
- mode = HiveLockMode.SHARED;
- locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(),
- lockData), mode));
- return locks;
- }
-
- if (p != null) {
- if (!(p instanceof DummyPartition)) {
- locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode));
- }
-
- // All the parents are locked in shared mode
- mode = HiveLockMode.SHARED;
-
- // For dummy partitions, only partition name is needed
- String name = p.getName();
-
- if (p instanceof DummyPartition) {
- name = p.getName().split("@")[2];
- }
-
- String partName = name;
- String partialName = "";
- String[] partns = name.split("/");
- int len = p instanceof DummyPartition ? partns.length
- : partns.length - 1;
- for (int idx = 0; idx < len; idx++) {
- String partn = partns[idx];
- partialName += partn;
- try {
- locks.add(new HiveLockObj(new HiveLockObject(
- new DummyPartition(p.getTable(), p.getTable()
- .getDbName()
- + "/"
- + p.getTable().getTableName()
- + "/"
- + partialName), lockData), mode));
- partialName += "/";
- } catch (HiveException e) {
- throw new SemanticException(e.getMessage());
- }
- }
-
- locks.add(new HiveLockObj(
- new HiveLockObject(p.getTable(), lockData), mode));
- locks.add(new HiveLockObj(new HiveLockObject(p.getTable()
- .getDbName(), lockData), mode));
- }
- return locks;
- }
-
- /**
- * Acquire read and write locks needed by the statement. The list of objects
- * to be locked are obtained from he inputs and outputs populated by the
- * compiler. The lock acuisition scheme is pretty simple. If all the locks
- * cannot be obtained, error out. Deadlock is avoided by making sure that
- * the locks are lexicographically sorted.
- **/
- public int acquireReadWriteLocks() {
- try {
- int sleepTime = conf
- .getIntVar(HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES) * 1000;
- int numRetries = conf
- .getIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES);
-
- boolean supportConcurrency = conf
- .getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
- if (!supportConcurrency) {
- return 0;
- }
-
- List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>();
-
- // Sort all the inputs, outputs.
- // If a lock needs to be acquired on any partition, a read lock
- // needs to be acquired on all
- // its parents also
- for (ReadEntity input : plan.getInputs()) {
- if (input.getType() == ReadEntity.Type.TABLE) {
- lockObjects.addAll(getLockObjects(input.getTable(), null,
- HiveLockMode.SHARED));
- } else {
- lockObjects.addAll(getLockObjects(null,
- input.getPartition(), HiveLockMode.SHARED));
- }
- }
-
- for (WriteEntity output : plan.getOutputs()) {
- if (output.getTyp() == WriteEntity.Type.TABLE) {
- lockObjects.addAll(getLockObjects(output.getTable(), null,
- output.isComplete() ? HiveLockMode.EXCLUSIVE
- : HiveLockMode.SHARED));
- } else if (output.getTyp() == WriteEntity.Type.PARTITION) {
- lockObjects.addAll(getLockObjects(null,
- output.getPartition(), HiveLockMode.EXCLUSIVE));
- }
- // In case of dynamic queries, it is possible to have incomplete
- // dummy partitions
- else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) {
- lockObjects.addAll(getLockObjects(null,
- output.getPartition(), HiveLockMode.SHARED));
- }
- }
-
- if (lockObjects.isEmpty() && !ctx.isNeedLockMgr()) {
- return 0;
- }
-
- int ret = checkLockManager();
- if (ret != 0) {
- return ret;
- }
-
- HiveLockObjectData lockData = new HiveLockObjectData(
- plan.getQueryId(), String.valueOf(System
- .currentTimeMillis()), "IMPLICIT");
-
- // Lock the database also
- try {
- Hive db = Hive.get(conf);
- lockObjects.add(new HiveLockObj(new HiveLockObject(db
- .getCurrentDatabase(), lockData), HiveLockMode.SHARED));
- } catch (HiveException e) {
- throw new SemanticException(e.getMessage());
- }
-
- ctx.setHiveLockMgr(hiveLockMgr);
- List<HiveLock> hiveLocks = null;
-
- int tryNum = 1;
- do {
-
- // ctx.getHiveLockMgr();
- // hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false);
-
- if (hiveLocks != null) {
- break;
- }
-
- tryNum++;
- try {
- Thread.sleep(sleepTime);
- } catch (InterruptedException e) {
- }
- } while (tryNum < numRetries);
-
- if (hiveLocks == null) {
- throw new SemanticException(
- ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
- } else {
- ctx.setHiveLocks(hiveLocks);
- }
-
- return (0);
- } catch (SemanticException e) {
- errorMessage = "FAILED: Error in acquiring locks: "
- + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (10);
- } catch (Exception e) {
- errorMessage = "FAILED: Error in acquiring locks: "
- + e.getMessage();
- SQLState = ErrorMsg.findSQLState(e.getMessage());
- console.printError(errorMessage, "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (10);
- }
- }
-
- /**
- * Release all the locks acquired implicitly by the statement. Note that the
- * locks acquired with 'keepAlive' set to True are not released.
- **/
- private void releaseLocks() {
- if (ctx != null && ctx.getHiveLockMgr() != null) {
- try {
- ctx.getHiveLockMgr().close();
- ctx.setHiveLocks(null);
- } catch (LockException e) {
- }
- }
- }
-
- /**
- * @param hiveLocks
- * list of hive locks to be released Release all the locks
- * specified. If some of the locks have already been released,
- * ignore them
- **/
- private void releaseLocks(List<HiveLock> hiveLocks) {
- if (hiveLocks != null) {
- ctx.getHiveLockMgr().releaseLocks(hiveLocks);
- }
- ctx.setHiveLocks(null);
- }
-
- public CommandProcessorResponse run(String command) {
- errorMessage = null;
- SQLState = null;
-
- int ret = compile(command);
- if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
- return new CommandProcessorResponse(ret, errorMessage, SQLState);
- }
-
- // ret = acquireReadWriteLocks();
- if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
- return new CommandProcessorResponse(ret, errorMessage, SQLState);
- }
-
- ret = execute();
- if (ret != 0) {
- // releaseLocks(ctx.getHiveLocks());
- return new CommandProcessorResponse(ret, errorMessage, SQLState);
- }
-
- // releaseLocks(ctx.getHiveLocks());
- return new CommandProcessorResponse(ret);
- }
-
- private List<AbstractSemanticAnalyzerHook> getSemanticAnalyzerHooks()
- throws Exception {
- ArrayList<AbstractSemanticAnalyzerHook> saHooks = new ArrayList<AbstractSemanticAnalyzerHook>();
- String pestr = conf.getVar(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK);
- if (pestr == null) {
- return saHooks;
- }
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return saHooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- AbstractSemanticAnalyzerHook hook = HiveUtils
- .getSemanticAnalyzerHook(conf, peClass);
- saHooks.add(hook);
- } catch (HiveException e) {
- console.printError("Pre Exec Hook Class not found:"
- + e.getMessage());
- throw e;
- }
- }
-
- return saHooks;
- }
-
- private List<Hook> getPreExecHooks() throws Exception {
- ArrayList<Hook> pehooks = new ArrayList<Hook>();
- String pestr = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS);
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return pehooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- pehooks.add((Hook) Class.forName(peClass.trim(), true,
- JavaUtils.getClassLoader()).newInstance());
- } catch (ClassNotFoundException e) {
- console.printError("Pre Exec Hook Class not found:"
- + e.getMessage());
- throw e;
- }
- }
-
- return pehooks;
- }
-
- private List<Hook> getPostExecHooks() throws Exception {
- ArrayList<Hook> pehooks = new ArrayList<Hook>();
- String pestr = conf.getVar(HiveConf.ConfVars.POSTEXECHOOKS);
- pestr = pestr.trim();
- if (pestr.equals("")) {
- return pehooks;
- }
-
- String[] peClasses = pestr.split(",");
-
- for (String peClass : peClasses) {
- try {
- pehooks.add((Hook) Class.forName(peClass.trim(), true,
- JavaUtils.getClassLoader()).newInstance());
- } catch (ClassNotFoundException e) {
- console.printError("Post Exec Hook Class not found:"
- + e.getMessage());
- throw e;
- }
- }
-
- return pehooks;
- }
-
- public int execute() {
- // execute hivesterix plan
- if (hivesterix) {
- hivesterix = false;
- int ret = engine.executeJob();
- if (ret != 0)
- return ret;
- }
-
- boolean noName = StringUtils.isEmpty(conf
- .getVar(HiveConf.ConfVars.HADOOPJOBNAME));
- int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
-
- String queryId = plan.getQueryId();
- String queryStr = plan.getQueryStr();
-
- conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId);
- conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr);
- maxthreads = HiveConf.getIntVar(conf,
- HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
-
- try {
- LOG.info("Starting command: " + queryStr);
-
- plan.setStarted();
-
- if (SessionState.get() != null) {
- SessionState
- .get()
- .getHiveHistory()
- .startQuery(queryStr,
- conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
- SessionState.get().getHiveHistory().logPlanProgress(plan);
- }
- resStream = null;
-
- HookContext hookContext = new HookContext(plan, conf);
-
- for (Hook peh : getPreExecHooks()) {
- if (peh instanceof ExecuteWithHookContext) {
- ((ExecuteWithHookContext) peh).run(hookContext);
- } else if (peh instanceof PreExecute) {
- ((PreExecute) peh).run(SessionState.get(),
- plan.getInputs(), plan.getOutputs(), ShimLoader
- .getHadoopShims().getUGIForConf(conf));
- }
- }
-
- int jobs = Utilities.getMRTasks(plan.getRootTasks()).size();
- if (jobs > 0) {
- console.printInfo("Total MapReduce jobs = " + jobs);
- }
- if (SessionState.get() != null) {
- SessionState
- .get()
- .getHiveHistory()
- .setQueryProperty(queryId, Keys.QUERY_NUM_TASKS,
- String.valueOf(jobs));
- SessionState.get().getHiveHistory()
- .setIdToTableMap(plan.getIdToTableNameMap());
- }
- String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
-
- // A runtime that launches runnable tasks as separate Threads
- // through
- // TaskRunners
- // As soon as a task isRunnable, it is put in a queue
- // At any time, at most maxthreads tasks can be running
- // The main thread polls the TaskRunners to check if they have
- // finished.
-
- Queue<Task<? extends Serializable>> runnable = new LinkedList<Task<? extends Serializable>>();
- Map<TaskResult, TaskRunner> running = new HashMap<TaskResult, TaskRunner>();
-
- DriverContext driverCxt = new DriverContext(runnable, ctx);
-
- // Add root Tasks to runnable
-
- for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
- driverCxt.addToRunnable(tsk);
- }
-
- // Loop while you either have tasks running, or tasks queued up
-
- while (running.size() != 0 || runnable.peek() != null) {
- // Launch upto maxthreads tasks
- while (runnable.peek() != null && running.size() < maxthreads) {
- Task<? extends Serializable> tsk = runnable.remove();
- console.printInfo("executing task " + tsk.getName());
- launchTask(tsk, queryId, noName, running, jobname, jobs,
- driverCxt);
- }
-
- // poll the Tasks to see which one completed
- TaskResult tskRes = pollTasks(running.keySet());
- TaskRunner tskRun = running.remove(tskRes);
- Task<? extends Serializable> tsk = tskRun.getTask();
- hookContext.addCompleteTask(tskRun);
-
- int exitVal = tskRes.getExitVal();
- if (exitVal != 0) {
- Task<? extends Serializable> backupTask = tsk
- .getAndInitBackupTask();
- if (backupTask != null) {
- errorMessage = "FAILED: Execution Error, return code "
- + exitVal + " from " + tsk.getClass().getName();
- console.printError(errorMessage);
-
- errorMessage = "ATTEMPT: Execute BackupTask: "
- + backupTask.getClass().getName();
- console.printError(errorMessage);
-
- // add backup task to runnable
- if (DriverContext.isLaunchable(backupTask)) {
- driverCxt.addToRunnable(backupTask);
- }
- continue;
-
- } else {
- // TODO: This error messaging is not very informative.
- // Fix that.
- errorMessage = "FAILED: Execution Error, return code "
- + exitVal + " from " + tsk.getClass().getName();
- SQLState = "08S01";
- console.printError(errorMessage);
- if (running.size() != 0) {
- taskCleanup();
- }
- // in case we decided to run everything in local mode,
- // restore the
- // the jobtracker setting to its initial value
- ctx.restoreOriginalTracker();
- return 9;
- }
- }
-
- if (SessionState.get() != null) {
- SessionState
- .get()
- .getHiveHistory()
- .setTaskProperty(queryId, tsk.getId(),
- Keys.TASK_RET_CODE, String.valueOf(exitVal));
- SessionState.get().getHiveHistory().endTask(queryId, tsk);
- }
-
- if (tsk.getChildTasks() != null) {
- for (Task<? extends Serializable> child : tsk
- .getChildTasks()) {
- // hivesterix: don't check launchable condition
- // if (DriverContext.isLaunchable(child)) {
- driverCxt.addToRunnable(child);
- // }
- }
- }
- }
-
- // in case we decided to run everything in local mode, restore the
- // the jobtracker setting to its initial value
- ctx.restoreOriginalTracker();
-
- // remove incomplete outputs.
- // Some incomplete outputs may be added at the beginning, for eg:
- // for dynamic partitions.
- // remove them
- HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
- for (WriteEntity output : plan.getOutputs()) {
- if (!output.isComplete()) {
- remOutputs.add(output);
- }
- }
-
- for (WriteEntity output : remOutputs) {
- plan.getOutputs().remove(output);
- }
-
- // Get all the post execution hooks and execute them.
- for (Hook peh : getPostExecHooks()) {
- if (peh instanceof ExecuteWithHookContext) {
- ((ExecuteWithHookContext) peh).run(hookContext);
- } else if (peh instanceof PostExecute) {
- ((PostExecute) peh)
- .run(SessionState.get(),
- plan.getInputs(),
- plan.getOutputs(),
- (SessionState.get() != null ? SessionState
- .get().getLineageState()
- .getLineageInfo() : null),
- ShimLoader.getHadoopShims().getUGIForConf(
- conf));
- }
- }
-
- if (SessionState.get() != null) {
- SessionState
- .get()
- .getHiveHistory()
- .setQueryProperty(queryId, Keys.QUERY_RET_CODE,
- String.valueOf(0));
- SessionState.get().getHiveHistory().printRowCount(queryId);
- }
- } catch (Exception e) {
- if (SessionState.get() != null) {
- SessionState
- .get()
- .getHiveHistory()
- .setQueryProperty(queryId, Keys.QUERY_RET_CODE,
- String.valueOf(12));
- }
- // TODO: do better with handling types of Exception here
- errorMessage = "FAILED: Hive Internal Error: "
- + Utilities.getNameMessage(e);
- SQLState = "08S01";
- console.printError(errorMessage + "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return (12);
- } finally {
- if (SessionState.get() != null) {
- SessionState.get().getHiveHistory().endQuery(queryId);
- }
- if (noName) {
- conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
- }
- }
- plan.setDone();
-
- if (SessionState.get() != null) {
- try {
- SessionState.get().getHiveHistory().logPlanProgress(plan);
- } catch (Exception e) {
- }
- }
- console.printInfo("OK");
-
- return (0);
- }
-
- /**
- * Launches a new task
- *
- * @param tsk
- * task being launched
- * @param queryId
- * Id of the query containing the task
- * @param noName
- * whether the task has a name set
- * @param running
- * map from taskresults to taskrunners
- * @param jobname
- * name of the task, if it is a map-reduce job
- * @param jobs
- * number of map-reduce jobs
- * @param curJobNo
- * the sequential number of the next map-reduce job
- * @return the updated number of last the map-reduce job launched
- */
-
- public void launchTask(Task<? extends Serializable> tsk, String queryId,
- boolean noName, Map<TaskResult, TaskRunner> running,
- String jobname, int jobs, DriverContext cxt) {
-
- if (SessionState.get() != null) {
- SessionState.get().getHiveHistory()
- .startTask(queryId, tsk, tsk.getClass().getName());
- }
- if (tsk.isMapRedTask() && !(tsk instanceof ConditionalTask)) {
- if (noName) {
- conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "("
- + tsk.getId() + ")");
- }
- cxt.incCurJobNo(1);
- console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of "
- + jobs);
- }
- tsk.initialize(conf, plan, cxt);
- TaskResult tskRes = new TaskResult();
- TaskRunner tskRun = new TaskRunner(tsk, tskRes);
-
- // HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) &&
- // Launch Task: hivesterix tweak
- if (tsk instanceof MapRedTask || tsk instanceof StatsTask) {
- // Launch it in the parallel mode, as a separate thread only for MR
- // tasks
- tskRes.setRunning(false);
- tskRes.setExitVal(0);
- } else if (tsk instanceof ConditionalTask) {
- ConditionalTask condTask = (ConditionalTask) tsk;
- ConditionalResolver crs = condTask.getResolver();
- if (crs instanceof ConditionalResolverMergeFiles) {
- tskRes.setRunning(false);
- tskRes.setExitVal(0);
-
- List<Task<? extends Serializable>> children = condTask
- .getListTasks();
- for (Task<? extends Serializable> child : children)
- if (child instanceof MapRedTask)
- cxt.addToRunnable(child);
- }
- } else {
- tskRun.runSequential();
- }
- running.put(tskRes, tskRun);
- return;
- }
-
- /**
- * Cleans up remaining tasks in case of failure
- */
-
- public void taskCleanup() {
- // The currently existing Shutdown hooks will be automatically called,
- // killing the map-reduce processes.
- // The non MR processes will be killed as well.
- System.exit(9);
- }
-
- /**
- * Polls running tasks to see if a task has ended.
- *
- * @param results
- * Set of result objects for running tasks
- * @return The result object for any completed/failed task
- */
-
- public TaskResult pollTasks(Set<TaskResult> results) {
- Iterator<TaskResult> resultIterator = results.iterator();
- while (true) {
- while (resultIterator.hasNext()) {
- TaskResult tskRes = resultIterator.next();
- if (tskRes.isRunning() == false) {
- return tskRes;
- }
- }
-
- // In this loop, nothing was found
- // Sleep 10 seconds and restart
- try {
- Thread.sleep(sleeptime);
- } catch (InterruptedException ie) {
- // Do Nothing
- ;
- }
- resultIterator = results.iterator();
- }
- }
-
- public boolean getResults(ArrayList<String> res) throws IOException {
- if (plan != null && plan.getFetchTask() != null) {
- FetchTask ft = plan.getFetchTask();
- ft.setMaxRows(maxRows);
- return ft.fetch(res);
- }
-
- if (resStream == null) {
- resStream = ctx.getStream();
- }
- if (resStream == null) {
- return false;
- }
-
- int numRows = 0;
- String row = null;
-
- while (numRows < maxRows) {
- if (resStream == null) {
- if (numRows > 0) {
- return true;
- } else {
- return false;
- }
- }
-
- bos.reset();
- Utilities.StreamStatus ss;
- try {
- ss = Utilities.readColumn(resStream, bos);
- if (bos.getCount() > 0) {
- row = new String(bos.getData(), 0, bos.getCount(), "UTF-8");
- } else if (ss == Utilities.StreamStatus.TERMINATED) {
- row = new String();
- }
-
- if (row != null) {
- numRows++;
- res.add(row);
- }
- } catch (IOException e) {
- console.printError("FAILED: Unexpected IO exception : "
- + e.getMessage());
- res = null;
- return false;
- }
-
- if (ss == Utilities.StreamStatus.EOF) {
- resStream = ctx.getStream();
- }
- }
- return true;
- }
-
- public int close() {
- try {
- if (plan != null) {
- FetchTask fetchTask = plan.getFetchTask();
- if (null != fetchTask) {
- try {
- fetchTask.clearFetch();
- } catch (Exception e) {
- LOG.debug(" Exception while clearing the Fetch task ",
- e);
- }
- }
- }
- if (ctx != null) {
- ctx.clear();
- }
- if (null != resStream) {
- try {
- ((FSDataInputStream) resStream).close();
- } catch (Exception e) {
- LOG.debug(" Exception while closing the resStream ", e);
- }
- }
- } catch (Exception e) {
- console.printError("FAILED: Hive Internal Error: "
- + Utilities.getNameMessage(e) + "\n"
- + org.apache.hadoop.util.StringUtils.stringifyException(e));
- return 13;
- }
-
- return 0;
- }
-
- public void destroy() {
- releaseLocks();
- }
-
- public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan()
- throws IOException {
- return plan.getQueryPlan();
- }
-
- public int getTryCount() {
- return tryCount;
- }
-
- public void setTryCount(int tryCount) {
- this.tryCount = tryCount;
- }
-}
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
deleted file mode 100644
index b174432..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.util.StringUtils;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * GenericUDAFAverage.
- *
- */
-@Description(name = "avg", value = "_FUNC_(x) - Returns the mean of a set of numbers")
-public class GenericUDAFAverage extends AbstractGenericUDAFResolver {
-
- static final Log LOG = LogFactory
- .getLog(GenericUDAFAverage.class.getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- if (parameters.length != 1) {
- throw new UDFArgumentTypeException(parameters.length - 1,
- "Exactly one argument is expected.");
- }
-
- if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(0,
- "Only primitive type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- case STRING:
- return new GenericUDAFAverageEvaluator();
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(0,
- "Only numeric or string type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- }
-
- /**
- * GenericUDAFAverageEvaluator.
- *
- */
- public static class GenericUDAFAverageEvaluator extends
- GenericUDAFEvaluator {
-
- // For PARTIAL1 and COMPLETE
- PrimitiveObjectInspector inputOI;
-
- // For PARTIAL2 and FINAL
- StructObjectInspector soi;
- StructField countField;
- StructField sumField;
- LongObjectInspector countFieldOI;
- DoubleObjectInspector sumFieldOI;
-
- // For PARTIAL1 and PARTIAL2
- Object[] partialResult;
-
- // For FINAL and COMPLETE
- DoubleWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- super.init(m, parameters);
-
- // init input
- if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
- inputOI = (PrimitiveObjectInspector) parameters[0];
- } else {
- soi = (StructObjectInspector) parameters[0];
- countField = soi.getStructFieldRef("count");
- sumField = soi.getStructFieldRef("sum");
- countFieldOI = (LongObjectInspector) countField
- .getFieldObjectInspector();
- sumFieldOI = (DoubleObjectInspector) sumField
- .getFieldObjectInspector();
- }
-
- // init output
- if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
- // The output of a partial aggregation is a struct containing
- // a "long" count and a "double" sum.
-
- ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
- foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- ArrayList<String> fname = new ArrayList<String>();
- fname.add("count");
- fname.add("sum");
- partialResult = new Object[2];
- partialResult[0] = new LongWritable(0);
- partialResult[1] = new DoubleWritable(0);
- return ObjectInspectorFactory.getStandardStructObjectInspector(
- fname, foi);
-
- } else {
- result = new DoubleWritable(0);
- return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
- }
- }
-
- static class AverageAgg implements SerializableBuffer {
- long count;
- double sum;
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- count = BufferSerDeUtil.getLong(data, start);
- start += 8;
- sum = BufferSerDeUtil.getDouble(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeLong(count, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(sum, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeLong(count);
- output.writeDouble(sum);
- }
- };
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- AverageAgg result = new AverageAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- AverageAgg myagg = (AverageAgg) agg;
- myagg.count = 0;
- myagg.sum = 0;
- }
-
- boolean warned = false;
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- Object p = parameters[0];
- if (p != null) {
- AverageAgg myagg = (AverageAgg) agg;
- try {
- double v = PrimitiveObjectInspectorUtils.getDouble(p,
- inputOI);
- myagg.count++;
- myagg.sum += v;
- } catch (NumberFormatException e) {
- if (!warned) {
- warned = true;
- LOG.warn(getClass().getSimpleName() + " "
- + StringUtils.stringifyException(e));
- LOG.warn(getClass().getSimpleName()
- + " ignoring similar exceptions.");
- }
- }
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- AverageAgg myagg = (AverageAgg) agg;
- ((LongWritable) partialResult[0]).set(myagg.count);
- ((DoubleWritable) partialResult[1]).set(myagg.sum);
- return partialResult;
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- AverageAgg myagg = (AverageAgg) agg;
- Object partialCount = soi.getStructFieldData(partial,
- countField);
- Object partialSum = soi.getStructFieldData(partial, sumField);
- myagg.count += countFieldOI.get(partialCount);
- myagg.sum += sumFieldOI.get(partialSum);
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- AverageAgg myagg = (AverageAgg) agg;
- if (myagg.count == 0) {
- return null;
- } else {
- result.set(myagg.sum / myagg.count);
- return result;
- }
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
deleted file mode 100644
index 716faac..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
+++ /dev/null
@@ -1,428 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * Compute the Pearson correlation coefficient corr(x, y), using the following
- * stable one-pass method, based on: "Formulas for Robust, One-Pass Parallel
- * Computation of Covariances and Arbitrary-Order Statistical Moments", Philippe
- * Pebay, Sandia Labs and
- * "The Art of Computer Programming, volume 2: Seminumerical Algorithms", Donald
- * Knuth.
- *
- * Incremental: n : <count> mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg> my_n =
- * my_(n-1) + [y_n - my_(n-1)]/n : <yavg> c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n
- * - my_n) : <covariance * n> vx_n = vx_(n-1) + (x_n - mx_n)(x_n - mx_(n-1)):
- * <variance * n> vy_n = vy_(n-1) + (y_n - my_n)(y_n - my_(n-1)): <variance * n>
- *
- * Merge: c_(A,B) = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
- * vx_(A,B) = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B)
- * vy_(A,B) = vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
- *
- */
-@Description(name = "corr", value = "_FUNC_(x,y) - Returns the Pearson coefficient of correlation\n"
- + "between a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
- + "Any pair with a NULL is ignored. If the function is applied to an empty set or\n"
- + "a singleton set, NULL will be returned. Otherwise, it computes the following:\n"
- + " COVAR_POP(x,y)/(STDDEV_POP(x)*STDDEV_POP(y))\n"
- + "where neither x nor y is null,\n"
- + "COVAR_POP is the population covariance,\n"
- + "and STDDEV_POP is the population standard deviation.")
-public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver {
-
- static final Log LOG = LogFactory.getLog(GenericUDAFCorrelation.class
- .getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- if (parameters.length != 2) {
- throw new UDFArgumentTypeException(parameters.length - 1,
- "Exactly two arguments are expected.");
- }
-
- if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(0,
- "Only primitive type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
-
- if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(1,
- "Only primitive type arguments are accepted but "
- + parameters[1].getTypeName() + " is passed.");
- }
-
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- return new GenericUDAFCorrelationEvaluator();
- case STRING:
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(1,
- "Only numeric type arguments are accepted but "
- + parameters[1].getTypeName() + " is passed.");
- }
- case STRING:
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(0,
- "Only numeric type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- }
-
- /**
- * Evaluate the Pearson correlation coefficient using a stable one-pass
- * algorithm, based on work by Philippe Pébay and Donald Knuth.
- *
- * Incremental: n : <count> mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
- * my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg> c_n = c_(n-1) + (x_n -
- * mx_(n-1))*(y_n - my_n) : <covariance * n> vx_n = vx_(n-1) + (x_n -
- * mx_n)(x_n - mx_(n-1)): <variance * n> vy_n = vy_(n-1) + (y_n - my_n)(y_n
- * - my_(n-1)): <variance * n>
- *
- * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X vx_(A,B)
- * = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B) vy_(A,B) =
- * vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
- *
- */
- public static class GenericUDAFCorrelationEvaluator extends
- GenericUDAFEvaluator {
-
- // For PARTIAL1 and COMPLETE
- private PrimitiveObjectInspector xInputOI;
- private PrimitiveObjectInspector yInputOI;
-
- // For PARTIAL2 and FINAL
- private StructObjectInspector soi;
- private StructField countField;
- private StructField xavgField;
- private StructField yavgField;
- private StructField xvarField;
- private StructField yvarField;
- private StructField covarField;
- private LongObjectInspector countFieldOI;
- private DoubleObjectInspector xavgFieldOI;
- private DoubleObjectInspector yavgFieldOI;
- private DoubleObjectInspector xvarFieldOI;
- private DoubleObjectInspector yvarFieldOI;
- private DoubleObjectInspector covarFieldOI;
-
- // For PARTIAL1 and PARTIAL2
- private Object[] partialResult;
-
- // For FINAL and COMPLETE
- private DoubleWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- super.init(m, parameters);
-
- // init input
- if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
- assert (parameters.length == 2);
- xInputOI = (PrimitiveObjectInspector) parameters[0];
- yInputOI = (PrimitiveObjectInspector) parameters[1];
- } else {
- assert (parameters.length == 1);
- soi = (StructObjectInspector) parameters[0];
-
- countField = soi.getStructFieldRef("count");
- xavgField = soi.getStructFieldRef("xavg");
- yavgField = soi.getStructFieldRef("yavg");
- xvarField = soi.getStructFieldRef("xvar");
- yvarField = soi.getStructFieldRef("yvar");
- covarField = soi.getStructFieldRef("covar");
-
- countFieldOI = (LongObjectInspector) countField
- .getFieldObjectInspector();
- xavgFieldOI = (DoubleObjectInspector) xavgField
- .getFieldObjectInspector();
- yavgFieldOI = (DoubleObjectInspector) yavgField
- .getFieldObjectInspector();
- xvarFieldOI = (DoubleObjectInspector) xvarField
- .getFieldObjectInspector();
- yvarFieldOI = (DoubleObjectInspector) yvarField
- .getFieldObjectInspector();
- covarFieldOI = (DoubleObjectInspector) covarField
- .getFieldObjectInspector();
- }
-
- // init output
- if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
- // The output of a partial aggregation is a struct containing
- // a long count, two double averages, two double variances,
- // and a double covariance.
-
- ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
-
- foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
-
- ArrayList<String> fname = new ArrayList<String>();
- fname.add("count");
- fname.add("xavg");
- fname.add("yavg");
- fname.add("xvar");
- fname.add("yvar");
- fname.add("covar");
-
- partialResult = new Object[6];
- partialResult[0] = new LongWritable(0);
- partialResult[1] = new DoubleWritable(0);
- partialResult[2] = new DoubleWritable(0);
- partialResult[3] = new DoubleWritable(0);
- partialResult[4] = new DoubleWritable(0);
- partialResult[5] = new DoubleWritable(0);
-
- return ObjectInspectorFactory.getStandardStructObjectInspector(
- fname, foi);
-
- } else {
- setResult(new DoubleWritable(0));
- return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
- }
- }
-
- static class StdAgg implements SerializableBuffer {
- long count; // number n of elements
- double xavg; // average of x elements
- double yavg; // average of y elements
- double xvar; // n times the variance of x elements
- double yvar; // n times the variance of y elements
- double covar; // n times the covariance
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- count = BufferSerDeUtil.getLong(data, start);
- start += 8;
- xavg = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- yavg = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- xvar = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- yvar = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- covar = BufferSerDeUtil.getDouble(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeLong(count, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(xavg, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(yavg, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(xvar, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(yvar, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(covar, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeLong(count);
- output.writeDouble(xavg);
- output.writeDouble(yavg);
- output.writeDouble(xvar);
- output.writeDouble(yvar);
- output.writeDouble(covar);
- }
- };
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- StdAgg result = new StdAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- myagg.count = 0;
- myagg.xavg = 0;
- myagg.yavg = 0;
- myagg.xvar = 0;
- myagg.yvar = 0;
- myagg.covar = 0;
- }
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 2);
- Object px = parameters[0];
- Object py = parameters[1];
- if (px != null && py != null) {
- StdAgg myagg = (StdAgg) agg;
- double vx = PrimitiveObjectInspectorUtils.getDouble(px,
- xInputOI);
- double vy = PrimitiveObjectInspectorUtils.getDouble(py,
- yInputOI);
- double xavgOld = myagg.xavg;
- double yavgOld = myagg.yavg;
- myagg.count++;
- myagg.xavg += (vx - xavgOld) / myagg.count;
- myagg.yavg += (vy - yavgOld) / myagg.count;
- if (myagg.count > 1) {
- myagg.covar += (vx - xavgOld) * (vy - myagg.yavg);
- myagg.xvar += (vx - xavgOld) * (vx - myagg.xavg);
- myagg.yvar += (vy - yavgOld) * (vy - myagg.yavg);
- }
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- ((LongWritable) partialResult[0]).set(myagg.count);
- ((DoubleWritable) partialResult[1]).set(myagg.xavg);
- ((DoubleWritable) partialResult[2]).set(myagg.yavg);
- ((DoubleWritable) partialResult[3]).set(myagg.xvar);
- ((DoubleWritable) partialResult[4]).set(myagg.yvar);
- ((DoubleWritable) partialResult[5]).set(myagg.covar);
- return partialResult;
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- StdAgg myagg = (StdAgg) agg;
-
- Object partialCount = soi.getStructFieldData(partial,
- countField);
- Object partialXAvg = soi.getStructFieldData(partial, xavgField);
- Object partialYAvg = soi.getStructFieldData(partial, yavgField);
- Object partialXVar = soi.getStructFieldData(partial, xvarField);
- Object partialYVar = soi.getStructFieldData(partial, yvarField);
- Object partialCovar = soi.getStructFieldData(partial,
- covarField);
-
- long nA = myagg.count;
- long nB = countFieldOI.get(partialCount);
-
- if (nA == 0) {
- // Just copy the information since there is nothing so far
- myagg.count = countFieldOI.get(partialCount);
- myagg.xavg = xavgFieldOI.get(partialXAvg);
- myagg.yavg = yavgFieldOI.get(partialYAvg);
- myagg.xvar = xvarFieldOI.get(partialXVar);
- myagg.yvar = yvarFieldOI.get(partialYVar);
- myagg.covar = covarFieldOI.get(partialCovar);
- }
-
- if (nA != 0 && nB != 0) {
- // Merge the two partials
- double xavgA = myagg.xavg;
- double yavgA = myagg.yavg;
- double xavgB = xavgFieldOI.get(partialXAvg);
- double yavgB = yavgFieldOI.get(partialYAvg);
- double xvarB = xvarFieldOI.get(partialXVar);
- double yvarB = yvarFieldOI.get(partialYVar);
- double covarB = covarFieldOI.get(partialCovar);
-
- myagg.count += nB;
- myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
- myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
- myagg.xvar += xvarB + (xavgA - xavgB) * (xavgA - xavgB)
- * myagg.count;
- myagg.yvar += yvarB + (yavgA - yavgB) * (yavgA - yavgB)
- * myagg.count;
- myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB)
- * ((double) (nA * nB) / myagg.count);
- }
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
-
- if (myagg.count < 2) { // SQL standard - return null for zero or one
- // pair
- return null;
- } else {
- getResult().set(
- myagg.covar / java.lang.Math.sqrt(myagg.xvar)
- / java.lang.Math.sqrt(myagg.yvar));
- return getResult();
- }
- }
-
- public void setResult(DoubleWritable result) {
- this.result = result;
- }
-
- public DoubleWritable getResult() {
- return result;
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
deleted file mode 100644
index 4160d5b..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * This class implements the COUNT aggregation function as in SQL.
- */
-@Description(name = "count", value = "_FUNC_(*) - Returns the total number of retrieved rows, including "
- + "rows containing NULL values.\n"
-
- + "_FUNC_(expr) - Returns the number of rows for which the supplied "
- + "expression is non-NULL.\n"
-
- + "_FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for "
- + "which the supplied expression(s) are unique and non-NULL.")
-public class GenericUDAFCount implements GenericUDAFResolver2 {
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- // This method implementation is preserved for backward compatibility.
- return new GenericUDAFCountEvaluator();
- }
-
- @Override
- public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo)
- throws SemanticException {
-
- TypeInfo[] parameters = paramInfo.getParameters();
-
- if (parameters.length == 0) {
- if (!paramInfo.isAllColumns()) {
- throw new UDFArgumentException("Argument expected");
- }
- assert !paramInfo.isDistinct() : "DISTINCT not supported with *";
- } else {
- if (parameters.length > 1 && !paramInfo.isDistinct()) {
- throw new UDFArgumentException(
- "DISTINCT keyword must be specified");
- }
- assert !paramInfo.isAllColumns() : "* not supported in expression list";
- }
-
- return new GenericUDAFCountEvaluator().setCountAllColumns(paramInfo
- .isAllColumns());
- }
-
- /**
- * GenericUDAFCountEvaluator.
- *
- */
- public static class GenericUDAFCountEvaluator extends GenericUDAFEvaluator {
- private boolean countAllColumns = false;
- private LongObjectInspector partialCountAggOI;
- private LongWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- super.init(m, parameters);
- partialCountAggOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector;
- result = new LongWritable(0);
- return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
- }
-
- private GenericUDAFCountEvaluator setCountAllColumns(
- boolean countAllCols) {
- countAllColumns = countAllCols;
- return this;
- }
-
- /** class for storing count value. */
- static class CountAgg implements SerializableBuffer {
- long value;
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- value = BufferSerDeUtil.getLong(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeLong(value, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeLong(value);
- }
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- CountAgg buffer = new CountAgg();
- reset(buffer);
- return buffer;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- ((CountAgg) agg).value = 0;
- }
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- // parameters == null means the input table/split is empty
- if (parameters == null) {
- return;
- }
- if (countAllColumns) {
- assert parameters.length == 0;
- ((CountAgg) agg).value++;
- } else {
- assert parameters.length > 0;
- boolean countThisRow = true;
- for (Object nextParam : parameters) {
- if (nextParam == null) {
- countThisRow = false;
- break;
- }
- }
- if (countThisRow) {
- ((CountAgg) agg).value++;
- }
- }
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- long p = partialCountAggOI.get(partial);
- ((CountAgg) agg).value += p;
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- result.set(((CountAgg) agg).value);
- return result;
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- return terminate(agg);
- }
- }
-}
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
deleted file mode 100644
index 11d9dc3..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
+++ /dev/null
@@ -1,372 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * Compute the covariance covar_pop(x, y), using the following one-pass method
- * (ref. "Formulas for Robust, One-Pass Parallel Computation of Covariances and
- * Arbitrary-Order Statistical Moments", Philippe Pebay, Sandia Labs):
- *
- * Incremental: n : <count> mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg> my_n =
- * my_(n-1) + [y_n - my_(n-1)]/n : <yavg> c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n
- * - my_n) : <covariance * n>
- *
- * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
- *
- */
-@Description(name = "covariance,covar_pop", value = "_FUNC_(x,y) - Returns the population covariance of a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
- + "Any pair with a NULL is ignored. If the function is applied to an empty set, NULL\n"
- + "will be returned. Otherwise, it computes the following:\n"
- + " (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/COUNT(x,y)\n"
- + "where neither x nor y is null.")
-public class GenericUDAFCovariance extends AbstractGenericUDAFResolver {
-
- static final Log LOG = LogFactory.getLog(GenericUDAFCovariance.class
- .getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- if (parameters.length != 2) {
- throw new UDFArgumentTypeException(parameters.length - 1,
- "Exactly two arguments are expected.");
- }
-
- if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(0,
- "Only primitive type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
-
- if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(1,
- "Only primitive type arguments are accepted but "
- + parameters[1].getTypeName() + " is passed.");
- }
-
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- return new GenericUDAFCovarianceEvaluator();
- case STRING:
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(1,
- "Only numeric or string type arguments are accepted but "
- + parameters[1].getTypeName() + " is passed.");
- }
- case STRING:
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(0,
- "Only numeric or string type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- }
-
- /**
- * Evaluate the variance using the algorithm described in
- * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance,
- * presumably by Pébay, Philippe (2008), in "Formulas for Robust, One-Pass
- * Parallel Computation of Covariances and Arbitrary-Order Statistical
- * Moments", Technical Report SAND2008-6212, Sandia National Laboratories,
- * http://infoserve.sandia.gov/sand_doc/2008/086212.pdf
- *
- * Incremental: n : <count> mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
- * my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg> c_n = c_(n-1) + (x_n -
- * mx_(n-1))*(y_n - my_n) : <covariance * n>
- *
- * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
- *
- * This one-pass algorithm is stable.
- *
- */
- public static class GenericUDAFCovarianceEvaluator extends
- GenericUDAFEvaluator {
-
- // For PARTIAL1 and COMPLETE
- private PrimitiveObjectInspector xInputOI;
- private PrimitiveObjectInspector yInputOI;
-
- // For PARTIAL2 and FINAL
- private StructObjectInspector soi;
- private StructField countField;
- private StructField xavgField;
- private StructField yavgField;
- private StructField covarField;
- private LongObjectInspector countFieldOI;
- private DoubleObjectInspector xavgFieldOI;
- private DoubleObjectInspector yavgFieldOI;
- private DoubleObjectInspector covarFieldOI;
-
- // For PARTIAL1 and PARTIAL2
- private Object[] partialResult;
-
- // For FINAL and COMPLETE
- private DoubleWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- super.init(m, parameters);
-
- // init input
- if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
- assert (parameters.length == 2);
- xInputOI = (PrimitiveObjectInspector) parameters[0];
- yInputOI = (PrimitiveObjectInspector) parameters[1];
- } else {
- assert (parameters.length == 1);
- soi = (StructObjectInspector) parameters[0];
-
- countField = soi.getStructFieldRef("count");
- xavgField = soi.getStructFieldRef("xavg");
- yavgField = soi.getStructFieldRef("yavg");
- covarField = soi.getStructFieldRef("covar");
-
- countFieldOI = (LongObjectInspector) countField
- .getFieldObjectInspector();
- xavgFieldOI = (DoubleObjectInspector) xavgField
- .getFieldObjectInspector();
- yavgFieldOI = (DoubleObjectInspector) yavgField
- .getFieldObjectInspector();
- covarFieldOI = (DoubleObjectInspector) covarField
- .getFieldObjectInspector();
- }
-
- // init output
- if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
- // The output of a partial aggregation is a struct containing
- // a long count, two double averages, and a double covariance.
-
- ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
-
- foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
-
- ArrayList<String> fname = new ArrayList<String>();
- fname.add("count");
- fname.add("xavg");
- fname.add("yavg");
- fname.add("covar");
-
- partialResult = new Object[4];
- partialResult[0] = new LongWritable(0);
- partialResult[1] = new DoubleWritable(0);
- partialResult[2] = new DoubleWritable(0);
- partialResult[3] = new DoubleWritable(0);
-
- return ObjectInspectorFactory.getStandardStructObjectInspector(
- fname, foi);
-
- } else {
- setResult(new DoubleWritable(0));
- return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
- }
- }
-
- static class StdAgg implements SerializableBuffer {
- long count; // number n of elements
- double xavg; // average of x elements
- double yavg; // average of y elements
- double covar; // n times the covariance
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- count = BufferSerDeUtil.getLong(data, start);
- start += 8;
- xavg = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- yavg = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- covar = BufferSerDeUtil.getDouble(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeLong(count, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(xavg, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(yavg, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(covar, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeLong(count);
- output.writeDouble(xavg);
- output.writeDouble(yavg);
- output.writeDouble(covar);
- }
- };
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- StdAgg result = new StdAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- myagg.count = 0;
- myagg.xavg = 0;
- myagg.yavg = 0;
- myagg.covar = 0;
- }
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 2);
- Object px = parameters[0];
- Object py = parameters[1];
- if (px != null && py != null) {
- StdAgg myagg = (StdAgg) agg;
- double vx = PrimitiveObjectInspectorUtils.getDouble(px,
- xInputOI);
- double vy = PrimitiveObjectInspectorUtils.getDouble(py,
- yInputOI);
- myagg.count++;
- myagg.yavg = myagg.yavg + (vy - myagg.yavg) / myagg.count;
- if (myagg.count > 1) {
- myagg.covar += (vx - myagg.xavg) * (vy - myagg.yavg);
- }
- myagg.xavg = myagg.xavg + (vx - myagg.xavg) / myagg.count;
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- ((LongWritable) partialResult[0]).set(myagg.count);
- ((DoubleWritable) partialResult[1]).set(myagg.xavg);
- ((DoubleWritable) partialResult[2]).set(myagg.yavg);
- ((DoubleWritable) partialResult[3]).set(myagg.covar);
- return partialResult;
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- StdAgg myagg = (StdAgg) agg;
-
- Object partialCount = soi.getStructFieldData(partial,
- countField);
- Object partialXAvg = soi.getStructFieldData(partial, xavgField);
- Object partialYAvg = soi.getStructFieldData(partial, yavgField);
- Object partialCovar = soi.getStructFieldData(partial,
- covarField);
-
- long nA = myagg.count;
- long nB = countFieldOI.get(partialCount);
-
- if (nA == 0) {
- // Just copy the information since there is nothing so far
- myagg.count = countFieldOI.get(partialCount);
- myagg.xavg = xavgFieldOI.get(partialXAvg);
- myagg.yavg = yavgFieldOI.get(partialYAvg);
- myagg.covar = covarFieldOI.get(partialCovar);
- }
-
- if (nA != 0 && nB != 0) {
- // Merge the two partials
- double xavgA = myagg.xavg;
- double yavgA = myagg.yavg;
- double xavgB = xavgFieldOI.get(partialXAvg);
- double yavgB = yavgFieldOI.get(partialYAvg);
- double covarB = covarFieldOI.get(partialCovar);
-
- myagg.count += nB;
- myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
- myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
- myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB)
- * ((double) (nA * nB) / myagg.count);
- }
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
-
- if (myagg.count == 0) { // SQL standard - return null for zero
- // elements
- return null;
- } else {
- getResult().set(myagg.covar / (myagg.count));
- return getResult();
- }
- }
-
- public void setResult(DoubleWritable result) {
- this.result = result;
- }
-
- public DoubleWritable getResult() {
- return result;
- }
- }
-
-}
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
deleted file mode 100644
index 0323531..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
+++ /dev/null
@@ -1,294 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.util.StringUtils;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * GenericUDAFSum.
- *
- */
-@Description(name = "sum", value = "_FUNC_(x) - Returns the sum of a set of numbers")
-public class GenericUDAFSum extends AbstractGenericUDAFResolver {
-
- static final Log LOG = LogFactory.getLog(GenericUDAFSum.class.getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- if (parameters.length != 1) {
- throw new UDFArgumentTypeException(parameters.length - 1,
- "Exactly one argument is expected.");
- }
-
- if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(0,
- "Only primitive type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- return new GenericUDAFSumLong();
- case FLOAT:
- case DOUBLE:
- case STRING:
- return new GenericUDAFSumDouble();
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(0,
- "Only numeric or string type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- }
-
- /**
- * GenericUDAFSumDouble.
- *
- */
- public static class GenericUDAFSumDouble extends GenericUDAFEvaluator {
- private PrimitiveObjectInspector inputOI;
- private DoubleWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- super.init(m, parameters);
- result = new DoubleWritable(0);
- inputOI = (PrimitiveObjectInspector) parameters[0];
- return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
- }
-
- /** class for storing double sum value. */
- static class SumDoubleAgg implements SerializableBuffer {
- boolean empty;
- double sum;
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- empty = BufferSerDeUtil.getBoolean(data, start);
- start += 1;
- sum = BufferSerDeUtil.getDouble(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeBoolean(empty, data, start);
- start += 1;
- BufferSerDeUtil.writeDouble(sum, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeBoolean(empty);
- output.writeDouble(sum);
- }
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- SumDoubleAgg result = new SumDoubleAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- SumDoubleAgg myagg = (SumDoubleAgg) agg;
- myagg.empty = true;
- myagg.sum = 0;
- }
-
- boolean warned = false;
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- try {
- merge(agg, parameters[0]);
- } catch (NumberFormatException e) {
- if (!warned) {
- warned = true;
- LOG.warn(getClass().getSimpleName() + " "
- + StringUtils.stringifyException(e));
- LOG.warn(getClass().getSimpleName()
- + " ignoring similar exceptions.");
- }
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- return terminate(agg);
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- SumDoubleAgg myagg = (SumDoubleAgg) agg;
- myagg.empty = false;
- myagg.sum += PrimitiveObjectInspectorUtils.getDouble(partial,
- inputOI);
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- SumDoubleAgg myagg = (SumDoubleAgg) agg;
- if (myagg.empty) {
- return null;
- }
- result.set(myagg.sum);
- return result;
- }
-
- }
-
- /**
- * GenericUDAFSumLong.
- *
- */
- public static class GenericUDAFSumLong extends GenericUDAFEvaluator {
- private PrimitiveObjectInspector inputOI;
- private LongWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- super.init(m, parameters);
- result = new LongWritable(0);
- inputOI = (PrimitiveObjectInspector) parameters[0];
- return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
- }
-
- /** class for storing double sum value. */
- static class SumLongAgg implements SerializableBuffer {
- boolean empty;
- long sum;
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- empty = BufferSerDeUtil.getBoolean(data, start);
- start += 1;
- sum = BufferSerDeUtil.getLong(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeBoolean(empty, data, start);
- start += 1;
- BufferSerDeUtil.writeLong(sum, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeBoolean(empty);
- output.writeLong(sum);
- }
- }
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- SumLongAgg result = new SumLongAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- SumLongAgg myagg = (SumLongAgg) agg;
- myagg.empty = true;
- myagg.sum = 0;
- }
-
- private boolean warned = false;
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- try {
- merge(agg, parameters[0]);
- } catch (NumberFormatException e) {
- if (!warned) {
- warned = true;
- LOG.warn(getClass().getSimpleName() + " "
- + StringUtils.stringifyException(e));
- }
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- return terminate(agg);
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- SumLongAgg myagg = (SumLongAgg) agg;
- myagg.sum += PrimitiveObjectInspectorUtils.getLong(partial,
- inputOI);
- myagg.empty = false;
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- SumLongAgg myagg = (SumLongAgg) agg;
- if (myagg.empty) {
- return null;
- }
- result.set(myagg.sum);
- return result;
- }
-
- }
-
-}
diff --git a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java b/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
deleted file mode 100644
index 4c16f5a..0000000
--- a/hivesterix/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
+++ /dev/null
@@ -1,331 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.udf.generic;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.util.StringUtils;
-
-import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
-import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
-
-/**
- * Compute the variance. This class is extended by: GenericUDAFVarianceSample
- * GenericUDAFStd GenericUDAFStdSample
- *
- */
-@Description(name = "variance,var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers")
-public class GenericUDAFVariance extends AbstractGenericUDAFResolver {
-
- static final Log LOG = LogFactory.getLog(GenericUDAFVariance.class
- .getName());
-
- @Override
- public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
- throws SemanticException {
- if (parameters.length != 1) {
- throw new UDFArgumentTypeException(parameters.length - 1,
- "Exactly one argument is expected.");
- }
-
- if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
- throw new UDFArgumentTypeException(0,
- "Only primitive type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
- case BYTE:
- case SHORT:
- case INT:
- case LONG:
- case FLOAT:
- case DOUBLE:
- case STRING:
- return new GenericUDAFVarianceEvaluator();
- case BOOLEAN:
- default:
- throw new UDFArgumentTypeException(0,
- "Only numeric or string type arguments are accepted but "
- + parameters[0].getTypeName() + " is passed.");
- }
- }
-
- /**
- * Evaluate the variance using the algorithm described by Chan, Golub, and
- * LeVeque in
- * "Algorithms for computing the sample variance: analysis and recommendations"
- * The American Statistician, 37 (1983) pp. 242--247.
- *
- * variance = variance1 + variance2 + n/(m*(m+n)) * pow(((m/n)*t1 - t2),2)
- *
- * where: - variance is sum[x-avg^2] (this is actually n times the variance)
- * and is updated at every step. - n is the count of elements in chunk1 - m
- * is the count of elements in chunk2 - t1 = sum of elements in chunk1, t2 =
- * sum of elements in chunk2.
- *
- * This algorithm was proven to be numerically stable by J.L. Barlow in
- * "Error analysis of a pairwise summation algorithm to compute sample variance"
- * Numer. Math, 58 (1991) pp. 583--590
- *
- */
- public static class GenericUDAFVarianceEvaluator extends
- GenericUDAFEvaluator {
-
- // For PARTIAL1 and COMPLETE
- private PrimitiveObjectInspector inputOI;
-
- // For PARTIAL2 and FINAL
- private StructObjectInspector soi;
- private StructField countField;
- private StructField sumField;
- private StructField varianceField;
- private LongObjectInspector countFieldOI;
- private DoubleObjectInspector sumFieldOI;
-
- // For PARTIAL1 and PARTIAL2
- private Object[] partialResult;
-
- // For FINAL and COMPLETE
- private DoubleWritable result;
-
- @Override
- public ObjectInspector init(Mode m, ObjectInspector[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- super.init(m, parameters);
-
- // init input
- if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
- inputOI = (PrimitiveObjectInspector) parameters[0];
- } else {
- soi = (StructObjectInspector) parameters[0];
-
- countField = soi.getStructFieldRef("count");
- sumField = soi.getStructFieldRef("sum");
- varianceField = soi.getStructFieldRef("variance");
-
- countFieldOI = (LongObjectInspector) countField
- .getFieldObjectInspector();
- sumFieldOI = (DoubleObjectInspector) sumField
- .getFieldObjectInspector();
- }
-
- // init output
- if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
- // The output of a partial aggregation is a struct containing
- // a long count and doubles sum and variance.
-
- ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
-
- foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
- foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
-
- ArrayList<String> fname = new ArrayList<String>();
- fname.add("count");
- fname.add("sum");
- fname.add("variance");
-
- partialResult = new Object[3];
- partialResult[0] = new LongWritable(0);
- partialResult[1] = new DoubleWritable(0);
- partialResult[2] = new DoubleWritable(0);
-
- return ObjectInspectorFactory.getStandardStructObjectInspector(
- fname, foi);
-
- } else {
- setResult(new DoubleWritable(0));
- return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
- }
- }
-
- static class StdAgg implements SerializableBuffer {
- long count; // number of elements
- double sum; // sum of elements
- double variance; // sum[x-avg^2] (this is actually n times the
- // variance)
-
- @Override
- public void deSerializeAggBuffer(byte[] data, int start, int len) {
- count = BufferSerDeUtil.getLong(data, start);
- start += 8;
- sum = BufferSerDeUtil.getDouble(data, start);
- start += 8;
- variance = BufferSerDeUtil.getDouble(data, start);
- }
-
- @Override
- public void serializeAggBuffer(byte[] data, int start, int len) {
- BufferSerDeUtil.writeLong(count, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(sum, data, start);
- start += 8;
- BufferSerDeUtil.writeDouble(variance, data, start);
- }
-
- @Override
- public void serializeAggBuffer(DataOutput output)
- throws IOException {
- output.writeLong(count);
- output.writeDouble(sum);
- output.writeDouble(variance);
- }
- };
-
- @Override
- public AggregationBuffer getNewAggregationBuffer() throws HiveException {
- StdAgg result = new StdAgg();
- reset(result);
- return result;
- }
-
- @Override
- public void reset(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- myagg.count = 0;
- myagg.sum = 0;
- myagg.variance = 0;
- }
-
- private boolean warned = false;
-
- @Override
- public void iterate(AggregationBuffer agg, Object[] parameters)
- throws HiveException {
- assert (parameters.length == 1);
- Object p = parameters[0];
- if (p != null) {
- StdAgg myagg = (StdAgg) agg;
- try {
- double v = PrimitiveObjectInspectorUtils.getDouble(p,
- inputOI);
- myagg.count++;
- myagg.sum += v;
- if (myagg.count > 1) {
- double t = myagg.count * v - myagg.sum;
- myagg.variance += (t * t)
- / ((double) myagg.count * (myagg.count - 1));
- }
- } catch (NumberFormatException e) {
- if (!warned) {
- warned = true;
- LOG.warn(getClass().getSimpleName() + " "
- + StringUtils.stringifyException(e));
- LOG.warn(getClass().getSimpleName()
- + " ignoring similar exceptions.");
- }
- }
- }
- }
-
- @Override
- public Object terminatePartial(AggregationBuffer agg)
- throws HiveException {
- StdAgg myagg = (StdAgg) agg;
- ((LongWritable) partialResult[0]).set(myagg.count);
- ((DoubleWritable) partialResult[1]).set(myagg.sum);
- ((DoubleWritable) partialResult[2]).set(myagg.variance);
- return partialResult;
- }
-
- @Override
- public void merge(AggregationBuffer agg, Object partial)
- throws HiveException {
- if (partial != null) {
- StdAgg myagg = (StdAgg) agg;
-
- Object partialCount = soi.getStructFieldData(partial,
- countField);
- Object partialSum = soi.getStructFieldData(partial, sumField);
- Object partialVariance = soi.getStructFieldData(partial,
- varianceField);
-
- long n = myagg.count;
- long m = countFieldOI.get(partialCount);
-
- if (n == 0) {
- // Just copy the information since there is nothing so far
- myagg.variance = sumFieldOI.get(partialVariance);
- myagg.count = countFieldOI.get(partialCount);
- myagg.sum = sumFieldOI.get(partialSum);
- }
-
- if (m != 0 && n != 0) {
- // Merge the two partials
-
- double a = myagg.sum;
- double b = sumFieldOI.get(partialSum);
-
- myagg.count += m;
- myagg.sum += b;
- double t = (m / (double) n) * a - b;
- myagg.variance += sumFieldOI.get(partialVariance)
- + ((n / (double) m) / ((double) n + m)) * t * t;
- }
- }
- }
-
- @Override
- public Object terminate(AggregationBuffer agg) throws HiveException {
- StdAgg myagg = (StdAgg) agg;
-
- if (myagg.count == 0) { // SQL standard - return null for zero
- // elements
- return null;
- } else {
- if (myagg.count > 1) {
- getResult().set(myagg.variance / (myagg.count));
- } else { // for one element the variance is always 0
- getResult().set(0);
- }
- return getResult();
- }
- }
-
- public void setResult(DoubleWritable result) {
- this.result = result;
- }
-
- public DoubleWritable getResult() {
- return result;
- }
- }
-
-}
diff --git a/hivesterix/src/main/scripts/run.cmd b/hivesterix/src/main/scripts/run.cmd
deleted file mode 100755
index b8eb4a0..0000000
--- a/hivesterix/src/main/scripts/run.cmd
+++ /dev/null
@@ -1,63 +0,0 @@
-@ECHO OFF
-SETLOCAL
-
-:: Licensed to the Apache Software Foundation (ASF) under one or more
-:: contributor license agreements. See the NOTICE file distributed with
-:: this work for additional information regarding copyright ownership.
-:: The ASF licenses this file to You under the Apache License, Version 2.0
-:: (the "License"); you may not use this file except in compliance with
-:: the License. You may obtain a copy of the License at
-::
-:: http://www.apache.org/licenses/LICENSE-2.0
-::
-:: Unless required by applicable law or agreed to in writing, software
-:: distributed under the License is distributed on an "AS IS" BASIS,
-:: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-:: See the License for the specific language governing permissions and
-:: limitations under the License.
-
-:: JAVA classpath
-:: Use the local variable CLASSPATH to add custom entries (e.g. JDBC drivers) to
-:: the classpath. Separate multiple paths with ":". Enclose the value
-:: in double quotes. Adding additional files or locations on separate
-:: lines makes things clearer.
-:: Note: If under running under cygwin use "/cygdrive/c/..." for "C:/..."
-:: Example:
-::
-:: Set the CLASSPATH to a jar file and a directory. Note that
-:: "classes dir" is a directory of class files with a space in the name.
-::
-:: CLASSPATH="usr/local/Product1/lib/product.jar"
-:: CLASSPATH="${CLASSPATH}:../MyProject/classes dir"
-::
-SET CLASSPATH="@classpath@"
-
-:: JVM parameters
-:: If you want to modify the default parameters (e.g. maximum heap size -Xmx)
-:: for the Java virtual machine set the local variable JVM_PARAMETERS below
-:: Example:
-:: JVM_PARAMETERS=-Xms100M -Xmx200M
-::
-:: Below are the JVM parameters needed to do remote debugging using Intellij
-:: IDEA. Uncomment and then do: JVM_PARAMETERS="$IDEA_REMOTE_DEBUG_PARAMS"
-:: IDEA_REMOTE_DEBUG_PARAMS="-Xdebug -Xnoagent -Djava.compiler=NONE -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"
-::
-:: JVM_PARAMETERS=
-
-:: ---------------------------------------------------------------------------
-:: Default configuration. Do not modify below this line.
-:: ---------------------------------------------------------------------------
-:: Application specific parameters
-
-SET MAIN_CLASS=@main.class@
-SET JVM_PARAMS=@jvm.params@
-SET PROGRAM_PARAMS=@program.params@
-
-:: Try to find java virtual machine
-IF NOT DEFINED JAVA (
- IF NOT DEFINED JAVA_HOME SET JAVA="java.exe"
- IF DEFINED JAVA_HOME SET JAVA="%JAVA_HOME%\bin\java.exe"
-)
-
-:: Run program
-%JAVA% %JVM_PARAMS% %JVM_PARAMETERS% -classpath %CLASSPATH% %MAIN_CLASS% %PROGRAM_PARAMS% %*
diff --git a/hivesterix/src/main/scripts/run.sh b/hivesterix/src/main/scripts/run.sh
deleted file mode 100755
index a998626..0000000
--- a/hivesterix/src/main/scripts/run.sh
+++ /dev/null
@@ -1,81 +0,0 @@
-#!/bin/sh
-# JAVA classpath
-# Use the local variable CLASSPATH to add custom entries (e.g. JDBC drivers) to
-# the classpath. Separate multiple paths with ":". Enclose the value
-# in double quotes. Adding additional files or locations on separate
-# lines makes things clearer.
-# Note: If under running under cygwin use "/cygdrive/c/..." for "C:/..."
-# Example:
-#
-# Set the CLASSPATH to a jar file and a directory. Note that
-# "classes dir" is a directory of class files with a space in the name.
-#
-# CLASSPATH="usr/local/Product1/lib/product.jar"
-# CLASSPATH="${CLASSPATH}:../MyProject/classes dir"
-#
-CLASSPATH="@classpath@"
-
-# JVM parameters
-# If you want to modify the default parameters (e.g. maximum heap size -Xmx)
-# for the Java virtual machine set the local variable JVM_PARAMETERS below
-# Example:
-# JVM_PARAMETERS=-Xms100M -Xmx200M
-#
-# Below are the JVM parameters needed to do remote debugging using Intellij
-# IDEA. Uncomment and then do: JVM_PARAMETERS="$IDEA_REMOTE_DEBUG_PARAMS"
-# IDEA_REMOTE_DEBUG_PARAMS="-Xdebug -Xnoagent -Djava.compiler=NONE -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"
-#
-# JVM_PARAMETERS=
-
-#run with shared memory setup
-#if [ -n "${RUN_SHARED_MEM}"]; then
-# JVM_PARAMETERS="${JVM_PARAMETERS} -Xdebug -Xnoagent -Djava.compiler=NONE -Xrunjdwp:transport=dt_shmem,server=n,address=javadebug,suspend=y"
-#fi
-
-# ---------------------------------------------------------------------------
-# Default configuration. Do not modify below this line.
-# ---------------------------------------------------------------------------
-# Application specific parameters
-
-MAIN_CLASS="@main.class@"
-JVM_PARAMS="@jvm.params@"
-PROGRAM_PARAMS="@program.params@"
-
-# Cygwin support. $cygwin _must_ be set to either true or false.
-case "`uname`" in
- CYGWIN*) cygwin=true ;;
- *) cygwin=false ;;
-esac
-
-# For Cygwin, ensure paths are in UNIX format before anything is touched
-if $cygwin; then
- [ -n "$JAVA_HOME" ] &&
- JAVA_HOME=`cygpath --unix "$JAVA_HOME"`
- [ -n "$CLASSPATH" ] &&
- CLASSPATH=`cygpath --path --unix "$CLASSPATH"`
-fi
-
-# Try to find java virtual machine
-if [ -z "${JAVA}" ]; then
- if [ -z "${JAVA_HOME}" ]; then
- JAVA=java
- else
- JAVA=${JAVA_HOME}/bin/java
- fi
-fi
-
-# Try to find directory where this script is located
-COMMAND="${PWD}/$0"
-if [ ! -f "${COMMAND}" ]; then
- COMMAND="$0"
-fi
-BASEDIR=`expr "${COMMAND}" : '\(.*\)/\.*'`
-
-# For Cygwin, switch paths to Windows format before running java
-if $cygwin; then
-# JAVA=`cygpath --path --windows "$JAVA"`
- CLASSPATH=`cygpath --path --windows "$CLASSPATH"`
-fi
-
-# Run program
-${JAVA} ${JVM_PARAMS} ${JVM_PARAMETERS} -classpath "${CLASSPATH}" ${MAIN_CLASS} ${PROGRAM_PARAMS} $*
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java
deleted file mode 100644
index a69a3f2..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestCase.java
+++ /dev/null
@@ -1,144 +0,0 @@
-package edu.uci.ics.hivesterix.perf;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestCase;
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-
-public class PerfTestCase extends AbstractPerfTestCase {
- private File resultFile;
- private FileSystem dfs;
-
- PerfTestCase(File queryFile, File resultFile) {
- super("testRuntimeFunction", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testRuntimeFunction() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
-
- dfs = FileSystem.get(ConfUtil.getJobConf());
-
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- driver.run(query);
- driver.clear();
- i++;
- }
-
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- String tableName = removeExt(resultFile.getName());
- String directory = warehouse + "/" + tableName + "/";
- String localDirectory = "tmp";
-
- FileStatus[] files = dfs.listStatus(new Path(directory));
- FileSystem lfs = null;
- if (files == null) {
- lfs = FileSystem.getLocal(ConfUtil.getJobConf());
- files = lfs.listStatus(new Path(directory));
- }
-
- File resultDirectory = new File(localDirectory + "/" + tableName);
- deleteDir(resultDirectory);
- resultDirectory.mkdir();
-
- for (FileStatus fs : files) {
- Path src = fs.getPath();
- if (src.getName().indexOf("crc") >= 0)
- continue;
-
- String destStr = localDirectory + "/" + tableName + "/"
- + src.getName();
- Path dest = new Path(destStr);
- if (lfs != null) {
- lfs.copyToLocalFile(src, dest);
- dfs.copyFromLocalFile(dest, new Path(directory));
- } else
- dfs.copyToLocalFile(src, dest);
- }
-
- File[] rFiles = resultDirectory.listFiles();
- StringBuilder sb = new StringBuilder();
- for (File r : rFiles) {
- if (r.getName().indexOf("crc") >= 0)
- continue;
- readFileToString(r, sb);
- }
- deleteDir(resultDirectory);
-
- StringBuilder buf = new StringBuilder();
- readFileToString(resultFile, buf);
- if (!equal(buf, sb)) {
- throw new Exception("Result for " + queryFile + " changed:\n"
- + sw.toString());
- }
- }
-
- private void deleteDir(File resultDirectory) {
- if (resultDirectory.exists()) {
- File[] rFiles = resultDirectory.listFiles();
- for (File r : rFiles)
- r.delete();
- resultDirectory.delete();
- }
- }
-
- private boolean equal(StringBuilder sb1, StringBuilder sb2) {
- String s1 = sb1.toString();
- String s2 = sb2.toString();
- String[] rowsOne = s1.split("\n");
- String[] rowsTwo = s2.split("\n");
-
- if (rowsOne.length != rowsTwo.length)
- return false;
-
- for (int i = 0; i < rowsOne.length; i++) {
- String row1 = rowsOne[i];
- String row2 = rowsTwo[i];
-
- if (row1.equals(row2))
- continue;
-
- String[] fields1 = row1.split("");
- String[] fields2 = row2.split("");
-
- for (int j = 0; j < fields1.length; j++) {
- if (fields1[j].equals(fields2[j])) {
- continue;
- } else if (fields1[j].indexOf('.') < 0) {
- return false;
- } else {
- Float float1 = Float.parseFloat(fields1[j]);
- Float float2 = Float.parseFloat(fields2[j]);
-
- if (Math.abs(float1 - float2) == 0)
- continue;
- else
- return false;
- }
- }
- }
-
- return true;
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java
deleted file mode 100644
index 8d8178f..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuite.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package edu.uci.ics.hivesterix.perf;
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestSuiteClass;
-
-public class PerfTestSuite extends AbstractPerfTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/perf/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/perf/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/perf/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
-
- public static Test suite() throws Exception {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- PerfTestSuite testSuite = new PerfTestSuite();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile()) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new PerfTestCase(qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java
deleted file mode 100644
index 258db22..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteCaseGenerator.java
+++ /dev/null
@@ -1,101 +0,0 @@
-package edu.uci.ics.hivesterix.perf;
-
-import java.io.File;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestCase;
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-
-public class PerfTestSuiteCaseGenerator extends AbstractPerfTestCase {
- private File resultFile;
- private FileSystem dfs;
-
- PerfTestSuiteCaseGenerator(File queryFile, File resultFile) {
- super("testRuntimeFunction", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testRuntimeFunction() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf);
- driver.init();
-
- dfs = FileSystem.get(ConfUtil.getJobConf());
-
- long startTime = System.currentTimeMillis();
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- driver.run(query);
- // driver.clear();
- i++;
- }
- long endTime = System.currentTimeMillis();
- System.out.println(resultFile.getName() + " execution time "
- + (endTime - startTime));
-
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- String tableName = removeExt(resultFile.getName());
- String directory = warehouse + "/" + tableName + "/";
- String localDirectory = "tmp";
-
- FileStatus[] files = dfs.listStatus(new Path(directory));
- FileSystem lfs = null;
- if (files == null) {
- lfs = FileSystem.getLocal(ConfUtil.getJobConf());
- files = lfs.listStatus(new Path(directory));
- }
-
- File resultDirectory = new File(localDirectory + "/" + tableName);
- deleteDir(resultDirectory);
- resultDirectory.mkdir();
-
- for (FileStatus fs : files) {
- Path src = fs.getPath();
- if (src.getName().indexOf("crc") >= 0)
- continue;
-
- String destStr = localDirectory + "/" + tableName + "/"
- + src.getName();
- Path dest = new Path(destStr);
- if (lfs != null) {
- lfs.copyToLocalFile(src, dest);
- dfs.copyFromLocalFile(dest, new Path(directory));
- } else
- dfs.copyToLocalFile(src, dest);
- }
-
- File[] rFiles = resultDirectory.listFiles();
- StringBuilder sb = new StringBuilder();
- for (File r : rFiles) {
- if (r.getName().indexOf("crc") >= 0)
- continue;
- readFileToString(r, sb);
- }
- deleteDir(resultDirectory);
-
- writeStringToFile(resultFile, sb);
- }
-
- private void deleteDir(File resultDirectory) {
- if (resultDirectory.exists()) {
- File[] rFiles = resultDirectory.listFiles();
- for (File r : rFiles)
- r.delete();
- resultDirectory.delete();
- }
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java
deleted file mode 100644
index 0a27ca2..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/PerfTestSuiteGenerator.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package edu.uci.ics.hivesterix.perf;
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.perf.base.AbstractPerfTestSuiteClass;
-
-public class PerfTestSuiteGenerator extends AbstractPerfTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/perf/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/perf/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/perf/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
-
- public static Test suite() throws Exception {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- PerfTestSuiteGenerator testSuite = new PerfTestSuiteGenerator();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile() && qFile.getName().startsWith("q18_")) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new PerfTestSuiteCaseGenerator(qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java
deleted file mode 100644
index f55d6a1..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestCase.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package edu.uci.ics.hivesterix.perf.base;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import junit.framework.TestCase;
-
-public class AbstractPerfTestCase extends TestCase {
- protected File queryFile;
-
- public AbstractPerfTestCase(String testName, File queryFile) {
- super(testName);
- }
-
- protected static void readFileToString(File file, StringBuilder buf)
- throws Exception {
- BufferedReader result = new BufferedReader(new FileReader(file));
- while (true) {
- String s = result.readLine();
- if (s == null) {
- break;
- } else {
- buf.append(s);
- buf.append('\n');
- }
- }
- result.close();
- }
-
- protected static void writeStringToFile(File file, StringWriter buf)
- throws Exception {
- PrintWriter result = new PrintWriter(new FileWriter(file));
- result.print(buf);
- result.close();
- }
-
- protected static void writeStringToFile(File file, StringBuilder buf)
- throws Exception {
- PrintWriter result = new PrintWriter(new FileWriter(file));
- result.print(buf);
- result.close();
- }
-
- protected static String removeExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot);
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java
deleted file mode 100644
index 05474ca..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/perf/base/AbstractPerfTestSuiteClass.java
+++ /dev/null
@@ -1,210 +0,0 @@
-package edu.uci.ics.hivesterix.perf.base;
-
-import java.io.BufferedReader;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import junit.framework.TestSuite;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hyracks.api.client.HyracksConnection;
-import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
-import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
-import edu.uci.ics.hyracks.control.common.controllers.CCConfig;
-import edu.uci.ics.hyracks.control.common.controllers.NCConfig;
-import edu.uci.ics.hyracks.control.nc.NodeControllerService;
-
-@SuppressWarnings("deprecation")
-public abstract class AbstractPerfTestSuiteClass extends TestSuite {
-
- private static final String PATH_TO_HADOOP_CONF = "src/test/resources/perf/hadoop/conf";
- private static final String PATH_TO_HIVE_CONF = "src/test/resources/perf/hive/conf/hive-default.xml";
- private static final String PATH_TO_DATA = "src/test/resources/perf/data/";
-
- private MiniDFSCluster dfsCluster;
- private MiniMRCluster mrCluster;
-
- private JobConf conf = new JobConf();
- protected FileSystem dfs;
-
- private int numberOfNC = 2;
- private ClusterControllerService cc;
- private Map<String, NodeControllerService> ncs = new HashMap<String, NodeControllerService>();
-
- /**
- * setup cluster
- *
- * @throws IOException
- */
- protected void setup() throws Exception {
- setupHdfs();
- setupHyracks();
- }
-
- private void setupHdfs() throws IOException {
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
- HiveConf hconf = new HiveConf(SessionState.class);
- hconf.addResource(new Path(PATH_TO_HIVE_CONF));
-
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- lfs.delete(new Path("metastore_db"), true);
-
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(hconf, numberOfNC, true, null);
- dfs = dfsCluster.getFileSystem();
-
- mrCluster = new MiniMRCluster(2, dfs.getUri().toString(), 1);
- hconf.setVar(HiveConf.ConfVars.HADOOPJT,
- "localhost:" + mrCluster.getJobTrackerPort());
- hconf.setInt("mapred.min.split.size", 1342177280);
-
- conf = new JobConf(hconf);
- ConfUtil.setJobConf(conf);
-
- String fsName = conf.get("fs.default.name");
- hconf.set("hive.metastore.warehouse.dir",
- fsName.concat("/tmp/hivesterix"));
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- dfs.mkdirs(new Path(warehouse));
- ConfUtil.setHiveConf(hconf);
- }
-
- private void setupHyracks() throws Exception {
- // read hive conf
- HiveConf hconf = new HiveConf(SessionState.class);
- hconf.addResource(new Path(PATH_TO_HIVE_CONF));
- SessionState.start(hconf);
- String ipAddress = hconf.get("hive.hyracks.host");
- int clientPort = Integer.parseInt(hconf.get("hive.hyracks.port"));
- int clusterPort = clientPort;
- String applicationName = hconf.get("hive.hyracks.app");
-
- // start hyracks cc
- CCConfig ccConfig = new CCConfig();
- ccConfig.clientNetIpAddress = ipAddress;
- ccConfig.clientNetPort = clientPort;
- ccConfig.clusterNetPort = clusterPort;
- ccConfig.profileDumpPeriod = 1000;
- ccConfig.heartbeatPeriod = 200000000;
- ccConfig.maxHeartbeatLapsePeriods = 200000000;
- cc = new ClusterControllerService(ccConfig);
- cc.start();
-
- // start hyracks nc
- for (int i = 0; i < numberOfNC; i++) {
- NCConfig ncConfig = new NCConfig();
- ncConfig.ccHost = ipAddress;
- ncConfig.clusterNetIPAddress = ipAddress;
- ncConfig.ccPort = clientPort;
- ncConfig.dataIPAddress = "127.0.0.1";
- ncConfig.nodeId = "nc" + i;
- NodeControllerService nc = new NodeControllerService(ncConfig);
- nc.start();
- ncs.put(ncConfig.nodeId, nc);
- }
-
- IHyracksClientConnection hcc = new HyracksConnection(
- ccConfig.clientNetIpAddress, clientPort);
- hcc.createApplication(applicationName, null);
- }
-
- protected void makeDir(String path) throws IOException {
- dfs.mkdirs(new Path(path));
- }
-
- protected void loadFiles(String src, String dest) throws IOException {
- dfs.copyFromLocalFile(new Path(src), new Path(dest));
- }
-
- protected void cleanup() throws Exception {
- cleanupHdfs();
- cleanupHyracks();
- }
-
- /**
- * cleanup hdfs cluster
- */
- private void cleanupHdfs() throws IOException {
- dfs.delete(new Path("/"), true);
- FileSystem.closeAll();
- dfsCluster.shutdown();
- }
-
- /**
- * cleanup hyracks cluster
- */
- private void cleanupHyracks() throws Exception {
- Iterator<NodeControllerService> iterator = ncs.values().iterator();
- while (iterator.hasNext()) {
- NodeControllerService nc = iterator.next();
- nc.stop();
- }
- cc.stop();
- }
-
- protected static List<String> getIgnoreList(String ignorePath)
- throws FileNotFoundException, IOException {
- BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
- String s = null;
- List<String> ignores = new ArrayList<String>();
- while ((s = reader.readLine()) != null) {
- ignores.add(s);
- }
- reader.close();
- return ignores;
- }
-
- protected static boolean isIgnored(String q, List<String> ignoreList) {
- for (String ignore : ignoreList) {
- if (ignore.equals(q)) {
- return true;
- }
- }
- return false;
- }
-
- protected void loadData() throws IOException {
-
- makeDir("/tpch");
- makeDir("/tpch/customer");
- makeDir("/tpch/lineitem");
- makeDir("/tpch/orders");
- makeDir("/tpch/part");
- makeDir("/tpch/partsupp");
- makeDir("/tpch/supplier");
- makeDir("/tpch/nation");
- makeDir("/tpch/region");
-
- makeDir("/jarod");
-
- loadFiles(PATH_TO_DATA + "customer.tbl", "/tpch/customer/");
- loadFiles(PATH_TO_DATA + "lineitem.tbl", "/tpch/lineitem/");
- loadFiles(PATH_TO_DATA + "orders.tbl", "/tpch/orders/");
- loadFiles(PATH_TO_DATA + "part.tbl", "/tpch/part/");
- loadFiles(PATH_TO_DATA + "partsupp.tbl", "/tpch/partsupp/");
- loadFiles(PATH_TO_DATA + "supplier.tbl", "/tpch/supplier/");
- loadFiles(PATH_TO_DATA + "nation.tbl", "/tpch/nation/");
- loadFiles(PATH_TO_DATA + "region.tbl", "/tpch/region/");
-
- loadFiles(PATH_TO_DATA + "ext-gby.tbl", "/jarod/");
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java
deleted file mode 100644
index 560cef7..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractHivesterixTestCase.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package edu.uci.ics.hivesterix.test.base;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
-import java.io.FileWriter;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import junit.framework.TestCase;
-
-public class AbstractHivesterixTestCase extends TestCase {
- protected File queryFile;
-
- public AbstractHivesterixTestCase(String testName, File queryFile) {
- super(testName);
- }
-
- protected static void readFileToString(File file, StringBuilder buf)
- throws Exception {
- BufferedReader result = new BufferedReader(new FileReader(file));
- while (true) {
- String s = result.readLine();
- if (s == null) {
- break;
- } else {
- buf.append(s);
- buf.append('\n');
- }
- }
- result.close();
- }
-
- protected static void writeStringToFile(File file, StringWriter buf)
- throws Exception {
- PrintWriter result = new PrintWriter(new FileWriter(file));
- result.print(buf);
- result.close();
- }
-
- protected static void writeStringToFile(File file, StringBuilder buf)
- throws Exception {
- PrintWriter result = new PrintWriter(new FileWriter(file));
- result.print(buf);
- result.close();
- }
-
- protected static String removeExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot);
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java
deleted file mode 100644
index e9a5736..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/base/AbstractTestSuiteClass.java
+++ /dev/null
@@ -1,217 +0,0 @@
-package edu.uci.ics.hivesterix.test.base;
-
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-import junit.framework.TestSuite;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.session.SessionState;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MiniMRCluster;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hyracks.api.client.HyracksConnection;
-import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
-import edu.uci.ics.hyracks.control.cc.ClusterControllerService;
-import edu.uci.ics.hyracks.control.common.controllers.CCConfig;
-import edu.uci.ics.hyracks.control.common.controllers.NCConfig;
-import edu.uci.ics.hyracks.control.nc.NodeControllerService;
-
-@SuppressWarnings("deprecation")
-public abstract class AbstractTestSuiteClass extends TestSuite {
-
- private static final String PATH_TO_HADOOP_CONF = "src/test/resources/runtimefunctionts/hadoop/conf";
- private static final String PATH_TO_HIVE_CONF = "src/test/resources/runtimefunctionts/hive/conf/hive-default.xml";
-
- private static final String PATH_TO_CLUSTER_CONF = "src/test/resources/runtimefunctionts/hive/conf/topology.xml";
- private static final String PATH_TO_DATA = "src/test/resources/runtimefunctionts/data/";
-
- private MiniDFSCluster dfsCluster;
- private MiniMRCluster mrCluster;
-
- private JobConf conf = new JobConf();
- protected FileSystem dfs;
-
- private int numberOfNC = 2;
- private ClusterControllerService cc;
- private Map<String, NodeControllerService> ncs = new HashMap<String, NodeControllerService>();
-
- /**
- * setup cluster
- *
- * @throws IOException
- */
- protected void setup() throws Exception {
- setupHdfs();
- setupHyracks();
- }
-
- private void setupHdfs() throws IOException {
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/core-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/mapred-site.xml"));
- conf.addResource(new Path(PATH_TO_HADOOP_CONF + "/hdfs-site.xml"));
- HiveConf hconf = new HiveConf(SessionState.class);
- hconf.addResource(new Path(PATH_TO_HIVE_CONF));
-
- FileSystem lfs = FileSystem.getLocal(new Configuration());
- lfs.delete(new Path("build"), true);
- lfs.delete(new Path("metastore_db"), true);
-
- System.setProperty("hadoop.log.dir", "logs");
- dfsCluster = new MiniDFSCluster(hconf, numberOfNC, true, null);
- dfs = dfsCluster.getFileSystem();
-
- mrCluster = new MiniMRCluster(2, dfs.getUri().toString(), 1);
- hconf.setVar(HiveConf.ConfVars.HADOOPJT,
- "localhost:" + mrCluster.getJobTrackerPort());
-
- conf = new JobConf(hconf);
- ConfUtil.setJobConf(conf);
-
- String fsName = conf.get("fs.default.name");
- hconf.set("hive.metastore.warehouse.dir",
- fsName.concat("/tmp/hivesterix"));
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- dfs.mkdirs(new Path(warehouse));
- ConfUtil.setHiveConf(hconf);
- }
-
- private void setupHyracks() throws Exception {
- // read hive conf
- HiveConf hconf = new HiveConf(SessionState.class);
- hconf.addResource(new Path(PATH_TO_HIVE_CONF));
- SessionState.start(hconf);
- String ipAddress = hconf.get("hive.hyracks.host");
- int clientPort = Integer.parseInt(hconf.get("hive.hyracks.port"));
- int netPort = clientPort + 1;
- String applicationName = hconf.get("hive.hyracks.app");
-
- // start hyracks cc
- CCConfig ccConfig = new CCConfig();
- ccConfig.clientNetIpAddress = ipAddress;
- ccConfig.clientNetPort = clientPort;
- ccConfig.clusterNetPort = netPort;
- ccConfig.profileDumpPeriod = 1000;
- ccConfig.heartbeatPeriod = 200000000;
- ccConfig.maxHeartbeatLapsePeriods = 200000000;
- ccConfig.clusterTopologyDefinition = new File(PATH_TO_CLUSTER_CONF);
- cc = new ClusterControllerService(ccConfig);
- cc.start();
-
- // start hyracks nc
- for (int i = 0; i < numberOfNC; i++) {
- NCConfig ncConfig = new NCConfig();
- ncConfig.ccHost = ipAddress;
- ncConfig.clusterNetIPAddress = ipAddress;
- ncConfig.ccPort = netPort;
- ncConfig.dataIPAddress = "127.0.0.1";
- ncConfig.nodeId = "nc" + i;
- NodeControllerService nc = new NodeControllerService(ncConfig);
- nc.start();
- ncs.put(ncConfig.nodeId, nc);
- }
-
- IHyracksClientConnection hcc = new HyracksConnection(
- ccConfig.clientNetIpAddress, clientPort);
- hcc.createApplication(applicationName, null);
- }
-
- protected void makeDir(String path) throws IOException {
- dfs.mkdirs(new Path(path));
- }
-
- protected void loadFiles(String src, String dest) throws IOException {
- dfs.copyFromLocalFile(new Path(src), new Path(dest));
- }
-
- protected void cleanup() throws Exception {
- cleanupHdfs();
- cleanupHyracks();
- }
-
- /**
- * cleanup hdfs cluster
- */
- private void cleanupHdfs() throws IOException {
- dfs.delete(new Path("/"), true);
- FileSystem.closeAll();
- dfsCluster.shutdown();
- }
-
- /**
- * cleanup hyracks cluster
- */
- private void cleanupHyracks() throws Exception {
- Iterator<NodeControllerService> iterator = ncs.values().iterator();
- while (iterator.hasNext()) {
- NodeControllerService nc = iterator.next();
- nc.stop();
- }
- cc.stop();
- }
-
- protected static List<String> getIgnoreList(String ignorePath)
- throws FileNotFoundException, IOException {
- BufferedReader reader = new BufferedReader(new FileReader(ignorePath));
- String s = null;
- List<String> ignores = new ArrayList<String>();
- while ((s = reader.readLine()) != null) {
- ignores.add(s);
- }
- reader.close();
- return ignores;
- }
-
- protected static boolean isIgnored(String q, List<String> ignoreList) {
- for (String ignore : ignoreList) {
- if (q.indexOf(ignore) >= 0) {
- return true;
- }
- }
- return false;
- }
-
- protected void loadData() throws IOException {
-
- makeDir("/tpch");
- makeDir("/tpch/customer");
- makeDir("/tpch/lineitem");
- makeDir("/tpch/orders");
- makeDir("/tpch/part");
- makeDir("/tpch/partsupp");
- makeDir("/tpch/supplier");
- makeDir("/tpch/nation");
- makeDir("/tpch/region");
-
- makeDir("/test");
- makeDir("/test/joinsrc1");
- makeDir("/test/joinsrc2");
-
- loadFiles(PATH_TO_DATA + "customer.tbl", "/tpch/customer/");
- loadFiles(PATH_TO_DATA + "lineitem.tbl", "/tpch/lineitem/");
- loadFiles(PATH_TO_DATA + "orders.tbl", "/tpch/orders/");
- loadFiles(PATH_TO_DATA + "part.tbl", "/tpch/part/");
- loadFiles(PATH_TO_DATA + "partsupp.tbl", "/tpch/partsupp/");
- loadFiles(PATH_TO_DATA + "supplier.tbl", "/tpch/supplier/");
- loadFiles(PATH_TO_DATA + "nation.tbl", "/tpch/nation/");
- loadFiles(PATH_TO_DATA + "region.tbl", "/tpch/region/");
-
- loadFiles(PATH_TO_DATA + "large_card_join_src.tbl", "/test/joinsrc1/");
- loadFiles(PATH_TO_DATA + "large_card_join_src_small.tbl",
- "/test/joinsrc2/");
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java
deleted file mode 100644
index 800d6be..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/datagen/RecordBalance.java
+++ /dev/null
@@ -1,82 +0,0 @@
-package edu.uci.ics.hivesterix.test.datagen;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileOutputFormat;
-import org.apache.hadoop.mapred.JobClient;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.MapReduceBase;
-import org.apache.hadoop.mapred.Mapper;
-import org.apache.hadoop.mapred.OutputCollector;
-import org.apache.hadoop.mapred.Reducer;
-import org.apache.hadoop.mapred.Reporter;
-import org.apache.hadoop.mapred.TextInputFormat;
-
-@SuppressWarnings("deprecation")
-public class RecordBalance {
-
- private static String confPath = System.getenv("HADDOP_HOME");
- private static Path[] inputPaths = { new Path("/tpch/100x/customer"),
- new Path("/tpch/100x/nation"), new Path("/tpch/100x/region"),
- new Path("/tpch/100x/lineitem"), new Path("/tpch/100x/orders"),
- new Path("/tpch/100x/part"), new Path("/tpch/100x/partsupp"),
- new Path("/tpch/100x/supplier") };
-
- private static Path[] outputPaths = { new Path("/tpch/100/customer"),
- new Path("/tpch/100/nation"), new Path("/tpch/100/region"),
- new Path("/tpch/100/lineitem"), new Path("/tpch/100/orders"),
- new Path("/tpch/100/part"), new Path("/tpch/100/partsupp"),
- new Path("/tpch/100/supplier") };
-
- public static class MapRecordOnly extends MapReduceBase implements
- Mapper<LongWritable, Text, LongWritable, Text> {
-
- public void map(LongWritable id, Text inputValue,
- OutputCollector<LongWritable, Text> output, Reporter reporter)
- throws IOException {
- output.collect(id, inputValue);
- }
- }
-
- public static class ReduceRecordOnly extends MapReduceBase implements
- Reducer<LongWritable, Text, NullWritable, Text> {
-
- NullWritable key = NullWritable.get();
-
- public void reduce(LongWritable inputKey, Iterator<Text> inputValue,
- OutputCollector<NullWritable, Text> output, Reporter reporter)
- throws IOException {
- while (inputValue.hasNext())
- output.collect(key, inputValue.next());
- }
- }
-
- public static void main(String[] args) throws IOException {
-
- for (int i = 0; i < inputPaths.length; i++) {
- JobConf job = new JobConf(RecordBalance.class);
- job.addResource(new Path(confPath + "/core-site.xml"));
- job.addResource(new Path(confPath + "/mapred-site.xml"));
- job.addResource(new Path(confPath + "/hdfs-site.xml"));
-
- job.setJobName(RecordBalance.class.getSimpleName());
- job.setMapperClass(MapRecordOnly.class);
- job.setReducerClass(ReduceRecordOnly.class);
- job.setMapOutputKeyClass(LongWritable.class);
- job.setMapOutputValueClass(Text.class);
-
- job.setInputFormat(TextInputFormat.class);
- FileInputFormat.setInputPaths(job, inputPaths[i]);
- FileOutputFormat.setOutputPath(job, outputPaths[i]);
- job.setNumReduceTasks(Integer.parseInt(args[0]));
-
- JobClient.runJob(job);
- }
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java
deleted file mode 100644
index 9591c32..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/legacy/LegacyTestCase.java
+++ /dev/null
@@ -1,144 +0,0 @@
-package edu.uci.ics.hivesterix.test.legacy;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
-
-public class LegacyTestCase extends AbstractHivesterixTestCase {
- private File resultFile;
- private FileSystem dfs;
-
- public LegacyTestCase(File queryFile, File resultFile) {
- super("legacy", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testRuntimeFunction() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
-
- dfs = FileSystem.get(ConfUtil.getJobConf());
-
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- driver.run(query);
- driver.clear();
- i++;
- }
-
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- String tableName = removeExt(resultFile.getName());
- String directory = warehouse + "/" + tableName + "/";
- String localDirectory = "tmp";
-
- FileStatus[] files = dfs.listStatus(new Path(directory));
- FileSystem lfs = null;
- if (files == null) {
- lfs = FileSystem.getLocal(ConfUtil.getJobConf());
- files = lfs.listStatus(new Path(directory));
- }
-
- File resultDirectory = new File(localDirectory + "/" + tableName);
- deleteDir(resultDirectory);
- resultDirectory.mkdir();
-
- for (FileStatus fs : files) {
- Path src = fs.getPath();
- if (src.getName().indexOf("crc") >= 0)
- continue;
-
- String destStr = localDirectory + "/" + tableName + "/"
- + src.getName();
- Path dest = new Path(destStr);
- if (lfs != null) {
- lfs.copyToLocalFile(src, dest);
- dfs.copyFromLocalFile(dest, new Path(directory));
- } else
- dfs.copyToLocalFile(src, dest);
- }
-
- File[] rFiles = resultDirectory.listFiles();
- StringBuilder sb = new StringBuilder();
- for (File r : rFiles) {
- if (r.getName().indexOf("crc") >= 0)
- continue;
- readFileToString(r, sb);
- }
- deleteDir(resultDirectory);
-
- StringBuilder buf = new StringBuilder();
- readFileToString(resultFile, buf);
- if (!equal(buf, sb)) {
- throw new Exception("Result for " + queryFile + " changed:\n"
- + sw.toString());
- }
- }
-
- private void deleteDir(File resultDirectory) {
- if (resultDirectory.exists()) {
- File[] rFiles = resultDirectory.listFiles();
- for (File r : rFiles)
- r.delete();
- resultDirectory.delete();
- }
- }
-
- private boolean equal(StringBuilder sb1, StringBuilder sb2) {
- String s1 = sb1.toString();
- String s2 = sb2.toString();
- String[] rowsOne = s1.split("\n");
- String[] rowsTwo = s2.split("\n");
-
- if (rowsOne.length != rowsTwo.length)
- return false;
-
- for (int i = 0; i < rowsOne.length; i++) {
- String row1 = rowsOne[i];
- String row2 = rowsTwo[i];
-
- if (row1.equals(row2))
- continue;
-
- String[] fields1 = row1.split("");
- String[] fields2 = row2.split("");
-
- for (int j = 0; j < fields1.length; j++) {
- if (fields1[j].equals(fields2[j])) {
- continue;
- } else if (fields1[j].indexOf('.') < 0) {
- return false;
- } else {
- Float float1 = Float.parseFloat(fields1[j]);
- Float float2 = Float.parseFloat(fields2[j]);
-
- if (Math.abs(float1 - float2) == 0)
- continue;
- else
- return false;
- }
- }
- }
-
- return true;
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java
deleted file mode 100644
index db13676..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestCase.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package edu.uci.ics.hivesterix.test.optimizer;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
-
-public class OptimizerTestCase extends AbstractHivesterixTestCase {
- private File resultFile;
-
- OptimizerTestCase(File queryFile, File resultFile) {
- super("testOptimizer", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testOptimizer() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
-
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- if (query.toLowerCase().indexOf("create") >= 0
- || query.toLowerCase().indexOf("drop") >= 0
- || query.toLowerCase().indexOf("set") >= 0
- || query.toLowerCase().startsWith("\n\ncreate")
- || query.toLowerCase().startsWith("\n\ndrop")
- || query.toLowerCase().startsWith("\n\nset"))
- driver.run(query);
- else
- driver.compile(query);
- driver.clear();
- i++;
- }
- StringBuilder buf = new StringBuilder();
- readFileToString(resultFile, buf);
- if (!buf.toString().equals(sw.toString())) {
- throw new Exception("Result for " + queryFile + " changed:\n"
- + sw.toString());
- }
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java
deleted file mode 100644
index 217f67d..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuitGenerator.java
+++ /dev/null
@@ -1,77 +0,0 @@
-package edu.uci.ics.hivesterix.test.optimizer;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
-
-public class OptimizerTestSuitGenerator extends AbstractTestSuiteClass {
- private static final String PATH_TO_QUERIES = "src/test/resources/optimizerts/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/optimizerts/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/optimizerts/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "plan";
-
- public static Test suite() throws UnsupportedEncodingException,
- FileNotFoundException, IOException {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- OptimizerTestSuitGenerator testSuite = new OptimizerTestSuitGenerator();
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile()) {
- String resultFileName = aqlExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new OptimizerTestSuiteCaseGenerator(qFile,
- rFile));
- }
- }
- return testSuite;
- }
-
- private static String aqlExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java
deleted file mode 100644
index e3a4a4e..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuite.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package edu.uci.ics.hivesterix.test.optimizer;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.util.List;
-
-import junit.framework.Test;
-import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
-
-public class OptimizerTestSuite extends AbstractTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/optimizerts/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/optimizerts/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/optimizerts/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "plan";
-
- public static Test suite() throws UnsupportedEncodingException,
- FileNotFoundException, IOException {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- OptimizerTestSuite testSuite = new OptimizerTestSuite();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile() && qFile.getName().startsWith("h11_")) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new OptimizerTestCase(qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java
deleted file mode 100644
index a86dc29..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/optimizer/OptimizerTestSuiteCaseGenerator.java
+++ /dev/null
@@ -1,53 +0,0 @@
-package edu.uci.ics.hivesterix.test.optimizer;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
-
-public class OptimizerTestSuiteCaseGenerator extends AbstractHivesterixTestCase {
- private File resultFile;
-
- OptimizerTestSuiteCaseGenerator(File queryFile, File resultFile) {
- super("testOptimizer", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testOptimizer() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
-
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- if (query.toLowerCase().indexOf("create") >= 0
- || query.toLowerCase().indexOf("drop") >= 0
- || query.toLowerCase().indexOf("set") >= 0
- || query.toLowerCase().startsWith("\n\ncreate")
- || query.toLowerCase().startsWith("\n\ndrop")
- || query.toLowerCase().startsWith("\n\nset"))
- driver.run(query);
- else
- driver.compile(query);
- driver.clear();
- i++;
- }
- sw.close();
- writeStringToFile(resultFile, sw);
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java
deleted file mode 100644
index 078de9a..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestCase.java
+++ /dev/null
@@ -1,152 +0,0 @@
-package edu.uci.ics.hivesterix.test.runtimefunction;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
-
-public class RuntimeFunctionTestCase extends AbstractHivesterixTestCase {
- private File resultFile;
- private FileSystem dfs;
-
- RuntimeFunctionTestCase(File queryFile, File resultFile) {
- super("testRuntimeFunction", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testRuntimeFunction() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
- // Driver driver = new Driver(hconf);
-
- dfs = FileSystem.get(ConfUtil.getJobConf());
-
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- driver.run(query);
- driver.clear();
- i++;
- }
-
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- String tableName = removeExt(resultFile.getName());
- String directory = warehouse + "/" + tableName + "/";
- String localDirectory = "tmp";
-
- FileStatus[] files = dfs.listStatus(new Path(directory));
- FileSystem lfs = null;
- if (files == null) {
- lfs = FileSystem.getLocal(ConfUtil.getJobConf());
- files = lfs.listStatus(new Path(directory));
- }
-
- File resultDirectory = new File(localDirectory + "/" + tableName);
- deleteDir(resultDirectory);
- resultDirectory.mkdir();
-
- for (FileStatus fs : files) {
- Path src = fs.getPath();
- if (src.getName().indexOf("crc") >= 0)
- continue;
-
- String destStr = localDirectory + "/" + tableName + "/"
- + src.getName();
- Path dest = new Path(destStr);
- if (lfs != null) {
- lfs.copyToLocalFile(src, dest);
- dfs.copyFromLocalFile(dest, new Path(directory));
- } else
- dfs.copyToLocalFile(src, dest);
- }
-
- File[] rFiles = resultDirectory.listFiles();
- StringBuilder sb = new StringBuilder();
- for (File r : rFiles) {
- if (r.getName().indexOf("crc") >= 0)
- continue;
- readFileToString(r, sb);
- }
-
- StringBuilder buf = new StringBuilder();
- readFileToString(resultFile, buf);
- StringBuffer errorMsg = new StringBuffer();
- if (!equal(buf, sb, errorMsg)) {
- throw new Exception("Result for " + queryFile + " changed:\n"
- + errorMsg.toString());
- }
- deleteDir(resultDirectory);
- }
-
- private void deleteDir(File resultDirectory) {
- if (resultDirectory.exists()) {
- File[] rFiles = resultDirectory.listFiles();
- for (File r : rFiles)
- r.delete();
- resultDirectory.delete();
- }
- }
-
- private boolean equal(StringBuilder sb1, StringBuilder sb2,
- StringBuffer errorMsg) {
- String s1 = sb1.toString();
- String s2 = sb2.toString();
- String[] rowsOne = s1.split("\n");
- String[] rowsTwo = s2.split("\n");
-
- if (rowsOne.length != rowsTwo.length)
- return false;
-
- for (int i = 0; i < rowsOne.length; i++) {
- String row1 = rowsOne[i];
- String row2 = rowsTwo[i];
-
- if (row1.equals(row2))
- continue;
-
- String[] fields1 = row1.split("");
- String[] fields2 = row2.split("");
-
- for (int j = 0; j < fields1.length; j++) {
- if (fields1[j].equals(fields2[j])) {
- continue;
- } else if (fields1[j].indexOf('.') < 0) {
- errorMsg.append("line " + i + " column " + j + ": "
- + fields2[j] + " expected " + fields1[j]);
- return false;
- } else {
- Float float1 = Float.parseFloat(fields1[j]);
- Float float2 = Float.parseFloat(fields2[j]);
-
- if (Math.abs(float1 - float2) == 0)
- continue;
- else {
- errorMsg.append("line " + i + " column " + j + ": "
- + fields2[j] + " expected " + fields1[j]);
- return false;
- }
- }
- }
- }
-
- return true;
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
deleted file mode 100644
index 2093b1d..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuite.java
+++ /dev/null
@@ -1,74 +0,0 @@
-package edu.uci.ics.hivesterix.test.runtimefunction;
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
-
-public class RuntimeFunctionTestSuite extends AbstractTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
-
- public static Test suite() throws Exception {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- RuntimeFunctionTestSuite testSuite = new RuntimeFunctionTestSuite();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile()) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new RuntimeFunctionTestCase(qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java
deleted file mode 100644
index 1b45b41..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteCaseGenerator.java
+++ /dev/null
@@ -1,101 +0,0 @@
-package edu.uci.ics.hivesterix.test.runtimefunction;
-
-import java.io.File;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.ql.Driver;
-import org.junit.Test;
-
-import edu.uci.ics.hivesterix.runtime.config.ConfUtil;
-import edu.uci.ics.hivesterix.test.base.AbstractHivesterixTestCase;
-
-public class RuntimeFunctionTestSuiteCaseGenerator extends
- AbstractHivesterixTestCase {
- private File resultFile;
- private FileSystem dfs;
-
- RuntimeFunctionTestSuiteCaseGenerator(File queryFile, File resultFile) {
- super("testRuntimeFunction", queryFile);
- this.queryFile = queryFile;
- this.resultFile = resultFile;
- }
-
- @Test
- public void testRuntimeFunction() throws Exception {
- StringBuilder queryString = new StringBuilder();
- readFileToString(queryFile, queryString);
- String[] queries = queryString.toString().split(";");
- StringWriter sw = new StringWriter();
-
- HiveConf hconf = ConfUtil.getHiveConf();
- Driver driver = new Driver(hconf, new PrintWriter(sw));
- driver.init();
-
- dfs = FileSystem.get(ConfUtil.getJobConf());
-
- int i = 0;
- for (String query : queries) {
- if (i == queries.length - 1)
- break;
- driver.run(query);
- driver.clear();
- i++;
- }
-
- String warehouse = hconf.get("hive.metastore.warehouse.dir");
- String tableName = removeExt(resultFile.getName());
- String directory = warehouse + "/" + tableName + "/";
- String localDirectory = "tmp";
-
- FileStatus[] files = dfs.listStatus(new Path(directory));
- FileSystem lfs = null;
- if (files == null) {
- lfs = FileSystem.getLocal(ConfUtil.getJobConf());
- files = lfs.listStatus(new Path(directory));
- }
-
- File resultDirectory = new File(localDirectory + "/" + tableName);
- deleteDir(resultDirectory);
- resultDirectory.mkdir();
-
- for (FileStatus fs : files) {
- Path src = fs.getPath();
- if (src.getName().indexOf("crc") >= 0)
- continue;
-
- String destStr = localDirectory + "/" + tableName + "/"
- + src.getName();
- Path dest = new Path(destStr);
- if (lfs != null) {
- lfs.copyToLocalFile(src, dest);
- dfs.copyFromLocalFile(dest, new Path(directory));
- } else
- dfs.copyToLocalFile(src, dest);
- }
-
- File[] rFiles = resultDirectory.listFiles();
- StringBuilder sb = new StringBuilder();
- for (File r : rFiles) {
- if (r.getName().indexOf("crc") >= 0)
- continue;
- readFileToString(r, sb);
- }
- deleteDir(resultDirectory);
-
- writeStringToFile(resultFile, sb);
- }
-
- private void deleteDir(File resultDirectory) {
- if (resultDirectory.exists()) {
- File[] rFiles = resultDirectory.listFiles();
- for (File r : rFiles)
- r.delete();
- resultDirectory.delete();
- }
- }
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java
deleted file mode 100644
index a67f475..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/runtimefunction/RuntimeFunctionTestSuiteGenerator.java
+++ /dev/null
@@ -1,75 +0,0 @@
-package edu.uci.ics.hivesterix.test.runtimefunction;
-
-import java.io.File;
-import java.util.List;
-
-import junit.framework.Test;
-import junit.framework.TestResult;
-import edu.uci.ics.hivesterix.test.base.AbstractTestSuiteClass;
-
-public class RuntimeFunctionTestSuiteGenerator extends AbstractTestSuiteClass {
-
- private static final String PATH_TO_QUERIES = "src/test/resources/runtimefunctionts/queries/";
- private static final String PATH_TO_RESULTS = "src/test/resources/runtimefunctionts/results/";
- private static final String PATH_TO_IGNORES = "src/test/resources/runtimefunctionts/ignore.txt";
-
- private static final String FILE_EXTENSION_OF_RESULTS = "result";
-
- public static Test suite() throws Exception {
- List<String> ignores = getIgnoreList(PATH_TO_IGNORES);
- File testData = new File(PATH_TO_QUERIES);
- File[] queries = testData.listFiles();
- RuntimeFunctionTestSuiteGenerator testSuite = new RuntimeFunctionTestSuiteGenerator();
-
- // set hdfs and hyracks cluster, and load test data to hdfs
- try {
- testSuite.setup();
- testSuite.loadData();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
-
- for (File qFile : queries) {
- if (isIgnored(qFile.getName(), ignores))
- continue;
-
- if (qFile.isFile() && qFile.getName().startsWith("q16_")) {
- String resultFileName = hiveExtToResExt(qFile.getName());
- File rFile = new File(PATH_TO_RESULTS + resultFileName);
- testSuite.addTest(new RuntimeFunctionTestSuiteCaseGenerator(
- qFile, rFile));
- }
- }
- return testSuite;
- }
-
- private static String hiveExtToResExt(String fname) {
- int dot = fname.lastIndexOf('.');
- return fname.substring(0, dot + 1) + FILE_EXTENSION_OF_RESULTS;
- }
-
- /**
- * Runs the tests and collects their result in a TestResult.
- */
- @Override
- public void run(TestResult result) {
-
- int testCount = countTestCases();
- for (int i = 0; i < testCount; i++) {
- Test each = this.testAt(i);
- if (result.shouldStop())
- break;
- runTest(each, result);
- }
-
- // cleanup hdfs and hyracks cluster
- try {
- cleanup();
- } catch (Exception e) {
- e.printStackTrace();
- throw new IllegalStateException(e.getMessage());
- }
- }
-
-}
diff --git a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java b/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java
deleted file mode 100644
index b5db432..0000000
--- a/hivesterix/src/test/java/edu/uci/ics/hivesterix/test/serde/SerDeTest.java
+++ /dev/null
@@ -1,232 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package edu.uci.ics.hivesterix.test.serde;
-
-import java.util.List;
-import java.util.Properties;
-
-import junit.framework.TestCase;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde.Constants;
-import org.apache.hadoop.hive.serde2.SerDe;
-import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.io.ShortWritable;
-import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
-import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-
-import edu.uci.ics.hivesterix.serde.lazy.LazySerDe;
-
-/**
- * TestLazySimpleSerDe.
- *
- */
-@SuppressWarnings({ "deprecation", "rawtypes" })
-public class SerDeTest extends TestCase {
-
- /**
- * Test the LazySimpleSerDe class.
- */
- public void testLazySimpleSerDe() throws Throwable {
- try {
- // Create the SerDe
- LazySimpleSerDe serDe = new LazySimpleSerDe();
- Configuration conf = new Configuration();
- Properties tbl = createProperties();
- serDe.initialize(conf, tbl);
-
- LazySerDe outputSerde = new LazySerDe();
- outputSerde.initialize(conf, tbl);
-
- // Data
- String s = "123\t456\t789\t1000\t5.3\thive and hadoop\t1\tqf";
-
- byte[] bytes = s.getBytes();
- Writable bytesWritable = new BytesWritable(bytes);
-
- // Test
- // deserializeAndSerialize(serDe, t, s, expectedFieldsData);
- Object row = serDe.deserialize(bytesWritable); // test my serde
- StructObjectInspector simpleInspector = (StructObjectInspector) serDe
- .getObjectInspector();
- List<Object> fields = simpleInspector
- .getStructFieldsDataAsList(row);
- List<? extends StructField> fieldRefs = simpleInspector
- .getAllStructFieldRefs();
-
- int i = 0;
- for (Object field : fields) {
- BytesWritable fieldWritable = (BytesWritable) outputSerde
- .serialize(field, fieldRefs.get(i)
- .getFieldObjectInspector());
- System.out.print(fieldWritable.getSize() + "|");
- i++;
- }
-
- // Writable output = outputSerde.serialize(row, serDe
- // .getObjectInspector());
- // System.out.println(output);
- //
- // Object row2 = outputSerde.deserialize(output);
- // Writable output2 = serDe.serialize(row2, outputSerde
- // .getObjectInspector());
- // System.out.println(output2);
-
- // System.out.println(output);
- // deserializeAndSerialize(outputSerde, t, s, expectedFieldsData);
-
- } catch (Throwable e) {
- e.printStackTrace();
- throw e;
- }
- }
-
- private void deserializeAndSerialize(SerDe serDe, Text t, String s,
- Object[] expectedFieldsData) throws SerDeException {
- // Get the row structure
- StructObjectInspector oi = (StructObjectInspector) serDe
- .getObjectInspector();
- List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
- assertEquals(8, fieldRefs.size());
-
- // Deserialize
- Object row = serDe.deserialize(t);
- for (int i = 0; i < fieldRefs.size(); i++) {
- Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
- if (fieldData != null) {
- fieldData = ((LazyPrimitive) fieldData).getWritableObject();
- }
- assertEquals("Field " + i, expectedFieldsData[i], fieldData);
- }
- // Serialize
- assertEquals(Text.class, serDe.getSerializedClass());
- Text serializedText = (Text) serDe.serialize(row, oi);
- assertEquals("Serialized data", s, serializedText.toString());
- }
-
- private Properties createProperties() {
- Properties tbl = new Properties();
-
- // Set the configuration parameters
- tbl.setProperty(Constants.SERIALIZATION_FORMAT, "9");
- tbl.setProperty("columns",
- "abyte,ashort,aint,along,adouble,astring,anullint,anullstring");
- tbl.setProperty("columns.types",
- "tinyint:smallint:int:bigint:double:string:int:string");
- tbl.setProperty(Constants.SERIALIZATION_NULL_FORMAT, "NULL");
- return tbl;
- }
-
- /**
- * Test the LazySimpleSerDe class with LastColumnTakesRest option.
- */
- public void testLazySimpleSerDeLastColumnTakesRest() throws Throwable {
- try {
- // Create the SerDe
- LazySimpleSerDe serDe = new LazySimpleSerDe();
- Configuration conf = new Configuration();
- Properties tbl = createProperties();
- tbl.setProperty(Constants.SERIALIZATION_LAST_COLUMN_TAKES_REST,
- "true");
- serDe.initialize(conf, tbl);
-
- // Data
- Text t = new Text(
- "123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
- String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta\tb\t";
- Object[] expectedFieldsData = { new ByteWritable((byte) 123),
- new ShortWritable((short) 456), new IntWritable(789),
- new LongWritable(1000), new DoubleWritable(5.3),
- new Text("hive and hadoop"), null, new Text("a\tb\t") };
-
- // Test
- deserializeAndSerialize(serDe, t, s, expectedFieldsData);
-
- } catch (Throwable e) {
- e.printStackTrace();
- throw e;
- }
- }
-
- /**
- * Test the LazySimpleSerDe class with extra columns.
- */
- public void testLazySimpleSerDeExtraColumns() throws Throwable {
- try {
- // Create the SerDe
- LazySimpleSerDe serDe = new LazySimpleSerDe();
- Configuration conf = new Configuration();
- Properties tbl = createProperties();
- serDe.initialize(conf, tbl);
-
- // Data
- Text t = new Text(
- "123\t456\t789\t1000\t5.3\thive and hadoop\t1.\ta\tb\t");
- String s = "123\t456\t789\t1000\t5.3\thive and hadoop\tNULL\ta";
- Object[] expectedFieldsData = { new ByteWritable((byte) 123),
- new ShortWritable((short) 456), new IntWritable(789),
- new LongWritable(1000), new DoubleWritable(5.3),
- new Text("hive and hadoop"), null, new Text("a") };
-
- // Test
- deserializeAndSerialize(serDe, t, s, expectedFieldsData);
-
- } catch (Throwable e) {
- e.printStackTrace();
- throw e;
- }
- }
-
- /**
- * Test the LazySimpleSerDe class with missing columns.
- */
- public void testLazySimpleSerDeMissingColumns() throws Throwable {
- try {
- // Create the SerDe
- LazySimpleSerDe serDe = new LazySimpleSerDe();
- Configuration conf = new Configuration();
- Properties tbl = createProperties();
- serDe.initialize(conf, tbl);
-
- // Data
- Text t = new Text("123\t456\t789\t1000\t5.3\t");
- String s = "123\t456\t789\t1000\t5.3\t\tNULL\tNULL";
- Object[] expectedFieldsData = { new ByteWritable((byte) 123),
- new ShortWritable((short) 456), new IntWritable(789),
- new LongWritable(1000), new DoubleWritable(5.3),
- new Text(""), null, null };
-
- // Test
- deserializeAndSerialize(serDe, t, s, expectedFieldsData);
-
- } catch (Throwable e) {
- e.printStackTrace();
- throw e;
- }
- }
-
-}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml b/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml
index 541f54e..9092655 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/pom.xml
@@ -64,6 +64,10 @@
<profile>
<activation>
<activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>1.0.4</value>
+ </property>
</activation>
<id>hadoop-1.0.4</id>
<dependencies>
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
index a2b16c6..16ce76b 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-0.20.2/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
@@ -1,7 +1,8 @@
package edu.uci.ics.hyracks.hdfs;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
@@ -14,12 +15,25 @@
public class ContextFactory {
@SuppressWarnings({ "unchecked", "rawtypes" })
- public TaskAttemptContext createContext(Configuration conf, InputSplit split) throws HyracksDataException {
+ public TaskAttemptContext createContext(Configuration conf, TaskAttemptID tid) throws HyracksDataException {
try {
- return new Mapper().new Context(conf, new TaskAttemptID(), null, null, null, null, split);
+ return new Mapper().new Context(conf, tid, null, null, null, null, null);
} catch (Exception e) {
throw new HyracksDataException(e);
}
}
+ public TaskAttemptContext createContext(Configuration conf, int partition) throws HyracksDataException {
+ try {
+ TaskAttemptID tid = new TaskAttemptID("", 0, true, partition, 0);
+ return new TaskAttemptContext(conf, tid);
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ public JobContext createJobContext(Configuration conf) {
+ return new JobContext(conf, new JobID("0", 0));
+ }
+
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml b/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml
index a848ef9..8b7ecf0 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/pom.xml
@@ -41,6 +41,10 @@
<profile>
<activation>
<activeByDefault>true</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>0.23.1</value>
+ </property>
</activation>
<id>hadoop-0.23.1</id>
<dependencies>
@@ -78,6 +82,10 @@
<id>hadoop-0.23.6</id>
<activation>
<activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>0.23.6</value>
+ </property>
</activation>
<dependencies>
<dependency>
@@ -110,6 +118,86 @@
</dependency>
</dependencies>
</profile>
+ <profile>
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>cdh-4.2</value>
+ </property>
+ </activation>
+ <id>cdh-4.2</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>2.0.0-cdh4.2.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ <version>2.0.0-cdh4.2.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>2.0.0-cdh4.2.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <version>2.0.0-cdh4.2.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>cdh-4.1</value>
+ </property>
+ </activation>
+ <id>cdh-4.1</id>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>2.0.0-cdh4.1.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-mapreduce-client-core</artifactId>
+ <version>2.0.0-cdh4.1.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-hdfs</artifactId>
+ <version>2.0.0-cdh4.1.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-minicluster</artifactId>
+ <version>2.0.0-cdh4.1.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ </profile>
</profiles>
<dependencies>
@@ -121,4 +209,11 @@
<scope>compile</scope>
</dependency>
</dependencies>
+
+ <repositories>
+ <repository>
+ <id>cloudera</id>
+ <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
+ </repository>
+ </repositories>
</project>
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
index 60ae5d3..ddcce64 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-0.23.1/src/main/java/edu/uci/ics/hyracks/hdfs/ContextFactory.java
@@ -1,9 +1,12 @@
package edu.uci.ics.hyracks.hdfs;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -13,12 +16,25 @@
*/
public class ContextFactory {
- public TaskAttemptContext createContext(Configuration conf, InputSplit split) throws HyracksDataException {
+ public TaskAttemptContext createContext(Configuration conf, TaskAttemptID tid) throws HyracksDataException {
try {
- return new TaskAttemptContextImpl(conf, new TaskAttemptID());
+ return new TaskAttemptContextImpl(conf, tid);
} catch (Exception e) {
throw new HyracksDataException(e);
}
}
+ public TaskAttemptContext createContext(Configuration conf, int partition) throws HyracksDataException {
+ try {
+ TaskAttemptID tid = new TaskAttemptID("", 0, TaskType.REDUCE, partition, 0);
+ return new TaskAttemptContextImpl(conf, tid);
+ } catch (Exception e) {
+ throw new HyracksDataException(e);
+ }
+ }
+
+ public JobContext createJobContext(Configuration conf) {
+ return new JobContextImpl(conf, new JobID("0", 0));
+ }
+
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml b/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
index 5887601..a28c698a 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/pom.xml
@@ -76,6 +76,10 @@
<profile>
<activation>
<activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>1.0.4</value>
+ </property>
</activation>
<id>hadoop-1.0.4</id>
<dependencies>
@@ -91,6 +95,10 @@
<profile>
<activation>
<activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>0.23.1</value>
+ </property>
</activation>
<id>hadoop-0.23.1</id>
<dependencies>
@@ -106,6 +114,10 @@
<profile>
<activation>
<activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>0.23.6</value>
+ </property>
</activation>
<id>hadoop-0.23.6</id>
<dependencies>
@@ -118,6 +130,44 @@
</dependency>
</dependencies>
</profile>
+ <profile>
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>cdh-4.1</value>
+ </property>
+ </activation>
+ <id>cdh-4.1</id>
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-0.23.1</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ </profile>
+ <profile>
+ <activation>
+ <activeByDefault>false</activeByDefault>
+ <property>
+ <name>hadoop</name>
+ <value>cdh-4.2</value>
+ </property>
+ </activation>
+ <id>cdh-4.2</id>
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-0.23.1</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+ </profile>
</profiles>
<dependencies>
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java
index 5923e1e..5d35ec5 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParser.java
@@ -29,12 +29,24 @@
public interface IKeyValueParser<K, V> {
/**
+ * Initialize the key value parser.
+ *
+ * @param writer
+ * The hyracks writer for outputting data.
+ * @throws HyracksDataException
+ */
+ public void open(IFrameWriter writer) throws HyracksDataException;
+
+ /**
* Parse a key-value pair returned by HDFS record reader to a tuple.
* when the parsers' internal buffer is full, it can flush the buffer to the writer
*
* @param key
+ * The key returned from Hadoop's InputReader.
* @param value
+ * The value returned from Hadoop's InputReader.
* @param writer
+ * The hyracks writer for outputting data.
* @throws HyracksDataException
*/
public void parse(K key, V value, IFrameWriter writer) throws HyracksDataException;
@@ -44,7 +56,8 @@
* This method is called in the close() of HDFSReadOperatorDescriptor.
*
* @param writer
+ * The hyracks writer for outputting data.
* @throws HyracksDataException
*/
- public void flush(IFrameWriter writer) throws HyracksDataException;
+ public void close(IFrameWriter writer) throws HyracksDataException;
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParserFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParserFactory.java
index 6e943ad..7d6f868 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParserFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/IKeyValueParserFactory.java
@@ -18,6 +18,7 @@
import java.io.Serializable;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
/**
* Users need to implement this interface to use the HDFSReadOperatorDescriptor.
@@ -36,6 +37,6 @@
* the IHyracksTaskContext
* @return a key-value parser instance.
*/
- public IKeyValueParser<K, V> createKeyValueParser(IHyracksTaskContext ctx);
+ public IKeyValueParser<K, V> createKeyValueParser(IHyracksTaskContext ctx) throws HyracksDataException;
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriter.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriter.java
index 25b9523..8e85627 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriter.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriter.java
@@ -26,6 +26,15 @@
public interface ITupleWriter {
/**
+ * Initialize the the tuple writer.
+ *
+ * @param output
+ * The channel for output data.
+ * @throws HyracksDataException
+ */
+ public void open(DataOutput output) throws HyracksDataException;
+
+ /**
* Write the tuple to the DataOutput.
*
* @param output
@@ -36,4 +45,13 @@
*/
public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException;
+ /**
+ * Close the writer.
+ *
+ * @param output
+ * The channel for output data.
+ * @throws HyracksDataException
+ */
+ public void close(DataOutput output) throws HyracksDataException;
+
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java
index 839de8f..9a025c2 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/api/ITupleWriterFactory.java
@@ -17,14 +17,19 @@
import java.io.Serializable;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
/**
* Users need to implement this interface to use the HDFSWriteOperatorDescriptor.
*/
public interface ITupleWriterFactory extends Serializable {
/**
+ * @param ctx
+ * the IHyracksTaskContext
* @return a tuple writer instance
*/
- public ITupleWriter getTupleWriter();
+ public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) throws HyracksDataException;
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java
index e924650..f49688b 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSReadOperatorDescriptor.java
@@ -102,6 +102,7 @@
JobConf conf = confFactory.getConf();
IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
writer.open();
+ parser.open(writer);
InputFormat inputFormat = conf.getInputFormat();
for (int i = 0; i < inputSplits.length; i++) {
/**
@@ -131,7 +132,7 @@
}
}
}
- parser.flush(writer);
+ parser.close(writer);
writer.close();
} catch (Exception e) {
throw new HyracksDataException(e);
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java
index ff97a29..36d5f55 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/dataflow/HDFSWriteOperatorDescriptor.java
@@ -89,7 +89,7 @@
String outputDirPath = FileOutputFormat.getOutputPath(conf).toString();
String fileName = outputDirPath + File.separator + "part-" + partition;
- tupleWriter = tupleWriterFactory.getTupleWriter();
+ tupleWriter = tupleWriterFactory.getTupleWriter(ctx);
try {
FileSystem dfs = FileSystem.get(conf);
dos = dfs.create(new Path(fileName), true);
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java
index c691f5d..9574bb4 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextKeyValueParserFactory.java
@@ -43,6 +43,11 @@
return new IKeyValueParser<LongWritable, Text>() {
@Override
+ public void open(IFrameWriter writer) {
+
+ }
+
+ @Override
public void parse(LongWritable key, Text value, IFrameWriter writer) throws HyracksDataException {
tb.reset();
tb.addField(value.getBytes(), 0, value.getLength());
@@ -56,7 +61,7 @@
}
@Override
- public void flush(IFrameWriter writer) throws HyracksDataException {
+ public void close(IFrameWriter writer) throws HyracksDataException {
FrameUtils.flushFrame(buffer, writer);
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java
index d26721d..0da14e5 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/lib/TextTupleWriterFactory.java
@@ -17,6 +17,7 @@
import java.io.DataOutput;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
import edu.uci.ics.hyracks.hdfs.api.ITupleWriter;
@@ -26,9 +27,14 @@
private static final long serialVersionUID = 1L;
@Override
- public ITupleWriter getTupleWriter() {
+ public ITupleWriter getTupleWriter(IHyracksTaskContext ctx) {
return new ITupleWriter() {
- byte newLine = "\n".getBytes()[0];
+ private byte newLine = "\n".getBytes()[0];
+
+ @Override
+ public void open(DataOutput output) {
+
+ }
@Override
public void write(DataOutput output, ITupleReference tuple) throws HyracksDataException {
@@ -43,6 +49,11 @@
}
}
+ @Override
+ public void close(DataOutput output) {
+
+ }
+
};
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java
index e7309d4..3f287cf 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs/scheduler/Scheduler.java
@@ -17,6 +17,7 @@
import java.io.IOException;
import java.net.InetAddress;
+import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@@ -49,7 +50,7 @@
private Map<String, Integer> ncNameToIndex = new HashMap<String, Integer>();
/**
- * The constructor of the scheduler
+ * The constructor of the scheduler.
*
* @param ncNameToNcInfos
* @throws HyracksException
@@ -64,12 +65,20 @@
}
}
+ /**
+ * The constructor of the scheduler.
+ *
+ * @param ncNameToNcInfos the mapping from nc names to nc infos
+ * @throws HyracksException
+ */
public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos) throws HyracksException {
loadIPAddressToNCMap(ncNameToNcInfos);
}
/**
- * Set location constraints for a file scan operator with a list of file splits
+ * Set location constraints for a file scan operator with a list of file splits.
+ * It guarantees the maximum slots a machine can is at most one more than the minimum slots a
+ * machine can get.
*
* @throws HyracksDataException
*/
@@ -77,93 +86,38 @@
int[] capacity = new int[NCs.length];
Arrays.fill(capacity, 0);
String[] locations = new String[splits.length];
- int slots = splits.length % capacity.length == 0 ? (splits.length / capacity.length) : (splits.length
+ /**
+ * upper bound number of slots that a machine can get
+ */
+ int upperBoundSlots = splits.length % capacity.length == 0 ? (splits.length / capacity.length) : (splits.length
/ capacity.length + 1);
+ /**
+ * lower bound number of slots that a machine can get
+ */
+ int lowerBoundSlots = splits.length % capacity.length == 0 ? upperBoundSlots : upperBoundSlots - 1;
try {
Random random = new Random(System.currentTimeMillis());
boolean scheduled[] = new boolean[splits.length];
Arrays.fill(scheduled, false);
- for (int i = 0; i < splits.length; i++) {
- /**
- * get the location of all the splits
- */
- String[] loc = splits[i].getLocations();
- if (loc.length > 0) {
- for (int j = 0; j < loc.length; j++) {
- /**
- * get all the IP addresses from the name
- */
- InetAddress[] allIps = InetAddress.getAllByName(loc[j]);
- /**
- * iterate overa all ips
- */
- for (InetAddress ip : allIps) {
- /**
- * if the node controller exists
- */
- if (ipToNcMapping.get(ip.getHostAddress()) != null) {
- /**
- * set the ncs
- */
- List<String> dataLocations = ipToNcMapping.get(ip.getHostAddress());
- int arrayPos = random.nextInt(dataLocations.size());
- String nc = dataLocations.get(arrayPos);
- int pos = ncNameToIndex.get(nc);
- /**
- * check if the node is already full
- */
- if (capacity[pos] < slots) {
- locations[i] = nc;
- capacity[pos]++;
- scheduled[i] = true;
- }
- }
- }
-
- /**
- * break the loop for data-locations if the schedule has already been found
- */
- if (scheduled[i] == true) {
- break;
- }
- }
- }
- }
+ /**
+ * push data-local lower-bounds slots to each machine
+ */
+ scheduleLocalSlots(splits, capacity, locations, lowerBoundSlots, random, scheduled);
+ /**
+ * push data-local upper-bounds slots to each machine
+ */
+ scheduleLocalSlots(splits, capacity, locations, upperBoundSlots, random, scheduled);
/**
- * find the lowest index the current available NCs
+ * push non-data-local lower-bounds slots to each machine
*/
- int currentAvailableNC = 0;
- for (int i = 0; i < capacity.length; i++) {
- if (capacity[i] < slots) {
- currentAvailableNC = i;
- break;
- }
- }
-
+ scheduleNoLocalSlots(splits, capacity, locations, lowerBoundSlots, scheduled);
/**
- * schedule no-local file reads
+ * push non-data-local upper-bounds slots to each machine
*/
- for (int i = 0; i < splits.length; i++) {
- // if there is no data-local NC choice, choose a random one
- if (!scheduled[i]) {
- locations[i] = NCs[currentAvailableNC];
- capacity[currentAvailableNC]++;
- scheduled[i] = true;
-
- /**
- * move the available NC cursor to the next one
- */
- for (int j = currentAvailableNC; j < capacity.length; j++) {
- if (capacity[j] < slots) {
- currentAvailableNC = j;
- break;
- }
- }
- }
- }
+ scheduleNoLocalSlots(splits, capacity, locations, upperBoundSlots, scheduled);
return locations;
} catch (IOException e) {
throw new HyracksException(e);
@@ -171,6 +125,124 @@
}
/**
+ * Schedule non-local slots to each machine
+ *
+ * @param splits
+ * The HDFS file splits.
+ * @param capacity
+ * The current capacity of each machine.
+ * @param locations
+ * The result schedule.
+ * @param slots
+ * The maximum slots of each machine.
+ * @param scheduled
+ * Indicate which slot is scheduled.
+ */
+ private void scheduleNoLocalSlots(InputSplit[] splits, int[] capacity, String[] locations, int slots,
+ boolean[] scheduled) {
+ /**
+ * find the lowest index the current available NCs
+ */
+ int currentAvailableNC = 0;
+ for (int i = 0; i < capacity.length; i++) {
+ if (capacity[i] < slots) {
+ currentAvailableNC = i;
+ break;
+ }
+ }
+
+ /**
+ * schedule no-local file reads
+ */
+ for (int i = 0; i < splits.length; i++) {
+ // if there is no data-local NC choice, choose a random one
+ if (!scheduled[i]) {
+ locations[i] = NCs[currentAvailableNC];
+ capacity[currentAvailableNC]++;
+ scheduled[i] = true;
+
+ /**
+ * move the available NC cursor to the next one
+ */
+ for (int j = currentAvailableNC; j < capacity.length; j++) {
+ if (capacity[j] < slots) {
+ currentAvailableNC = j;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Schedule data-local slots to each machine.
+ *
+ * @param splits
+ * The HDFS file splits.
+ * @param capacity
+ * The current capacity of each machine.
+ * @param locations
+ * The result schedule.
+ * @param slots
+ * The maximum slots of each machine.
+ * @param random
+ * The random generator.
+ * @param scheduled
+ * Indicate which slot is scheduled.
+ * @throws IOException
+ * @throws UnknownHostException
+ */
+ private void scheduleLocalSlots(InputSplit[] splits, int[] capacity, String[] locations, int slots, Random random,
+ boolean[] scheduled) throws IOException, UnknownHostException {
+ for (int i = 0; i < splits.length; i++) {
+ /**
+ * get the location of all the splits
+ */
+ String[] loc = splits[i].getLocations();
+ if (loc.length > 0) {
+ for (int j = 0; j < loc.length; j++) {
+ /**
+ * get all the IP addresses from the name
+ */
+ InetAddress[] allIps = InetAddress.getAllByName(loc[j]);
+ /**
+ * iterate overa all ips
+ */
+ for (InetAddress ip : allIps) {
+ /**
+ * if the node controller exists
+ */
+ if (ipToNcMapping.get(ip.getHostAddress()) != null) {
+ /**
+ * set the ncs
+ */
+ List<String> dataLocations = ipToNcMapping.get(ip.getHostAddress());
+ int arrayPos = random.nextInt(dataLocations.size());
+ String nc = dataLocations.get(arrayPos);
+ int pos = ncNameToIndex.get(nc);
+ /**
+ * check if the node is already full
+ */
+ if (capacity[pos] < slots) {
+ locations[i] = nc;
+ capacity[pos]++;
+ scheduled[i] = true;
+ }
+ }
+ }
+
+ /**
+ * break the loop for data-locations if the schedule has already been found
+ */
+ if (scheduled[i] == true) {
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /**
* Load the IP-address-to-NC map from the NCNameToNCInfoMap
*
* @param ncNameToNcInfos
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java
index 90f5603..9e9abdf 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSReadOperatorDescriptor.java
@@ -139,8 +139,7 @@
/**
* read the split
*/
- TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(),
- inputSplits.get(i));
+ TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
reader.initialize(inputSplits.get(i), context);
while (reader.nextKeyValue() == true) {
@@ -148,7 +147,7 @@
}
}
}
- parser.flush(writer);
+ parser.close(writer);
writer.close();
} catch (Exception e) {
throw new HyracksDataException(e);
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java
index 390a7b5..86ee527 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/dataflow/HDFSWriteOperatorDescriptor.java
@@ -88,7 +88,7 @@
String outputPath = FileOutputFormat.getOutputPath(conf).toString();
String fileName = outputPath + File.separator + "part-" + partition;
- tupleWriter = tupleWriterFactory.getTupleWriter();
+ tupleWriter = tupleWriterFactory.getTupleWriter(ctx);
try {
FileSystem dfs = FileSystem.get(conf.getConfiguration());
dos = dfs.create(new Path(fileName), true);
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java
index 3445d68..cb97ca1 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/Scheduler.java
@@ -15,18 +15,11 @@
package edu.uci.ics.hyracks.hdfs2.scheduler;
-import java.net.InetAddress;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Random;
import org.apache.hadoop.mapreduce.InputSplit;
-import edu.uci.ics.hyracks.api.client.HyracksConnection;
-import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.exceptions.HyracksException;
@@ -35,16 +28,10 @@
* The scheduler conduct data-local scheduling for data reading on HDFS.
* This class works for Hadoop new API.
*/
+@SuppressWarnings("deprecation")
public class Scheduler {
- /** a list of NCs */
- private String[] NCs;
-
- /** a map from ip to NCs */
- private Map<String, List<String>> ipToNcMapping = new HashMap<String, List<String>>();
-
- /** a map from the NC name to the index */
- private Map<String, Integer> ncNameToIndex = new HashMap<String, Integer>();
+ private edu.uci.ics.hyracks.hdfs.scheduler.Scheduler scheduler;
/**
* The constructor of the scheduler
@@ -53,17 +40,18 @@
* @throws HyracksException
*/
public Scheduler(String ipAddress, int port) throws HyracksException {
- try {
- IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);
- Map<String, NodeControllerInfo> ncNameToNcInfos = hcc.getNodeControllerInfos();
- loadIPAddressToNCMap(ncNameToNcInfos);
- } catch (Exception e) {
- throw new HyracksException(e);
- }
+ scheduler = new edu.uci.ics.hyracks.hdfs.scheduler.Scheduler(ipAddress, port);
}
+ /**
+ * The constructor of the scheduler.
+ *
+ * @param ncNameToNcInfos
+ * the mapping from nc names to nc infos
+ * @throws HyracksException
+ */
public Scheduler(Map<String, NodeControllerInfo> ncNameToNcInfos) throws HyracksException {
- loadIPAddressToNCMap(ncNameToNcInfos);
+ scheduler = new edu.uci.ics.hyracks.hdfs.scheduler.Scheduler(ncNameToNcInfos);
}
/**
@@ -72,135 +60,11 @@
* @throws HyracksDataException
*/
public String[] getLocationConstraints(List<InputSplit> splits) throws HyracksException {
- int[] capacity = new int[NCs.length];
- Arrays.fill(capacity, 0);
- String[] locations = new String[splits.size()];
- int slots = splits.size() % capacity.length == 0 ? (splits.size() / capacity.length) : (splits.size()
- / capacity.length + 1);
-
try {
- Random random = new Random(System.currentTimeMillis());
- boolean scheduled[] = new boolean[splits.size()];
- Arrays.fill(scheduled, false);
-
- for (int i = 0; i < splits.size(); i++) {
- /**
- * get the location of all the splits
- */
- String[] loc = splits.get(i).getLocations();
- if (loc.length > 0) {
- for (int j = 0; j < loc.length; j++) {
- /**
- * get all the IP addresses from the name
- */
- InetAddress[] allIps = InetAddress.getAllByName(loc[j]);
- /**
- * iterate overa all ips
- */
- for (InetAddress ip : allIps) {
- /**
- * if the node controller exists
- */
- if (ipToNcMapping.get(ip.getHostAddress()) != null) {
- /**
- * set the ncs
- */
- List<String> dataLocations = ipToNcMapping.get(ip.getHostAddress());
- int arrayPos = random.nextInt(dataLocations.size());
- String nc = dataLocations.get(arrayPos);
- int pos = ncNameToIndex.get(nc);
- /**
- * check if the node is already full
- */
- if (capacity[pos] < slots) {
- locations[i] = nc;
- capacity[pos]++;
- scheduled[i] = true;
- }
- }
- }
-
- /**
- * break the loop for data-locations if the schedule has already been found
- */
- if (scheduled[i] == true) {
- break;
- }
- }
- }
- }
-
- /**
- * find the lowest index the current available NCs
- */
- int currentAvailableNC = 0;
- for (int i = 0; i < capacity.length; i++) {
- if (capacity[i] < slots) {
- currentAvailableNC = i;
- break;
- }
- }
-
- /**
- * schedule no-local file reads
- */
- for (int i = 0; i < splits.size(); i++) {
- // if there is no data-local NC choice, choose a random one
- if (!scheduled[i]) {
- locations[i] = NCs[currentAvailableNC];
- capacity[currentAvailableNC]++;
- scheduled[i] = true;
-
- /**
- * move the available NC cursor to the next one
- */
- for (int j = currentAvailableNC; j < capacity.length; j++) {
- if (capacity[j] < slots) {
- currentAvailableNC = j;
- break;
- }
- }
- }
- }
- return locations;
- } catch (Exception e) {
- throw new HyracksException(e);
- }
- }
-
- /**
- * Load the IP-address-to-NC map from the NCNameToNCInfoMap
- *
- * @param ncNameToNcInfos
- * @throws HyracksException
- */
- private void loadIPAddressToNCMap(Map<String, NodeControllerInfo> ncNameToNcInfos) throws HyracksException {
- try {
- NCs = new String[ncNameToNcInfos.size()];
- int i = 0;
-
- /**
- * build the IP address to NC map
- */
- for (Map.Entry<String, NodeControllerInfo> entry : ncNameToNcInfos.entrySet()) {
- String ipAddr = InetAddress.getByAddress(entry.getValue().getNetworkAddress().getIpAddress())
- .getHostAddress();
- List<String> matchedNCs = ipToNcMapping.get(ipAddr);
- if (matchedNCs == null) {
- matchedNCs = new ArrayList<String>();
- ipToNcMapping.put(ipAddr, matchedNCs);
- }
- matchedNCs.add(entry.getKey());
- NCs[i] = entry.getKey();
- i++;
- }
-
- /**
- * set up the NC name to index mapping
- */
- for (i = 0; i < NCs.length; i++) {
- ncNameToIndex.put(NCs[i], i);
- }
+ org.apache.hadoop.mapred.InputSplit[] inputSplits = new org.apache.hadoop.mapred.InputSplit[splits.size()];
+ for (int i = 0; i < inputSplits.length; i++)
+ inputSplits[i] = new WrappedFileSplit(splits.get(i).getLocations(), splits.get(i).getLength());
+ return scheduler.getLocationConstraints(inputSplits);
} catch (Exception e) {
throw new HyracksException(e);
}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/WrappedFileSplit.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/WrappedFileSplit.java
new file mode 100644
index 0000000..1deb469
--- /dev/null
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/main/java/edu/uci/ics/hyracks/hdfs2/scheduler/WrappedFileSplit.java
@@ -0,0 +1,51 @@
+package edu.uci.ics.hyracks.hdfs2.scheduler;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.mapred.InputSplit;
+
+/**
+ * The wrapped implementation of InputSplit, for the new API scheduler
+ * to reuse the old API scheduler
+ */
+@SuppressWarnings("deprecation")
+public class WrappedFileSplit implements InputSplit {
+
+ private String[] locations;
+ private long length;
+
+ public WrappedFileSplit(String[] locations, long length) {
+ this.locations = locations;
+ this.length = length;
+ }
+
+ @Override
+ public void readFields(DataInput input) throws IOException {
+ int len = input.readInt();
+ locations = new String[len];
+ for (int i = 0; i < len; i++)
+ locations[i] = input.readUTF();
+ length = input.readLong();
+ }
+
+ @Override
+ public void write(DataOutput output) throws IOException {
+ output.write(locations.length);
+ for (int i = 0; i < locations.length; i++)
+ output.writeUTF(locations[i]);
+ output.writeLong(length);
+ }
+
+ @Override
+ public long getLength() throws IOException {
+ return length;
+ }
+
+ @Override
+ public String[] getLocations() throws IOException {
+ return locations;
+ }
+
+}
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java
index 4b8a278..c6b0416 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs/scheduler/SchedulerTest.java
@@ -199,8 +199,8 @@
Scheduler scheduler = new Scheduler(ncNameToNcInfos);
String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
- String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc3", "nc2", "nc1", "nc3", "nc4", "nc2",
- "nc4", "nc5", "nc5" };
+ String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc3", "nc2", "nc1", "nc4", "nc5", "nc6",
+ "nc5", "nc5", "nc6" };
for (int i = 0; i < locationConstraints.length; i++) {
Assert.assertEquals(locationConstraints[i], expectedResults[i]);
diff --git a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs2/scheduler/SchedulerTest.java b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs2/scheduler/SchedulerTest.java
index ea2af13..4b1e11c 100644
--- a/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs2/scheduler/SchedulerTest.java
+++ b/hyracks/hyracks-hdfs/hyracks-hdfs-core/src/test/java/edu/uci/ics/hyracks/hdfs2/scheduler/SchedulerTest.java
@@ -34,7 +34,6 @@
/**
* Test case for the new HDFS API scheduler
- *
*/
public class SchedulerTest extends TestCase {
@@ -204,8 +203,8 @@
Scheduler scheduler = new Scheduler(ncNameToNcInfos);
String[] locationConstraints = scheduler.getLocationConstraints(fileSplits);
- String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc3", "nc2", "nc1", "nc3", "nc4", "nc2",
- "nc4", "nc5", "nc5" };
+ String[] expectedResults = new String[] { "nc1", "nc3", "nc4", "nc2", "nc3", "nc2", "nc1", "nc4", "nc5", "nc6",
+ "nc5", "nc5", "nc6" };
for (int i = 0; i < locationConstraints.length; i++) {
Assert.assertEquals(locationConstraints[i], expectedResults[i]);
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java
index a8cd3db..3a98fd9 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/Vertex.java
@@ -73,6 +73,8 @@
private boolean updated = false;
/** has outgoing messages */
private boolean hasMessage = false;
+ /** created new vertex */
+ private boolean createdNewLiveVertex = false;
/**
* use object pool for re-using objects
@@ -258,6 +260,7 @@
halt = in.readBoolean();
updated = false;
hasMessage = false;
+ createdNewLiveVertex = false;
}
@Override
@@ -369,6 +372,13 @@
}
/**
+ * Pregelix internal use only
+ */
+ public boolean createdNewLiveVertex() {
+ return this.createdNewLiveVertex;
+ }
+
+ /**
* sort the edges
*/
@SuppressWarnings("unchecked")
@@ -449,7 +459,8 @@
* @param vertex
* the vertex
*/
- public final void addVertex(I vertexId, V vertex) {
+ public final void addVertex(I vertexId, Vertex vertex) {
+ createdNewLiveVertex |= !vertex.isHalted();
delegate.addVertex(vertexId, vertex);
}
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/VertexDelegate.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/VertexDelegate.java
index d949bc5..4b153c1 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/VertexDelegate.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/graph/VertexDelegate.java
@@ -105,7 +105,7 @@
this.vertexId = vertexId;
}
- public final void addVertex(I vertexId, V vertex) {
+ public final void addVertex(I vertexId, Vertex vertex) {
try {
insertTb.reset();
DataOutput outputInsert = insertTb.getDataOutput();
@@ -114,6 +114,19 @@
vertex.write(outputInsert);
insertTb.addFieldEndOffset();
FrameTupleUtils.flushTuple(appenderInsert, insertTb, insertWriter);
+
+ /**
+ * push alive when necessary
+ */
+ if (pushAlive && !vertex.isHalted()) {
+ alive.reset();
+ DataOutput outputAlive = alive.getDataOutput();
+ vertexId.write(outputAlive);
+ alive.addFieldEndOffset();
+ dummyMessageList.write(outputAlive);
+ alive.addFieldEndOffset();
+ FrameTupleUtils.flushTuple(appenderAlive, alive, aliveWriter);
+ }
} catch (Exception e) {
throw new IllegalStateException(e);
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
index 727e7fe..b1730d3 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
@@ -246,28 +246,28 @@
ITuplePartitionComputerFactory partionFactory = new VertexIdPartitionComputerFactory(
rdUnnestedMessage.getFields()[0]);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
/** connect all operators **/
spec.connect(new OneToOneConnectorDescriptor(spec), emptyTupleSource, 0, preSuperStep, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), preSuperStep, 0, scanner, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, localSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 3, insertOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 3, insertOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 4, deleteOp,
- 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 4,
+ deleteOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
- spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 5, btreeBulkLoad, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), scanner, 5,
+ btreeBulkLoad, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), localSort, 0, localGby, 0);
spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec, partionFactory, keyFields, sortCmpFactories),
localGby, 0, globalGby, 0);
@@ -282,7 +282,7 @@
spec.addRoot(emptySink3);
spec.addRoot(emptySink4);
- spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
@@ -470,7 +470,7 @@
EmptySinkOperatorDescriptor emptySink4 = new EmptySinkOperatorDescriptor(spec);
ClusterConfig.setLocationConstraint(spec, emptySink4);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
ITuplePartitionComputerFactory partionFactory = new VertexIdPartitionComputerFactory(
rdUnnestedMessage.getFields()[0]);
/** connect all operators **/
@@ -479,20 +479,22 @@
spec.connect(new OneToOneConnectorDescriptor(spec), materializeRead, 0, setUnion, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), setUnion, 0, join, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), join, 0, localSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 3, insertOp, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 3,
+ insertOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 4, deleteOp, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 4,
+ deleteOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
- spec.connect(new OneToOneConnectorDescriptor(spec), join, 5, btreeBulkLoad, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, partionFactory), join, 5, btreeBulkLoad, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), localSort, 0, localGby, 0);
spec.connect(new MToNPartitioningMergingConnectorDescriptor(spec, partionFactory, keyFields, sortCmpFactories),
localGby, 0, globalGby, 0);
@@ -506,7 +508,7 @@
spec.addRoot(emptySink3);
spec.addRoot(emptySink4);
- spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
index 9bad169..87f2156 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
@@ -189,7 +189,7 @@
TerminationStateWriterOperatorDescriptor terminateWriter = new TerminationStateWriterOperatorDescriptor(spec,
configurationFactory, jobId);
PartitionConstraintHelper.addPartitionCountConstraint(spec, terminateWriter, 1);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
/**
* final aggregate write operator
@@ -233,18 +233,18 @@
spec.connect(new OneToOneConnectorDescriptor(spec), emptyTupleSource, 0, preSuperStep, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), preSuperStep, 0, scanner, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), scanner, 0, localSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 3, insertOp,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 3, insertOp,
0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), scanner, 4, deleteOp,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), scanner, 4, deleteOp,
0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
@@ -264,7 +264,7 @@
spec.addRoot(emptySink3);
spec.addRoot(emptySink4);
- spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
@@ -432,7 +432,7 @@
EmptySinkOperatorDescriptor emptySink4 = new EmptySinkOperatorDescriptor(spec);
ClusterConfig.setLocationConstraint(spec, emptySink4);
- ITuplePartitionComputerFactory hashPartitionComputerFactory = new MergePartitionComputerFactory();
+ ITuplePartitionComputerFactory unifyingPartitionComputerFactory = new MergePartitionComputerFactory();
ITuplePartitionComputerFactory partionFactory = new VertexIdPartitionComputerFactory(
rdUnnestedMessage.getFields()[0]);
@@ -441,17 +441,17 @@
spec.connect(new OneToOneConnectorDescriptor(spec), preSuperStep, 0, materializeRead, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), materializeRead, 0, join, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), join, 0, localSort, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 1,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 1,
terminateWriter, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 2,
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 2,
finalAggregator, 0);
/**
* connect the insert/delete operator
*/
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 3, insertOp, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 3, insertOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), insertOp, 0, emptySink3, 0);
- spec.connect(new MToNPartitioningConnectorDescriptor(spec, hashPartitionComputerFactory), join, 4, deleteOp, 0);
+ spec.connect(new MToNPartitioningConnectorDescriptor(spec, unifyingPartitionComputerFactory), join, 4, deleteOp, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), deleteOp, 0, emptySink4, 0);
spec.connect(new OneToOneConnectorDescriptor(spec), localSort, 0, localGby, 0);
@@ -465,7 +465,7 @@
spec.addRoot(finalAggregator);
spec.addRoot(emptySink);
- spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
index ffdef10..6ea258e 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
@@ -50,12 +50,12 @@
import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
import edu.uci.ics.pregelix.core.runtime.touchpoint.WritableComparingBinaryComparatorFactory;
import edu.uci.ics.pregelix.core.util.DataflowUtils;
+import edu.uci.ics.pregelix.dataflow.ConnectorPolicyAssignmentPolicy;
import edu.uci.ics.pregelix.dataflow.EmptySinkOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.EmptyTupleSourceOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.FinalAggregateOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.MaterializingReadOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.MaterializingWriteOperatorDescriptor;
-import edu.uci.ics.pregelix.dataflow.NonCombinerConnectorPolicyAssignmentPolicy;
import edu.uci.ics.pregelix.dataflow.TerminationStateWriterOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
import edu.uci.ics.pregelix.dataflow.std.BTreeSearchFunctionUpdateOperatorDescriptor;
@@ -135,7 +135,7 @@
comparatorFactories, JobGenUtil.getForwardScan(iteration), null, null, true, true,
new BTreeDataflowHelperFactory(), inputRdFactory, 5,
new StartComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
- rdPartialAggregate);
+ rdPartialAggregate, rdInsert, rdDelete);
ClusterConfig.setLocationConstraint(spec, scanner);
/**
@@ -252,7 +252,7 @@
spec.addRoot(emptySink3);
spec.addRoot(emptySink4);
- spec.setConnectorPolicyAssignmentPolicy(new NonCombinerConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
@@ -329,7 +329,7 @@
leafFrameFactory, typeTraits, comparatorFactories, JobGenUtil.getForwardScan(iteration), keyFields,
keyFields, true, true, new BTreeDataflowHelperFactory(), true, nullWriterFactories, inputRdFactory, 5,
new ComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
- rdPartialAggregate);
+ rdPartialAggregate, rdInsert, rdDelete);
ClusterConfig.setLocationConstraint(spec, join);
/**
@@ -441,7 +441,7 @@
spec.addRoot(finalAggregator);
spec.addRoot(emptySink);
- spec.setConnectorPolicyAssignmentPolicy(new NonCombinerConnectorPolicyAssignmentPolicy());
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
spec.setFrameSize(frameSize);
return spec;
}
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
index cc12523..c6dabb0 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
@@ -50,6 +50,7 @@
import edu.uci.ics.pregelix.core.jobgen.clusterconfig.ClusterConfig;
import edu.uci.ics.pregelix.core.runtime.touchpoint.WritableComparingBinaryComparatorFactory;
import edu.uci.ics.pregelix.core.util.DataflowUtils;
+import edu.uci.ics.pregelix.dataflow.ConnectorPolicyAssignmentPolicy;
import edu.uci.ics.pregelix.dataflow.EmptySinkOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.EmptyTupleSourceOperatorDescriptor;
import edu.uci.ics.pregelix.dataflow.FinalAggregateOperatorDescriptor;
@@ -131,7 +132,7 @@
comparatorFactories, JobGenUtil.getForwardScan(iteration), null, null, true, true,
new BTreeDataflowHelperFactory(), inputRdFactory, 5,
new StartComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
- rdPartialAggregate);
+ rdPartialAggregate, rdInsert, rdDelete);
ClusterConfig.setLocationConstraint(spec, scanner);
/**
@@ -266,6 +267,7 @@
spec.addRoot(emptySink4);
spec.setFrameSize(frameSize);
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
return spec;
}
@@ -341,7 +343,7 @@
leafFrameFactory, typeTraits, comparatorFactories, JobGenUtil.getForwardScan(iteration), keyFields,
keyFields, true, true, new BTreeDataflowHelperFactory(), true, nullWriterFactories, inputRdFactory, 5,
new ComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
- rdPartialAggregate);
+ rdPartialAggregate, rdInsert, rdDelete);
ClusterConfig.setLocationConstraint(spec, join);
/**
@@ -474,6 +476,7 @@
spec.addRoot(emptySink4);
spec.setFrameSize(frameSize);
+ spec.setConnectorPolicyAssignmentPolicy(new ConnectorPolicyAssignmentPolicy(spec));
return spec;
}
diff --git a/pregelix/pregelix-core/src/main/resources/scripts/startAllNCs.sh b/pregelix/pregelix-core/src/main/resources/scripts/startAllNCs.sh
index 629bd90..d30da26 100644
--- a/pregelix/pregelix-core/src/main/resources/scripts/startAllNCs.sh
+++ b/pregelix/pregelix-core/src/main/resources/scripts/startAllNCs.sh
@@ -2,5 +2,5 @@
for i in `cat conf/slaves`
do
- ssh $i "cd ${PREGELIX_PATH}; bin/startnc.sh"
+ ssh $i "cd ${PREGELIX_PATH}; export JAVA_HOME=${JAVA_HOME}; bin/startnc.sh"
done
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/ConnectorPolicyAssignmentPolicy.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/ConnectorPolicyAssignmentPolicy.java
index d29afca..ae47ed8 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/ConnectorPolicyAssignmentPolicy.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/ConnectorPolicyAssignmentPolicy.java
@@ -15,25 +15,44 @@
package edu.uci.ics.pregelix.dataflow;
+import org.apache.commons.lang3.tuple.Pair;
+
import edu.uci.ics.hyracks.api.dataflow.IConnectorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicy;
import edu.uci.ics.hyracks.api.dataflow.connectors.IConnectorPolicyAssignmentPolicy;
import edu.uci.ics.hyracks.api.dataflow.connectors.PipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedBlockingConnectorPolicy;
import edu.uci.ics.hyracks.api.dataflow.connectors.SendSideMaterializedPipeliningConnectorPolicy;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.std.connectors.MToNPartitioningMergingConnectorDescriptor;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor;
public class ConnectorPolicyAssignmentPolicy implements IConnectorPolicyAssignmentPolicy {
private static final long serialVersionUID = 1L;
- private IConnectorPolicy senderSideMaterializePolicy = new SendSideMaterializedPipeliningConnectorPolicy();
+ private IConnectorPolicy senderSideMatPipPolicy = new SendSideMaterializedPipeliningConnectorPolicy();
+ private IConnectorPolicy senderSideMatBlkPolicy = new SendSideMaterializedBlockingConnectorPolicy();
private IConnectorPolicy pipeliningPolicy = new PipeliningConnectorPolicy();
+ private JobSpecification spec;
+
+ public ConnectorPolicyAssignmentPolicy(JobSpecification spec) {
+ this.spec = spec;
+ }
@Override
public IConnectorPolicy getConnectorPolicyAssignment(IConnectorDescriptor c, int nProducers, int nConsumers,
int[] fanouts) {
if (c instanceof MToNPartitioningMergingConnectorDescriptor) {
- return senderSideMaterializePolicy;
+ return senderSideMatPipPolicy;
} else {
- return pipeliningPolicy;
+ Pair<Pair<IOperatorDescriptor, Integer>, Pair<IOperatorDescriptor, Integer>> endPoints = spec
+ .getConnectorOperatorMap().get(c.getConnectorId());
+ IOperatorDescriptor consumer = endPoints.getRight().getLeft();
+ if (consumer instanceof TreeIndexInsertUpdateDeleteOperatorDescriptor) {
+ return senderSideMatBlkPolicy;
+ } else {
+ return pipeliningPolicy;
+ }
}
}
}
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java
index 0133d761..2402748 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/HDFSFileWriteOperatorDescriptor.java
@@ -14,6 +14,8 @@
*/
package edu.uci.ics.pregelix.dataflow;
+import java.io.File;
+import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
@@ -23,9 +25,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
@@ -37,6 +37,7 @@
import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameDeserializer;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.std.base.AbstractUnaryInputSinkOperatorNodePushable;
+import edu.uci.ics.hyracks.hdfs.ContextFactory;
import edu.uci.ics.pregelix.api.graph.Vertex;
import edu.uci.ics.pregelix.api.io.VertexOutputFormat;
import edu.uci.ics.pregelix.api.io.VertexWriter;
@@ -69,6 +70,7 @@
private TaskAttemptContext context;
private String TEMP_DIR = "_temporary";
private ClassLoader ctxCL;
+ private ContextFactory ctxFactory = new ContextFactory();
@Override
public void open() throws HyracksDataException {
@@ -80,8 +82,7 @@
conf = confFactory.createConfiguration();
VertexOutputFormat outputFormat = BspUtils.createVertexOutputFormat(conf);
- TaskAttemptID tid = new TaskAttemptID("", 0, true, partition, 0);
- context = new TaskAttemptContext(conf, tid);
+ context = ctxFactory.createContext(conf, partition);
try {
vertexWriter = outputFormat.createVertexWriter(context);
} catch (InterruptedException e) {
@@ -127,31 +128,26 @@
private void moveFilesToFinalPath() throws HyracksDataException {
try {
- JobContext job = new JobContext(conf, new JobID("0", 0));
+ JobContext job = ctxFactory.createJobContext(conf);
Path outputPath = FileOutputFormat.getOutputPath(job);
FileSystem dfs = FileSystem.get(conf);
Path filePath = new Path(outputPath, "part-" + new Integer(partition).toString());
- FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() {
- @Override
- public boolean accept(Path dir) {
- return dir.getName().endsWith(TEMP_DIR);
- }
- });
- Path tempDir = tempPaths[0].getPath();
- FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() {
- @Override
- public boolean accept(Path dir) {
- return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0;
- }
- });
- Path srcDir = results[0].getPath();
- if (!dfs.exists(srcDir))
- throw new HyracksDataException("file " + srcDir.toString() + " does not exist!");
-
- FileStatus[] srcFiles = dfs.listStatus(srcDir);
- Path srcFile = srcFiles[0].getPath();
- dfs.delete(filePath, true);
- dfs.rename(srcFile, filePath);
+ FileStatus[] results = findPartitionPaths(outputPath, dfs);
+ if (results.length >= 1) {
+ /**
+ * for Hadoop-0.20.2
+ */
+ renameFile(dfs, filePath, results);
+ } else {
+ /**
+ * for Hadoop-0.23.1
+ */
+ int jobId = job.getJobID().getId();
+ outputPath = new Path(outputPath.toString() + File.separator + TEMP_DIR + File.separator
+ + jobId);
+ results = findPartitionPaths(outputPath, dfs);
+ renameFile(dfs, filePath, results);
+ }
} catch (IOException e) {
throw new HyracksDataException(e);
} finally {
@@ -159,6 +155,36 @@
}
}
+ private FileStatus[] findPartitionPaths(Path outputPath, FileSystem dfs) throws FileNotFoundException,
+ IOException {
+ FileStatus[] tempPaths = dfs.listStatus(outputPath, new PathFilter() {
+ @Override
+ public boolean accept(Path dir) {
+ return dir.getName().endsWith(TEMP_DIR);
+ }
+ });
+ Path tempDir = tempPaths[0].getPath();
+ FileStatus[] results = dfs.listStatus(tempDir, new PathFilter() {
+ @Override
+ public boolean accept(Path dir) {
+ return dir.getName().indexOf(context.getTaskAttemptID().toString()) >= 0;
+ }
+ });
+ return results;
+ }
+
+ private void renameFile(FileSystem dfs, Path filePath, FileStatus[] results) throws IOException,
+ HyracksDataException, FileNotFoundException {
+ Path srcDir = results[0].getPath();
+ if (!dfs.exists(srcDir))
+ throw new HyracksDataException("file " + srcDir.toString() + " does not exist!");
+
+ FileStatus[] srcFiles = dfs.listStatus(srcDir);
+ Path srcFile = srcFiles[0].getPath();
+ dfs.delete(filePath, true);
+ dfs.rename(srcFile, filePath);
+ }
+
};
}
}
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java
index a38b19e..0da7baf 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/VertexFileScanOperatorDescriptor.java
@@ -26,7 +26,6 @@
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
@@ -134,11 +133,11 @@
appender.reset(frame, true);
VertexInputFormat vertexInputFormat = BspUtils.createVertexInputFormat(conf);
- TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
InputSplit split = splits.get(splitId);
+ TaskAttemptContext mapperContext = ctxFactory.createContext(conf, splitId);
- VertexReader vertexReader = vertexInputFormat.createVertexReader(split, context);
- vertexReader.initialize(split, context);
+ VertexReader vertexReader = vertexInputFormat.createVertexReader(split, mapperContext);
+ vertexReader.initialize(split, mapperContext);
Vertex readerVertex = (Vertex) BspUtils.createVertex(conf);
ArrayTupleBuilder tb = new ArrayTupleBuilder(fieldSize);
DataOutput dos = tb.getDataOutput();
@@ -146,7 +145,6 @@
/**
* set context
*/
- TaskAttemptContext mapperContext = ctxFactory.createContext(conf, splits.get(splitId));
Vertex.setContext(mapperContext);
/**
diff --git a/pregelix/pregelix-example/pom.xml b/pregelix/pregelix-example/pom.xml
index b3f78e1..e643331 100644
--- a/pregelix/pregelix-example/pom.xml
+++ b/pregelix/pregelix-example/pom.xml
@@ -77,6 +77,7 @@
</configuration>
</plugin>
<plugin>
+ <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-clean-plugin</artifactId>
<version>2.5</version>
<configuration>
diff --git a/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/GraphMutationVertex.java b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/GraphMutationVertex.java
new file mode 100644
index 0000000..5773602
--- /dev/null
+++ b/pregelix/pregelix-example/src/main/java/edu/uci/ics/pregelix/example/GraphMutationVertex.java
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.pregelix.example;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+
+import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.io.VertexWriter;
+import edu.uci.ics.pregelix.api.io.text.TextVertexOutputFormat;
+import edu.uci.ics.pregelix.api.io.text.TextVertexOutputFormat.TextVertexWriter;
+import edu.uci.ics.pregelix.api.job.PregelixJob;
+import edu.uci.ics.pregelix.example.client.Client;
+import edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat;
+import edu.uci.ics.pregelix.example.io.VLongWritable;
+
+/**
+ * Demonstrates the basic graph vertex insert/delete implementation.
+ */
+public class GraphMutationVertex extends Vertex<VLongWritable, DoubleWritable, FloatWritable, DoubleWritable> {
+
+ private VLongWritable vid = new VLongWritable();
+ private GraphMutationVertex newVertex = null;
+
+ @Override
+ public void compute(Iterator<DoubleWritable> msgIterator) {
+ if (Vertex.getSuperstep() == 1) {
+ if (newVertex == null) {
+ newVertex = new GraphMutationVertex();
+ }
+ if (getVertexId().get() % 2 == 0) {
+ deleteVertex(getVertexId());
+ } else {
+ vid.set(100 * getVertexId().get());
+ newVertex.setVertexId(vid);
+ newVertex.setVertexValue(getVertexValue());
+ addVertex(vid, newVertex);
+ }
+ voteToHalt();
+ } else {
+ if (getVertexId().get() % 190 == 0) {
+ deleteVertex(getVertexId());
+ }
+ voteToHalt();
+ }
+ }
+
+ /**
+ * Simple VertexWriter that supports {@link SimplePageRankVertex}
+ */
+ public static class SimpleGraphMutationVertexWriter extends
+ TextVertexWriter<VLongWritable, DoubleWritable, FloatWritable> {
+ public SimpleGraphMutationVertexWriter(RecordWriter<Text, Text> lineRecordWriter) {
+ super(lineRecordWriter);
+ }
+
+ @Override
+ public void writeVertex(Vertex<VLongWritable, DoubleWritable, FloatWritable, ?> vertex) throws IOException,
+ InterruptedException {
+ getRecordWriter().write(new Text(vertex.getVertexId().toString()),
+ new Text(vertex.getVertexValue().toString()));
+ }
+ }
+
+ @Override
+ public String toString() {
+ return getVertexId() + " " + getVertexValue();
+ }
+
+ /**
+ * Simple VertexOutputFormat that supports {@link SimplePageRankVertex}
+ */
+ public static class SimpleGraphMutationVertexOutputFormat extends
+ TextVertexOutputFormat<VLongWritable, DoubleWritable, FloatWritable> {
+
+ @Override
+ public VertexWriter<VLongWritable, DoubleWritable, FloatWritable> createVertexWriter(TaskAttemptContext context)
+ throws IOException, InterruptedException {
+ RecordWriter<Text, Text> recordWriter = textOutputFormat.getRecordWriter(context);
+ return new SimpleGraphMutationVertexWriter(recordWriter);
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ PregelixJob job = new PregelixJob(GraphMutationVertex.class.getSimpleName());
+ job.setVertexClass(GraphMutationVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleGraphMutationVertexOutputFormat.class);
+ Client.run(args, job);
+ }
+
+}
diff --git a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
index c353d84..ca5a1c4 100644
--- a/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
+++ b/pregelix/pregelix-example/src/test/java/edu/uci/ics/pregelix/example/jobgen/JobGenerator.java
@@ -25,6 +25,8 @@
import edu.uci.ics.pregelix.api.job.PregelixJob;
import edu.uci.ics.pregelix.example.ConnectedComponentsVertex;
import edu.uci.ics.pregelix.example.ConnectedComponentsVertex.SimpleConnectedComponentsVertexOutputFormat;
+import edu.uci.ics.pregelix.example.GraphMutationVertex;
+import edu.uci.ics.pregelix.example.GraphMutationVertex.SimpleGraphMutationVertexOutputFormat;
import edu.uci.ics.pregelix.example.PageRankVertex;
import edu.uci.ics.pregelix.example.PageRankVertex.SimplePageRankVertexOutputFormat;
import edu.uci.ics.pregelix.example.PageRankVertex.SimulatedPageRankVertexInputFormat;
@@ -217,6 +219,17 @@
job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
}
+ private static void generateGraphMutationJob(String jobName, String outputPath) throws IOException {
+ PregelixJob job = new PregelixJob(jobName);
+ job.setVertexClass(GraphMutationVertex.class);
+ job.setVertexInputFormatClass(TextPageRankInputFormat.class);
+ job.setVertexOutputFormatClass(SimpleGraphMutationVertexOutputFormat.class);
+ FileInputFormat.setInputPaths(job, HDFS_INPUTPATH);
+ FileOutputFormat.setOutputPath(job, new Path(HDFS_OUTPUTPAH));
+ job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
+ job.getConfiguration().writeXml(new FileOutputStream(new File(outputPath)));
+ }
+
private static void genPageRank() throws IOException {
generatePageRankJob("PageRank", outputBase + "PageRank.xml");
generatePageRankJobReal("PageRank", outputBase + "PageRankReal.xml");
@@ -250,6 +263,10 @@
generateMaximalCliqueJob("Maximal Clique", outputBase + "MaximalClique.xml");
}
+ private static void genGraphMutation() throws IOException {
+ generateGraphMutationJob("Graph Mutation", outputBase + "GraphMutation.xml");
+ }
+
public static void main(String[] args) throws IOException {
genPageRank();
genShortestPath();
@@ -257,5 +274,6 @@
genReachibility();
genTriangleCounting();
genMaximalClique();
+ genGraphMutation();
}
}
diff --git a/pregelix/pregelix-example/src/test/resources/expected/GraphMutation.result b/pregelix/pregelix-example/src/test/resources/expected/GraphMutation.result
new file mode 100644
index 0000000..3f2fd60
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/expected/GraphMutation.result
@@ -0,0 +1,19 @@
+1 0.0
+3 0.0
+5 0.0
+7 0.0
+9 0.0
+11 0.0
+13 0.0
+15 0.0
+17 0.0
+19 0.0
+100 0.0
+300 0.0
+500 0.0
+700 0.0
+900 0.0
+1100 0.0
+1300 0.0
+1500 0.0
+1700 0.0
diff --git a/pregelix/pregelix-example/src/test/resources/hadoop/conf/log4j.properties b/pregelix/pregelix-example/src/test/resources/hadoop/conf/log4j.properties
index d5e6004..3335964 100755
--- a/pregelix/pregelix-example/src/test/resources/hadoop/conf/log4j.properties
+++ b/pregelix/pregelix-example/src/test/resources/hadoop/conf/log4j.properties
@@ -76,7 +76,7 @@
# FSNamesystem Audit logging
# All audit events are logged at INFO level
#
-log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
+log4j.logger.org.apache.hadoop=FATAL
# Custom Logging levels
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/GraphMutation.xml b/pregelix/pregelix-example/src/test/resources/jobs/GraphMutation.xml
new file mode 100644
index 0000000..9f51f6d
--- /dev/null
+++ b/pregelix/pregelix-example/src/test/resources/jobs/GraphMutation.xml
@@ -0,0 +1,141 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
+<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
+<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>
+<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>
+<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>
+<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
+<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
+<property><name>mapred.input.dir</name><value>file:/webmap</value></property>
+<property><name>mapred.submit.replication</name><value>10</value></property>
+<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
+<property><name>fs.checkpoint.dir</name><value>${hadoop.tmp.dir}/dfs/namesecondary</value></property>
+<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
+<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>
+<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
+<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>
+<property><name>keep.failed.task.files</name><value>false</value></property>
+<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>
+<property><name>io.bytes.per.checksum</name><value>512</value></property>
+<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
+<property><name>hadoop.util.hash.type</name><value>murmur</value></property>
+<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>
+<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
+<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
+<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>
+<property><name>fs.checkpoint.period</name><value>3600</value></property>
+<property><name>mapred.child.tmp</name><value>./tmp</value></property>
+<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
+<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>
+<property><name>hadoop.logfile.count</name><value>10</value></property>
+<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
+<property><name>mapred.output.dir</name><value>/result</value></property>
+<property><name>io.map.index.skip</name><value>0</value></property>
+<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
+<property><name>mapred.output.compress</name><value>false</value></property>
+<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
+<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
+<property><name>fs.checkpoint.size</name><value>67108864</value></property>
+<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>
+<property><name>mapred.job.name</name><value>Graph Mutation</value></property>
+<property><name>local.cache.size</name><value>10737418240</value></property>
+<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>
+<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
+<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
+<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
+<property><name>mapred.task.timeout</name><value>600000</value></property>
+<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
+<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>
+<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
+<property><name>ipc.client.kill.max</name><value>10</value></property>
+<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>
+<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
+<property><name>io.sort.record.percent</name><value>0.05</value></property>
+<property><name>hadoop.security.authorization</name><value>false</value></property>
+<property><name>mapred.max.tracker.failures</name><value>4</value></property>
+<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>
+<property><name>pregelix.numVertices</name><value>20</value></property>
+<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
+<property><name>mapred.map.tasks</name><value>2</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
+<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
+<property><name>fs.default.name</name><value>file:///</value></property>
+<property><name>tasktracker.http.threads</name><value>40</value></property>
+<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
+<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
+<property><name>mapred.reduce.tasks</name><value>1</value></property>
+<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.ScriptBasedMapping</value></property>
+<property><name>pregelix.vertexClass</name><value>edu.uci.ics.pregelix.example.GraphMutationVertex</value></property>
+<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>
+<property><name>io.file.buffer.size</name><value>4096</value></property>
+<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>
+<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
+<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
+<property><name>fs.har.impl.disable.cache</name><value>true</value></property>
+<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
+<property><name>hadoop.native.lib</name><value>true</value></property>
+<property><name>fs.s3.block.size</name><value>67108864</value></property>
+<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>
+<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
+<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
+<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
+<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>
+<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
+<property><name>mapred.queue.names</name><value>default</value></property>
+<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>
+<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>
+<property><name>mapred.job.tracker</name><value>local</value></property>
+<property><name>io.skip.checksum.errors</name><value>false</value></property>
+<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
+<property><name>fs.s3.maxRetries</name><value>4</value></property>
+<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
+<property><name>fs.trash.interval</name><value>0</value></property>
+<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
+<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
+<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
+<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>
+<property><name>io.sort.mb</name><value>100</value></property>
+<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
+<property><name>io.sort.factor</name><value>10</value></property>
+<property><name>mapred.task.profile</name><value>false</value></property>
+<property><name>job.end.retry.interval</name><value>30000</value></property>
+<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>
+<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
+<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
+<property><name>webinterface.private.actions</name><value>false</value></property>
+<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
+<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>
+<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
+<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>
+<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
+<property><name>mapred.compress.map.output</name><value>false</value></property>
+<property><name>io.sort.spill.percent</name><value>0.80</value></property>
+<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>
+<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
+<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
+<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>
+<property><name>job.end.retry.attempts</name><value>0</value></property>
+<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
+<property><name>pregelix.vertexOutputFormatClass</name><value>edu.uci.ics.pregelix.example.GraphMutationVertex$SimpleGraphMutationVertexOutputFormat</value></property>
+<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
+<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
+<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
+<property><name>hadoop.logfile.size</name><value>10000000</value></property>
+<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat</value></property>
+<property><name>mapred.job.queue.name</name><value>default</value></property>
+<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
+<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>
+<property><name>topology.script.number.args</name><value>100</value></property>
+<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>
+<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>
+<property><name>mapred.task.cache.levels</name><value>2</value></property>
+<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
+<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
+<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
+<property><name>mapred.min.split.size</name><value>0</value></property>
+<property><name>mapred.map.max.attempts</name><value>4</value></property>
+<property><name>jobclient.output.filter</name><value>FAILED</value></property>
+<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
+<property><name>mapred.acls.enabled</name><value>false</value></property>
+</configuration>
\ No newline at end of file
diff --git a/pregelix/pregelix-example/src/test/resources/jobs/TriangleCounting.xml b/pregelix/pregelix-example/src/test/resources/jobs/TriangleCounting.xml
index ee2acc1..0f44f4d 100644
--- a/pregelix/pregelix-example/src/test/resources/jobs/TriangleCounting.xml
+++ b/pregelix/pregelix-example/src/test/resources/jobs/TriangleCounting.xml
@@ -121,7 +121,7 @@
<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
<property><name>hadoop.logfile.size</name><value>10000000</value></property>
-<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.inputformat.TextPageRankInputFormat</value></property>
+<property><name>pregelix.vertexInputFormatClass</name><value>edu.uci.ics.pregelix.example.trianglecounting.TextTriangleCountingInputFormat</value></property>
<property><name>pregelix.aggregatorClass</name><value>edu.uci.ics.pregelix.example.trianglecounting.TriangleCountingAggregator</value></property>
<property><name>mapred.job.queue.name</name><value>default</value></property>
<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
index a0dca3d..f7958d9 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/ComputeUpdateFunctionFactory.java
@@ -188,7 +188,7 @@
/**
* this partition should not terminate
*/
- if (terminate && (!vertex.isHalted() || vertex.hasMessage()))
+ if (terminate && (!vertex.isHalted() || vertex.hasMessage() || vertex.createdNewLiveVertex()))
terminate = false;
aggregator.step(vertex);
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java
index 3d8a355..0cf64a0 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/StartComputeUpdateFunctionFactory.java
@@ -189,7 +189,7 @@
/**
* this partition should not terminate
*/
- if (terminate && (!vertex.isHalted() || vertex.hasMessage()))
+ if (terminate && (!vertex.isHalted() || vertex.hasMessage() || vertex.createdNewLiveVertex()))
terminate = false;
/**
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/RuntimeHookFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/RuntimeHookFactory.java
index d968262..c025f85 100644
--- a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/RuntimeHookFactory.java
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/touchpoint/RuntimeHookFactory.java
@@ -16,6 +16,7 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
@@ -45,7 +46,7 @@
public void configure(IHyracksTaskContext ctx) throws HyracksDataException {
Configuration conf = confFactory.createConfiguration();
try {
- TaskAttemptContext mapperContext = ctxFactory.createContext(conf, null);
+ TaskAttemptContext mapperContext = ctxFactory.createContext(conf, new TaskAttemptID());
Vertex.setContext(mapperContext);
BspUtils.setDefaultConfiguration(conf);
} catch (Exception e) {