split hivesterix into several modules
git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_release_cleanup@3074 123451ca-8445-de46-9d55-352943316053
diff --git a/hivesterix/hivesterix-common/pom.xml b/hivesterix/hivesterix-common/pom.xml
new file mode 100644
index 0000000..33d8fb3
--- /dev/null
+++ b/hivesterix/hivesterix-common/pom.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0"?>
+<project
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+ xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hivesterix-common</artifactId>
+ <name>hivesterix-common</name>
+
+ <parent>
+ <artifactId>hivesterix</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.3-SNAPSHOT</version>
+ </parent>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ <encoding>UTF-8</encoding>
+ <fork>true</fork>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-api</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>algebricks-compiler</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-hdfs-core</artifactId>
+ <version>0.2.3-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop.hive</groupId>
+ <artifactId>hive-exec</artifactId>
+ <version>0.7.0</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ </dependencies>
+</project>
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/common/config/ConfUtil.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/common/config/ConfUtil.java
new file mode 100644
index 0000000..025f423
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/common/config/ConfUtil.java
@@ -0,0 +1,139 @@
+package edu.uci.ics.hivesterix.common.config;
+
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.JobConf;
+
+import edu.uci.ics.hyracks.api.client.HyracksConnection;
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.api.client.NodeControllerInfo;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.topology.ClusterTopology;
+
+@SuppressWarnings({ "rawtypes", "deprecation" })
+public class ConfUtil {
+
+ private static JobConf job;
+ private static HiveConf hconf;
+ private static String[] NCs;
+ private static Map<String, List<String>> ncMapping;
+ private static IHyracksClientConnection hcc = null;
+ private static ClusterTopology topology = null;
+
+ public static JobConf getJobConf(Class<? extends InputFormat> format, Path path) {
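+ // start from the JobConf cached via setJobConf() if one exists; otherwise create a fresh one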
+ JobConf conf = new JobConf();
+ if (job != null)
+ conf = job;
+
+ String hadoopPath = System.getProperty("HADOOP_HOME", "/hadoop");
+ Path pathCore = new Path(hadoopPath + "/conf/core-site.xml");
+ conf.addResource(pathCore);
+ Path pathMapRed = new Path(hadoopPath + "/conf/mapred-site.xml");
+ conf.addResource(pathMapRed);
+ Path pathHDFS = new Path(hadoopPath + "/conf/hdfs-site.xml");
+ conf.addResource(pathHDFS);
+
+ conf.setInputFormat(format);
+ FileInputFormat.setInputPaths(conf, path);
+ return conf;
+ }
+
+ public static JobConf getJobConf() {
+ JobConf conf = new JobConf();
+ if (job != null)
+ conf = job;
+
+ String hadoopPath = System.getProperty("HADOOP_HOME", "/hadoop");
+ Path pathCore = new Path(hadoopPath + "/conf/core-site.xml");
+ conf.addResource(pathCore);
+ Path pathMapRed = new Path(hadoopPath + "/conf/mapred-site.xml");
+ conf.addResource(pathMapRed);
+ Path pathHDFS = new Path(hadoopPath + "/conf/hdfs-site.xml");
+ conf.addResource(pathHDFS);
+
+ return conf;
+ }
+
+ public static void setJobConf(JobConf conf) {
+ job = conf;
+ }
+
+ public static void setHiveConf(HiveConf hiveConf) {
+ hconf = hiveConf;
+ }
+
+ public static HiveConf getHiveConf() {
+ if (hconf == null) {
+ hconf = new HiveConf(SessionState.class);
+ hconf.addResource(new Path("conf/hive-default.xml"));
+ }
+ return hconf;
+ }
+
+ public static String[] getNCs() throws HyracksException {
+ if (NCs == null) {
+ try {
+ loadClusterConfig();
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+ return NCs;
+ }
+
+ public static Map<String, List<String>> getNCMapping() throws HyracksException {
+ if (ncMapping == null) {
+ try {
+ loadClusterConfig();
+ } catch (Exception e) {
+ throw new HyracksException(e);
+ }
+ }
+ return ncMapping;
+ }
+
+ private static void loadClusterConfig() {
+ try {
+ getHiveConf();
+ String ipAddress = hconf.get("hive.hyracks.host");
+ int port = Integer.parseInt(hconf.get("hive.hyracks.port"));
+ int mpl = Integer.parseInt(hconf.get("hive.hyracks.parrallelism"));
+ hcc = new HyracksConnection(ipAddress, port);
+ topology = hcc.getClusterTopology();
+ Map<String, NodeControllerInfo> ncNameToNcInfos = hcc.getNodeControllerInfos();
+ NCs = new String[ncNameToNcInfos.size() * mpl];
+ ncMapping = new HashMap<String, List<String>>();
+ int i = 0;
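+ // group NC names by IP address; each NC name is also repeated mpl times in NCs to get mpl-way parallelism per node controller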
+ for (Map.Entry<String, NodeControllerInfo> entry : ncNameToNcInfos.entrySet()) {
+ String ipAddr = InetAddress.getByAddress(entry.getValue().getNetworkAddress().getIpAddress())
+ .getHostAddress();
+ List<String> matchedNCs = ncMapping.get(ipAddr);
+ if (matchedNCs == null) {
+ matchedNCs = new ArrayList<String>();
+ ncMapping.put(ipAddr, matchedNCs);
+ }
+ matchedNCs.add(entry.getKey());
+ for (int j = i * mpl; j < i * mpl + mpl; j++)
+ NCs[j] = entry.getKey();
+ i++;
+ }
+ } catch (Exception e) {
+ throw new IllegalStateException(e);
+ }
+ }
+
+ public static ClusterTopology getClusterTopology() {
+ if (topology == null)
+ loadClusterConfig();
+ return topology;
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java
new file mode 100644
index 0000000..8fb715b
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionConstant.java
@@ -0,0 +1,24 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+/**
+ * some constants for expressions
+ *
+ * @author yingyib
+ */
+public class ExpressionConstant {
+
+ /**
+ * namespace for function identifiers
+ */
+ public static String NAMESPACE = "hive";
+
+ /**
+ * field access expression: modeled as a function in Algebricks
+ */
+ public static String FIELDACCESS = "fieldaccess";
+
+ /**
+ * null string: modeled as null in Algebricks
+ */
+ public static String NULL = "null";
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
new file mode 100644
index 0000000..662ed83
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/ExpressionTranslator.java
@@ -0,0 +1,209 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+
+public class ExpressionTranslator {
+
+ public static Object getHiveExpression(ILogicalExpression expr, IVariableTypeEnvironment env) throws Exception {
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ /**
+ * function expression
+ */
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+ IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
+ FunctionIdentifier fid = funcInfo.getFunctionIdentifier();
+
+ if (fid.getName().equals(ExpressionConstant.FIELDACCESS)) {
+ Object info = ((HiveFunctionInfo) funcInfo).getInfo();
+ ExprNodeFieldDesc desc = (ExprNodeFieldDesc) info;
+ return new ExprNodeFieldDesc(desc.getTypeInfo(), desc.getDesc(), desc.getFieldName(), desc.getIsList());
+ }
+
+ if (fid.getName().equals(ExpressionConstant.NULL)) {
+ return new ExprNodeNullDesc();
+ }
+
+ /**
+ * argument expressions: translate the argument expressions recursively
+ * first; this logic is shared by scalar, aggregation and unnesting
+ * functions
+ */
+ List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
+ List<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
+ for (Mutable<ILogicalExpression> argument : arguments) {
+ /**
+ * parameters cannot be aggregate function descriptors
+ */
+ ExprNodeDesc parameter = (ExprNodeDesc) getHiveExpression(argument.getValue(), env);
+ parameters.add(parameter);
+ }
+
+ /**
+ * get expression
+ */
+ if (funcExpr instanceof ScalarFunctionCallExpression) {
+ String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getHiveFunctionName(fid);
+ GenericUDF udf;
+ if (udfName != null) {
+ /**
+ * get corresponding function info for built-in functions
+ */
+ FunctionInfo fInfo = FunctionRegistry.getFunctionInfo(udfName);
+ udf = fInfo.getGenericUDF();
+
+ int inputSize = parameters.size();
+ List<ExprNodeDesc> currentDescs = new ArrayList<ExprNodeDesc>();
+
+ // built-in UDFs such as and/or may accept only two arguments: reduce longer argument lists pairwise into a binary expression tree
+ while (inputSize > 2) {
+ int pairs = inputSize / 2;
+ for (int i = 0; i < pairs; i++) {
+ List<ExprNodeDesc> descs = new ArrayList<ExprNodeDesc>();
+ descs.add(parameters.get(2 * i));
+ descs.add(parameters.get(2 * i + 1));
+ ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, descs);
+ currentDescs.add(desc);
+ }
+
+ if (inputSize % 2 != 0) {
+ // odd number of arguments: carry the last one over to the next round
+ currentDescs.add(parameters.get(inputSize - 1));
+ }
+ inputSize = currentDescs.size();
+ parameters.clear();
+ parameters.addAll(currentDescs);
+ currentDescs.clear();
+ }
+
+ } else {
+ Object secondInfo = ((HiveFunctionInfo) funcInfo).getInfo();
+ if (secondInfo != null) {
+
+ /**
+ * for GenericUDFBridge: do not recompute the type of this
+ * hive expression, because its parameters may have
+ * been changed
+ */
+ ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) ((HiveFunctionInfo) funcInfo)
+ .getInfo();
+ udf = hiveExpr.getGenericUDF();
+ } else {
+ /**
+ * for other generic UDF
+ */
+ Class<?> udfClass;
+ try {
+ udfClass = Class.forName(fid.getName());
+ udf = (GenericUDF) udfClass.newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ }
+ }
+ /**
+ * get hive generic function expression
+ */
+ ExprNodeDesc desc = ExprNodeGenericFuncDesc.newInstance(udf, parameters);
+ return desc;
+ } else if (funcExpr instanceof AggregateFunctionCallExpression) {
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
+ .getInfo();
+ /**
+ * set parameters
+ */
+ aggregateDesc.setParameters((ArrayList<ExprNodeDesc>) parameters);
+
+ List<TypeInfo> originalParameterTypeInfos = new ArrayList<TypeInfo>();
+ for (ExprNodeDesc parameter : parameters) {
+ if (parameter.getTypeInfo() instanceof StructTypeInfo) {
+ originalParameterTypeInfos.add(TypeInfoFactory.doubleTypeInfo);
+ } else
+ originalParameterTypeInfos.add(parameter.getTypeInfo());
+ }
+
+ GenericUDAFEvaluator eval = FunctionRegistry.getGenericUDAFEvaluator(
+ aggregateDesc.getGenericUDAFName(), originalParameterTypeInfos, aggregateDesc.getDistinct(),
+ false);
+
+ AggregationDesc newAggregateDesc = new AggregationDesc(aggregateDesc.getGenericUDAFName(), eval,
+ aggregateDesc.getParameters(), aggregateDesc.getDistinct(), aggregateDesc.getMode());
+ return newAggregateDesc;
+ } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
+ /**
+ * translate a UDTF expression back to a hive UDTFDesc
+ */
+ UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
+ String funcName = hiveDesc.getUDTFName();
+ FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
+ GenericUDTF udtf = fi.getGenericUDTF();
+ UDTFDesc desc = new UDTFDesc(udtf);
+ return desc;
+ } else {
+ throw new IllegalStateException("unrecognized function expression " + expr.getClass().getName());
+ }
+ } else if ((expr.getExpressionTag() == LogicalExpressionTag.VARIABLE)) {
+ /**
+ * get type for variable in the environment
+ */
+ VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
+ LogicalVariable var = varExpr.getVariableReference();
+ TypeInfo typeInfo = (TypeInfo) env.getVarType(var);
+ ExprNodeDesc desc = new ExprNodeColumnDesc(typeInfo, var.toString(), "", false);
+ return desc;
+ } else if ((expr.getExpressionTag() == LogicalExpressionTag.CONSTANT)) {
+ /**
+ * get expression for constant in the environment
+ */
+ ConstantExpression varExpr = (ConstantExpression) expr;
+ Object value = ((HivesterixConstantValue) varExpr.getValue()).getObject();
+ ExprNodeDesc desc = new ExprNodeConstantDesc(value);
+ return desc;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java
new file mode 100644
index 0000000..56890eb
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveAlgebricksBuiltInFunctionMap.java
@@ -0,0 +1,82 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.HashMap;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+
+public class HiveAlgebricksBuiltInFunctionMap {
+
+ /**
+ * the singleton hive-Algebricks builtin function map instance
+ */
+ public static HiveAlgebricksBuiltInFunctionMap INSTANCE = new HiveAlgebricksBuiltInFunctionMap();
+
+ /**
+ * hive to Algebricks function name mapping
+ */
+ private HashMap<String, FunctionIdentifier> hiveToAlgebricksMap = new HashMap<String, FunctionIdentifier>();
+
+ /**
+ * Algebricks to hive function name mapping
+ */
+ private HashMap<FunctionIdentifier, String> AlgebricksToHiveMap = new HashMap<FunctionIdentifier, String>();
+
+ /**
+ * the bi-directional mapping between hive functions and Algebricks
+ * functions
+ */
+ private HiveAlgebricksBuiltInFunctionMap() {
+ hiveToAlgebricksMap.put("and", AlgebricksBuiltinFunctions.AND);
+ hiveToAlgebricksMap.put("or", AlgebricksBuiltinFunctions.OR);
+ hiveToAlgebricksMap.put("!", AlgebricksBuiltinFunctions.NOT);
+ hiveToAlgebricksMap.put("not", AlgebricksBuiltinFunctions.NOT);
+ hiveToAlgebricksMap.put("=", AlgebricksBuiltinFunctions.EQ);
+ hiveToAlgebricksMap.put("<>", AlgebricksBuiltinFunctions.NEQ);
+ hiveToAlgebricksMap.put(">", AlgebricksBuiltinFunctions.GT);
+ hiveToAlgebricksMap.put("<", AlgebricksBuiltinFunctions.LT);
+ hiveToAlgebricksMap.put(">=", AlgebricksBuiltinFunctions.GE);
+ hiveToAlgebricksMap.put("<=", AlgebricksBuiltinFunctions.LE);
+
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.AND, "and");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.OR, "or");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NOT, "!");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NOT, "not");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.EQ, "=");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.NEQ, "<>");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.GT, ">");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.LT, "<");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.GE, ">=");
+ AlgebricksToHiveMap.put(AlgebricksBuiltinFunctions.LE, "<=");
+ }
+
+ /**
+ * get hive function name from Algebricks function identifier
+ *
+ * @param AlgebricksId
+ * @return the corresponding hive function name, or null if there is none
+ */
+ public String getHiveFunctionName(FunctionIdentifier AlgebricksId) {
+ return AlgebricksToHiveMap.get(AlgebricksId);
+ }
+
+ /**
+ * get the Algebricks built-in function identifier corresponding to a hive UDF or GenericUDF class
+ *
+ * @param funcClass
+ * @return function identifier
+ */
+ public FunctionIdentifier getAlgebricksFunctionId(Class<?> funcClass) {
+ Description annotation = (Description) funcClass.getAnnotation(Description.class);
+ String hiveUDFName;
+ if (annotation == null) {
+ return null;
+ } else {
+ hiveUDFName = annotation.name();
+ return hiveToAlgebricksMap.get(hiveUDFName);
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java
new file mode 100644
index 0000000..e10e8c1
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveExpressionTypeComputer.java
@@ -0,0 +1,179 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.UDTFDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionTypeComputer;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.UnnestingFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+public class HiveExpressionTypeComputer implements IExpressionTypeComputer {
+
+ public static IExpressionTypeComputer INSTANCE = new HiveExpressionTypeComputer();
+
+ @Override
+ public Object getType(ILogicalExpression expr, IMetadataProvider<?, ?> metadataProvider,
+ IVariableTypeEnvironment env) throws AlgebricksException {
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ /**
+ * function expression
+ */
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+ IFunctionInfo funcInfo = funcExpr.getFunctionInfo();
+
+ /**
+ * argument expressions, types, object inspectors
+ */
+ List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
+ List<TypeInfo> argumentTypes = new ArrayList<TypeInfo>();
+
+ /**
+ * get types of argument
+ */
+ for (Mutable<ILogicalExpression> argument : arguments) {
+ TypeInfo type = (TypeInfo) getType(argument.getValue(), metadataProvider, env);
+ argumentTypes.add(type);
+ }
+
+ ObjectInspector[] childrenOIs = new ObjectInspector[argumentTypes.size()];
+
+ /**
+ * get object inspector
+ */
+ for (int i = 0; i < argumentTypes.size(); i++) {
+ childrenOIs[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(argumentTypes.get(i));
+ }
+
+ /**
+ * type inference for scalar function
+ */
+ if (funcExpr instanceof ScalarFunctionCallExpression) {
+
+ FunctionIdentifier AlgebricksId = funcInfo.getFunctionIdentifier();
+ Object functionInfo = ((HiveFunctionInfo) funcInfo).getInfo();
+ String udfName = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getHiveFunctionName(AlgebricksId);
+ GenericUDF udf;
+ if (udfName != null) {
+ /**
+ * get corresponding function info for built-in functions
+ */
+ FunctionInfo fInfo = FunctionRegistry.getFunctionInfo(udfName);
+ udf = fInfo.getGenericUDF();
+ } else if (functionInfo != null) {
+ /**
+ * for GenericUDFBridge: do not recompute the type of this
+ * hive expression, because its parameters may have been
+ * changed
+ */
+ ExprNodeGenericFuncDesc hiveExpr = (ExprNodeGenericFuncDesc) functionInfo;
+ udf = hiveExpr.getGenericUDF();
+ } else {
+ /**
+ * for other generic UDF
+ */
+ Class<?> udfClass;
+ try {
+ udfClass = Class.forName(AlgebricksId.getName());
+ udf = (GenericUDF) udfClass.newInstance();
+ } catch (Exception e) {
+ e.printStackTrace();
+ throw new AlgebricksException(e.getMessage());
+ }
+ }
+ /**
+ * doing the actual type inference
+ */
+ ObjectInspector oi = null;
+ try {
+ oi = udf.initialize(childrenOIs);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(oi);
+ return exprType;
+
+ } else if (funcExpr instanceof AggregateFunctionCallExpression) {
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
+ .getInfo();
+ /**
+ * type inference for aggregation function
+ */
+ GenericUDAFEvaluator result = aggregateDesc.getGenericUDAFEvaluator();
+
+ ObjectInspector returnOI = null;
+ try {
+ returnOI = result.init(aggregateDesc.getMode(), childrenOIs);
+ } catch (HiveException e) {
+ e.printStackTrace();
+ }
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
+ return exprType;
+ } else if (funcExpr instanceof UnnestingFunctionCallExpression) {
+ /**
+ * type inference for UDTF function
+ */
+ UDTFDesc hiveDesc = (UDTFDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
+ GenericUDTF udtf = hiveDesc.getGenericUDTF();
+ ObjectInspector returnOI = null;
+ try {
+ returnOI = udtf.initialize(childrenOIs);
+ } catch (HiveException e) {
+ e.printStackTrace();
+ }
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
+ return exprType;
+ } else {
+ throw new IllegalStateException("unrecognized function expression " + expr.getClass().getName());
+ }
+ } else if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
+ /**
+ * get type for variable in the environment
+ */
+ VariableReferenceExpression varExpr = (VariableReferenceExpression) expr;
+ LogicalVariable var = varExpr.getVariableReference();
+ TypeInfo type = (TypeInfo) env.getVarType(var);
+ return type;
+ } else if (expr.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
+ /**
+ * get type for constant, from its java class
+ */
+ ConstantExpression constExpr = (ConstantExpression) expr;
+ HivesterixConstantValue value = (HivesterixConstantValue) constExpr.getValue();
+ TypeInfo type = TypeInfoFactory.getPrimitiveTypeInfoFromJavaPrimitive(value.getObject().getClass());
+ return type;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java
new file mode 100644
index 0000000..ced8d02
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveFunctionInfo.java
@@ -0,0 +1,36 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.io.Serializable;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
+
+public class HiveFunctionInfo implements IFunctionInfo, Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * primary function identifier
+ */
+ private transient FunctionIdentifier fid;
+
+ /**
+ * secondary function info: the associated hive descriptor or function name
+ */
+ private transient Object secondaryFid;
+
+ public HiveFunctionInfo(FunctionIdentifier fid, Object secondFid) {
+ this.fid = fid;
+ this.secondaryFid = secondFid;
+ }
+
+ @Override
+ public FunctionIdentifier getFunctionIdentifier() {
+ return fid;
+ }
+
+ public Object getInfo() {
+ return secondaryFid;
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java
new file mode 100644
index 0000000..b77fe49
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveMergeAggregationExpressionFactory.java
@@ -0,0 +1,74 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IMergeAggregationExpressionFactory;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+
+/**
+ * generate merge aggregation expression from an aggregation expression
+ *
+ * @author yingyib
+ */
+public class HiveMergeAggregationExpressionFactory implements IMergeAggregationExpressionFactory {
+
+ public static IMergeAggregationExpressionFactory INSTANCE = new HiveMergeAggregationExpressionFactory();
+
+ @Override
+ public ILogicalExpression createMergeAggregation(ILogicalExpression expr, IOptimizationContext context)
+ throws AlgebricksException {
+ /**
+ * only aggregation function calls have a merge counterpart
+ */
+ if (expr instanceof AggregateFunctionCallExpression) {
+ AggregateFunctionCallExpression funcExpr = (AggregateFunctionCallExpression) expr;
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregator = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo()).getInfo();
+ LogicalVariable inputVar = context.newVar();
+ ExprNodeDesc col = new ExprNodeColumnDesc(TypeInfoFactory.voidTypeInfo, inputVar.toString(), null, false);
+ ArrayList<ExprNodeDesc> parameters = new ArrayList<ExprNodeDesc>();
+ parameters.add(col);
+
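+ // map the original aggregation mode to the mode its merge counterpart should run in (PARTIAL1 -> PARTIAL2, COMPLETE -> FINAL)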
+ GenericUDAFEvaluator.Mode mergeMode;
+ if (aggregator.getMode() == GenericUDAFEvaluator.Mode.PARTIAL1)
+ mergeMode = GenericUDAFEvaluator.Mode.PARTIAL2;
+ else if (aggregator.getMode() == GenericUDAFEvaluator.Mode.COMPLETE)
+ mergeMode = GenericUDAFEvaluator.Mode.FINAL;
+ else
+ mergeMode = aggregator.getMode();
+ AggregationDesc mergeDesc = new AggregationDesc(aggregator.getGenericUDAFName(),
+ aggregator.getGenericUDAFEvaluator(), parameters, aggregator.getDistinct(), mergeMode);
+
+ String UDAFName = mergeDesc.getGenericUDAFName();
+ List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();
+ arguments.add(new MutableObject<ILogicalExpression>(new VariableReferenceExpression(inputVar)));
+
+ FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDAFName + "("
+ + mergeDesc.getMode() + ")");
+ HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, mergeDesc);
+ AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(funcInfo,
+ false, arguments);
+ return aggregationExpression;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java
new file mode 100644
index 0000000..906e3ce
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HiveNullableTypeComputer.java
@@ -0,0 +1,15 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.INullableTypeComputer;
+
+public class HiveNullableTypeComputer implements INullableTypeComputer {
+
+ public static INullableTypeComputer INSTANCE = new HiveNullableTypeComputer();
+
+ @Override
+ public Object makeNullableType(Object type) throws AlgebricksException {
+ return type;
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java
new file mode 100644
index 0000000..c74966c
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivePartialAggregationTypeComputer.java
@@ -0,0 +1,102 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.AggregationDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AggregateFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IExpressionTypeComputer;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IPartialAggregationTypeComputer;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
+import edu.uci.ics.hyracks.algebricks.core.algebra.metadata.IMetadataProvider;
+
+public class HivePartialAggregationTypeComputer implements IPartialAggregationTypeComputer {
+
+ public static IPartialAggregationTypeComputer INSTANCE = new HivePartialAggregationTypeComputer();
+
+ @Override
+ public Object getType(ILogicalExpression expr, IVariableTypeEnvironment env,
+ IMetadataProvider<?, ?> metadataProvider) throws AlgebricksException {
+ if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+ IExpressionTypeComputer tc = HiveExpressionTypeComputer.INSTANCE;
+ /**
+ * function expression
+ */
+ AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+
+ /**
+ * argument expressions, types, object inspectors
+ */
+ List<Mutable<ILogicalExpression>> arguments = funcExpr.getArguments();
+ List<TypeInfo> argumentTypes = new ArrayList<TypeInfo>();
+
+ /**
+ * get types of argument
+ */
+ for (Mutable<ILogicalExpression> argument : arguments) {
+ TypeInfo type = (TypeInfo) tc.getType(argument.getValue(), metadataProvider, env);
+ argumentTypes.add(type);
+ }
+
+ ObjectInspector[] childrenOIs = new ObjectInspector[argumentTypes.size()];
+
+ /**
+ * get object inspector
+ */
+ for (int i = 0; i < argumentTypes.size(); i++) {
+ childrenOIs[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(argumentTypes.get(i));
+ }
+
+ /**
+ * type inference for aggregation function
+ */
+ if (funcExpr instanceof AggregateFunctionCallExpression) {
+ /**
+ * hive aggregation info
+ */
+ AggregationDesc aggregateDesc = (AggregationDesc) ((HiveFunctionInfo) funcExpr.getFunctionInfo())
+ .getInfo();
+ /**
+ * type inference for aggregation function
+ */
+ GenericUDAFEvaluator result = aggregateDesc.getGenericUDAFEvaluator();
+
+ ObjectInspector returnOI = null;
+ try {
+ returnOI = result.init(getPartialMode(aggregateDesc.getMode()), childrenOIs);
+ } catch (HiveException e) {
+ e.printStackTrace();
+ }
+ TypeInfo exprType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI);
+ return exprType;
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ } else {
+ throw new IllegalStateException("illegal expressions " + expr.getClass().getName());
+ }
+ }
+
+ private Mode getPartialMode(Mode mode) {
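+ // a FINAL aggregator's partial output corresponds to PARTIAL2, a COMPLETE one's to PARTIAL1; other modes already emit partial output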
+ Mode partialMode;
+ if (mode == Mode.FINAL)
+ partialMode = Mode.PARTIAL2;
+ else if (mode == Mode.COMPLETE)
+ partialMode = Mode.PARTIAL1;
+ else
+ partialMode = mode;
+ return partialMode;
+ }
+}
diff --git a/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java
new file mode 100644
index 0000000..3d35e1f
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/main/java/edu/uci/ics/hivesterix/logical/expression/HivesterixConstantValue.java
@@ -0,0 +1,55 @@
+package edu.uci.ics.hivesterix.logical.expression;
+
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.IAlgebricksConstantValue;
+
+public class HivesterixConstantValue implements IAlgebricksConstantValue {
+
+ private Object object;
+
+ public HivesterixConstantValue(Object object) {
+ this.setObject(object);
+ }
+
+ @Override
+ public boolean isFalse() {
+ return object == Boolean.FALSE;
+ }
+
+ @Override
+ public boolean isNull() {
+ return object == null;
+ }
+
+ @Override
+ public boolean isTrue() {
+ return object == Boolean.TRUE;
+ }
+
+ public void setObject(Object object) {
+ this.object = object;
+ }
+
+ public Object getObject() {
+ return object;
+ }
+
+ @Override
+ public String toString() {
+ return object.toString();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof HivesterixConstantValue)) {
+ return false;
+ }
+ HivesterixConstantValue v2 = (HivesterixConstantValue) o;
+ return object.equals(v2.getObject());
+ }
+
+ @Override
+ public int hashCode() {
+ return object.hashCode();
+ }
+
+}
diff --git a/hivesterix/hivesterix-common/src/test/java/edu/uci/ics/hyracks/AppTest.java b/hivesterix/hivesterix-common/src/test/java/edu/uci/ics/hyracks/AppTest.java
new file mode 100644
index 0000000..0c701c8
--- /dev/null
+++ b/hivesterix/hivesterix-common/src/test/java/edu/uci/ics/hyracks/AppTest.java
@@ -0,0 +1,38 @@
+package edu.uci.ics.hyracks;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Unit test for simple App.
+ */
+public class AppTest
+ extends TestCase
+{
+ /**
+ * Create the test case
+ *
+ * @param testName name of the test case
+ */
+ public AppTest( String testName )
+ {
+ super( testName );
+ }
+
+ /**
+ * @return the suite of tests being tested
+ */
+ public static Test suite()
+ {
+ return new TestSuite( AppTest.class );
+ }
+
+ /**
+ * Rigorous Test :-)
+ */
+ public void testApp()
+ {
+ assertTrue( true );
+ }
+}