Merge fullstack_asterix_stabilization into fullstack_hyracks_result_distribution.

git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_hyracks_result_distribution@3124 123451ca-8445-de46-9d55-352943316053
diff --git a/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml b/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
new file mode 100755
index 0000000..de3757f
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/assembly/binary-assembly.xml
@@ -0,0 +1,26 @@
+<assembly>
+	<id>binary-assembly</id>
+	<formats>
+		<format>zip</format>
+		<format>dir</format>
+	</formats>
+	<includeBaseDirectory>false</includeBaseDirectory>
+	<fileSets>
+		<fileSet>
+			<directory>target/appassembler/bin</directory>
+			<outputDirectory>bin</outputDirectory>
+			<fileMode>0755</fileMode>
+		</fileSet>
+		<fileSet>
+			<directory>target/appassembler/lib</directory>
+			<outputDirectory>lib</outputDirectory>
+		</fileSet>
+		<fileSet>
+			<directory>target</directory>
+			<outputDirectory>lib</outputDirectory>
+			<includes>
+				<include>*.jar</include>
+			</includes>
+		</fileSet>
+	</fileSets>
+</assembly>
diff --git a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
new file mode 100644
index 0000000..379737f
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
@@ -0,0 +1,596 @@
+package edu.uci.ics.hivesterix.runtime.exec;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+import java.io.Serializable;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
+import org.apache.hadoop.hive.ql.plan.FetchWork;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.logical.expression.HiveExpressionTypeComputer;
+import edu.uci.ics.hivesterix.logical.expression.HiveMergeAggregationExpressionFactory;
+import edu.uci.ics.hivesterix.logical.expression.HiveNullableTypeComputer;
+import edu.uci.ics.hivesterix.logical.expression.HivePartialAggregationTypeComputer;
+import edu.uci.ics.hivesterix.logical.plan.HiveAlgebricksTranslator;
+import edu.uci.ics.hivesterix.logical.plan.HiveLogicalPlanAndMetaData;
+import edu.uci.ics.hivesterix.optimizer.rulecollections.HiveRuleCollections;
+import edu.uci.ics.hivesterix.runtime.factory.evaluator.HiveExpressionRuntimeProvider;
+import edu.uci.ics.hivesterix.runtime.factory.nullwriter.HiveNullWriterFactory;
+import edu.uci.ics.hivesterix.runtime.inspector.HiveBinaryBooleanInspectorFactory;
+import edu.uci.ics.hivesterix.runtime.inspector.HiveBinaryIntegerInspectorFactory;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveConnectorPolicyAssignmentPolicy;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveConnectorPolicyAssignmentPolicy.Policy;
+import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryComparatorFactoryProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryHashFunctionFactoryProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryHashFunctionFamilyProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HiveNormalizedKeyComputerFactoryProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HivePrinterFactoryProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HiveSerializerDeserializerProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HiveTypeTraitProvider;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.compiler.api.HeuristicCompilerFactoryBuilder;
+import edu.uci.ics.hyracks.algebricks.compiler.api.HeuristicCompilerFactoryBuilder.DefaultOptimizationContextFactory;
+import edu.uci.ics.hyracks.algebricks.compiler.api.ICompiler;
+import edu.uci.ics.hyracks.algebricks.compiler.api.ICompilerFactory;
+import edu.uci.ics.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialFixpointRuleController;
+import edu.uci.ics.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialOnceRuleController;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlanAndMetadata;
+import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor;
+import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.PlanPrettyPrinter;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
+import edu.uci.ics.hyracks.api.client.HyracksConnection;
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class HyracksExecutionEngine implements IExecutionEngine {
+
+    private static final Log LOG = LogFactory.getLog(HyracksExecutionEngine.class.getName());
+    private static final String clusterPropertiesPath = "conf/cluster.properties";
+    private static final String masterFilePath = "conf/master";
+
+    private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_LOGICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();
+    private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_PHYSICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();
+    static {
+        SequentialFixpointRuleController seqCtrlNoDfs = new SequentialFixpointRuleController(false);
+        SequentialFixpointRuleController seqCtrlFullDfs = new SequentialFixpointRuleController(true);
+        SequentialOnceRuleController seqOnceCtrl = new SequentialOnceRuleController(true);
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlFullDfs,
+                HiveRuleCollections.NORMALIZATION));
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,
+                HiveRuleCollections.COND_PUSHDOWN_AND_JOIN_INFERENCE));
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlFullDfs,
+                HiveRuleCollections.LOAD_FIELDS));
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,
+                HiveRuleCollections.OP_PUSHDOWN));
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,
+                HiveRuleCollections.DATA_EXCHANGE));
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,
+                HiveRuleCollections.CONSOLIDATION));
+
+        DEFAULT_PHYSICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,
+                HiveRuleCollections.PHYSICAL_PLAN_REWRITES));
+        DEFAULT_PHYSICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,
+                HiveRuleCollections.prepareJobGenRules));
+    }
+
+    /**
+     * static configurations for compiler
+     */
+    private HeuristicCompilerFactoryBuilder builder;
+
+    /**
+     * compiler
+     */
+    private ICompiler compiler;
+
+    /**
+     * physical optimization config
+     */
+    private PhysicalOptimizationConfig physicalOptimizationConfig;
+
+    /**
+     * final ending operators
+     */
+    private List<Operator> leaveOps = new ArrayList<Operator>();
+
+    /**
+     * tasks that are already visited
+     */
+    private Map<Task<? extends Serializable>, Boolean> tasksVisited = new HashMap<Task<? extends Serializable>, Boolean>();
+
+    /**
+     * hyracks job spec
+     */
+    private JobSpecification jobSpec;
+
+    /**
+     * hive configuration
+     */
+    private HiveConf conf;
+
+    /**
+     * plan printer
+     */
+    private PrintWriter planPrinter;
+
+    /**
+     * properties
+     */
+    private Properties clusterProps;
+
+    /**
+     * the Hyracks client connection
+     */
+    private IHyracksClientConnection hcc;
+
+    public HyracksExecutionEngine(HiveConf conf) {
+        this.conf = conf;
+        init(conf);
+    }
+
+    public HyracksExecutionEngine(HiveConf conf, PrintWriter planPrinter) {
+        this.conf = conf;
+        this.planPrinter = planPrinter;
+        init(conf);
+    }
+
+    private void init(HiveConf conf) {
+        builder = new HeuristicCompilerFactoryBuilder(DefaultOptimizationContextFactory.INSTANCE);
+        builder.setLogicalRewrites(DEFAULT_LOGICAL_REWRITES);
+        builder.setPhysicalRewrites(DEFAULT_PHYSICAL_REWRITES);
+        builder.setIMergeAggregationExpressionFactory(HiveMergeAggregationExpressionFactory.INSTANCE);
+        builder.setExpressionTypeComputer(HiveExpressionTypeComputer.INSTANCE);
+        builder.setNullableTypeComputer(HiveNullableTypeComputer.INSTANCE);
+
+        long memSizeExternalGby = conf.getLong("hive.algebricks.groupby.external.memory", 268435456);
+        long memSizeExternalSort = conf.getLong("hive.algebricks.sort.memory", 536870912);
+        int frameSize = conf.getInt("hive.algebricks.framesize", 32768);
+
+        physicalOptimizationConfig = new PhysicalOptimizationConfig();
+        int frameLimitExtGby = (int) (memSizeExternalGby / frameSize);
+        physicalOptimizationConfig.setMaxFramesExternalGroupBy(frameLimitExtGby);
+        int frameLimitExtSort = (int) (memSizeExternalSort / frameSize);
+        physicalOptimizationConfig.setMaxFramesExternalSort(frameLimitExtSort);
+        builder.setPhysicalOptimizationConfig(physicalOptimizationConfig);
+    }
+
+    @Override
+    public int compileJob(List<Task<? extends Serializable>> rootTasks) {
+        // clean up
+        leaveOps.clear();
+        tasksVisited.clear();
+        jobSpec = null;
+
+        HashMap<String, PartitionDesc> aliasToPath = new HashMap<String, PartitionDesc>();
+        List<Operator> rootOps = generateRootOperatorDAG(rootTasks, aliasToPath);
+
+        // get all leave Ops
+        getLeaves(rootOps, leaveOps);
+
+        HiveAlgebricksTranslator translator = new HiveAlgebricksTranslator();
+        try {
+            translator.translate(rootOps, null, aliasToPath);
+
+            ILogicalPlan plan = translator.genLogicalPlan();
+
+            if (plan.getRoots() != null && plan.getRoots().size() > 0 && plan.getRoots().get(0).getValue() != null) {
+                translator.printOperators();
+                ILogicalPlanAndMetadata planAndMetadata = new HiveLogicalPlanAndMetaData(plan,
+                        translator.getMetadataProvider());
+
+                ICompilerFactory compilerFactory = builder.create();
+                compiler = compilerFactory.createCompiler(planAndMetadata.getPlan(),
+                        planAndMetadata.getMetadataProvider(), translator.getVariableCounter());
+
+                // run optimization and re-writing rules for Hive plan
+                compiler.optimize();
+
+                // print optimized plan
+                LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
+                StringBuilder buffer = new StringBuilder();
+                PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
+                String planStr = buffer.toString();
+                System.out.println(planStr);
+
+                if (planPrinter != null)
+                    planPrinter.print(planStr);
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+            return 1;
+        }
+
+        return 0;
+    }
+
+    private void codeGen() throws AlgebricksException {
+        try {
+            // number of cpu cores in the cluster
+            builder.setClusterLocations(new AlgebricksAbsolutePartitionConstraint(ConfUtil.getNCs()));
+        } catch (Exception e) {
+            throw new AlgebricksException(e);
+        }
+        // builder.setClusterTopology(ConfUtil.getClusterTopology());
+        builder.setBinaryBooleanInspectorFactory(HiveBinaryBooleanInspectorFactory.INSTANCE);
+        builder.setBinaryIntegerInspectorFactory(HiveBinaryIntegerInspectorFactory.INSTANCE);
+        builder.setComparatorFactoryProvider(HiveBinaryComparatorFactoryProvider.INSTANCE);
+        builder.setExpressionRuntimeProvider(HiveExpressionRuntimeProvider.INSTANCE);
+        builder.setHashFunctionFactoryProvider(HiveBinaryHashFunctionFactoryProvider.INSTANCE);
+        builder.setPrinterProvider(HivePrinterFactoryProvider.INSTANCE);
+        builder.setSerializerDeserializerProvider(HiveSerializerDeserializerProvider.INSTANCE);
+        builder.setNullWriterFactory(HiveNullWriterFactory.INSTANCE);
+        builder.setNormalizedKeyComputerFactoryProvider(HiveNormalizedKeyComputerFactoryProvider.INSTANCE);
+        builder.setPartialAggregationTypeComputer(HivePartialAggregationTypeComputer.INSTANCE);
+        builder.setTypeTraitProvider(HiveTypeTraitProvider.INSTANCE);
+        builder.setHashFunctionFamilyProvider(HiveBinaryHashFunctionFamilyProvider.INSTANCE);
+
+        jobSpec = compiler.createJob(null);
+
+        // set the policy
+        String policyStr = conf.get("hive.hyracks.connectorpolicy");
+        if (policyStr == null)
+            policyStr = "PIPELINING";
+        Policy policyValue = Policy.valueOf(policyStr);
+        jobSpec.setConnectorPolicyAssignmentPolicy(new HiveConnectorPolicyAssignmentPolicy(policyValue));
+        jobSpec.setUseConnectorPolicyForScheduling(false);
+    }
+
+    @Override
+    public int executeJob() {
+        try {
+            codeGen();
+            executeHyracksJob(jobSpec);
+        } catch (Exception e) {
+            e.printStackTrace();
+            return 1;
+        }
+        return 0;
+    }
+
+    private List<Operator> generateRootOperatorDAG(List<Task<? extends Serializable>> rootTasks,
+            HashMap<String, PartitionDesc> aliasToPath) {
+
+        List<Operator> rootOps = new ArrayList<Operator>();
+        List<Task<? extends Serializable>> toDelete = new ArrayList<Task<? extends Serializable>>();
+        tasksVisited.clear();
+
+        for (int i = rootTasks.size() - 1; i >= 0; i--) {
+            /**
+             * list of map-reduce tasks
+             */
+            Task<? extends Serializable> task = rootTasks.get(i);
+
+            if (task instanceof MapRedTask) {
+                List<Operator> mapRootOps = articulateMapReduceOperators(task, rootOps, aliasToPath, rootTasks);
+                if (i == 0)
+                    rootOps.addAll(mapRootOps);
+                else {
+                    List<Operator> leaves = new ArrayList<Operator>();
+                    getLeaves(rootOps, leaves);
+
+                    List<Operator> mapChildren = new ArrayList<Operator>();
+                    for (Operator childMap : mapRootOps) {
+                        if (childMap instanceof TableScanOperator) {
+                            TableScanDesc topDesc = (TableScanDesc) childMap.getConf();
+                            if (topDesc == null)
+                                mapChildren.add(childMap);
+                            else {
+                                rootOps.add(childMap);
+                            }
+                        } else
+                            mapChildren.add(childMap);
+                    }
+
+                    if (mapChildren.size() > 0) {
+                        for (Operator leaf : leaves)
+                            leaf.setChildOperators(mapChildren);
+                        for (Operator child : mapChildren)
+                            child.setParentOperators(leaves);
+                    }
+                }
+
+                MapredWork mr = (MapredWork) task.getWork();
+                HashMap<String, PartitionDesc> map = mr.getAliasToPartnInfo();
+
+                addAliasToPartition(aliasToPath, map);
+                toDelete.add(task);
+            }
+        }
+
+        for (Task<? extends Serializable> task : toDelete)
+            rootTasks.remove(task);
+
+        return rootOps;
+    }
+
+    private void addAliasToPartition(HashMap<String, PartitionDesc> aliasToPath, HashMap<String, PartitionDesc> map) {
+        Iterator<String> keys = map.keySet().iterator();
+        while (keys.hasNext()) {
+            String key = keys.next();
+            PartitionDesc part = map.get(key);
+            String[] names = key.split(":");
+            for (String name : names) {
+                aliasToPath.put(name, part);
+            }
+        }
+    }
+
+    private List<Operator> articulateMapReduceOperators(Task task, List<Operator> rootOps,
+            HashMap<String, PartitionDesc> aliasToPath, List<Task<? extends Serializable>> rootTasks) {
+        // System.out.println("!"+task.getName());
+        if (!(task instanceof MapRedTask)) {
+            if (!(task instanceof ConditionalTask)) {
+                rootTasks.add(task);
+                return null;
+            } else {
+                // remove map-reduce branches in condition task
+                ConditionalTask condition = (ConditionalTask) task;
+                List<Task<? extends Serializable>> branches = condition.getListTasks();
+                for (int i = branches.size() - 1; i >= 0; i--) {
+                    Task branch = branches.get(i);
+                    if (branch instanceof MapRedTask) {
+                        return articulateMapReduceOperators(branch, rootOps, aliasToPath, rootTasks);
+                    }
+                }
+                rootTasks.add(task);
+                return null;
+            }
+        }
+
+        MapredWork mr = (MapredWork) task.getWork();
+        HashMap<String, PartitionDesc> map = mr.getAliasToPartnInfo();
+
+        // put all aliasToParitionDesc mapping into the map
+        addAliasToPartition(aliasToPath, map);
+
+        MapRedTask mrtask = (MapRedTask) task;
+        MapredWork work = (MapredWork) mrtask.getWork();
+        HashMap<String, Operator<? extends Serializable>> operators = work.getAliasToWork();
+
+        Set entries = operators.entrySet();
+        Iterator<Entry<String, Operator>> iterator = entries.iterator();
+        List<Operator> mapRootOps = new ArrayList<Operator>();
+
+        // get map root operators
+        while (iterator.hasNext()) {
+            Operator next = iterator.next().getValue();
+            if (!mapRootOps.contains(next)) {
+                // clear that only for the case of union
+                mapRootOps.add(next);
+            }
+        }
+
+        // get map local work
+        MapredLocalWork localWork = work.getMapLocalWork();
+        if (localWork != null) {
+            HashMap<String, Operator<? extends Serializable>> localOperators = localWork.getAliasToWork();
+
+            Set localEntries = localOperators.entrySet();
+            Iterator<Entry<String, Operator>> localIterator = localEntries.iterator();
+            while (localIterator.hasNext()) {
+                mapRootOps.add(localIterator.next().getValue());
+            }
+
+            HashMap<String, FetchWork> localFetch = localWork.getAliasToFetchWork();
+            Set localFetchEntries = localFetch.entrySet();
+            Iterator<Entry<String, FetchWork>> localFetchIterator = localFetchEntries.iterator();
+            while (localFetchIterator.hasNext()) {
+                Entry<String, FetchWork> fetchMap = localFetchIterator.next();
+                FetchWork fetch = fetchMap.getValue();
+                String alias = fetchMap.getKey();
+                List<PartitionDesc> dirPart = fetch.getPartDesc();
+
+                // temporary hack: put the first partitionDesc into the map
+                aliasToPath.put(alias, dirPart.get(0));
+            }
+        }
+
+        Boolean visited = tasksVisited.get(task);
+        if (visited != null && visited.booleanValue() == true) {
+            return mapRootOps;
+        }
+
+        // do that only for union operator
+        for (Operator op : mapRootOps)
+            if (op.getParentOperators() != null)
+                op.getParentOperators().clear();
+
+        List<Operator> mapLeaves = new ArrayList<Operator>();
+        downToLeaves(mapRootOps, mapLeaves);
+        List<Operator> reduceOps = new ArrayList<Operator>();
+
+        if (work.getReducer() != null)
+            reduceOps.add(work.getReducer());
+
+        for (Operator mapLeaf : mapLeaves) {
+            mapLeaf.setChildOperators(reduceOps);
+        }
+
+        for (Operator reduceOp : reduceOps) {
+            if (reduceOp != null)
+                reduceOp.setParentOperators(mapLeaves);
+        }
+
+        List<Operator> leafs = new ArrayList<Operator>();
+        if (reduceOps.size() > 0) {
+            downToLeaves(reduceOps, leafs);
+        } else {
+            leafs = mapLeaves;
+        }
+
+        List<Operator> mapChildren = new ArrayList<Operator>();
+        if (task.getChildTasks() != null && task.getChildTasks().size() > 0) {
+            for (Object child : task.getChildTasks()) {
+                List<Operator> childMapOps = articulateMapReduceOperators((Task) child, rootOps, aliasToPath, rootTasks);
+                if (childMapOps == null)
+                    continue;
+
+                for (Operator childMap : childMapOps) {
+                    if (childMap instanceof TableScanOperator) {
+                        TableScanDesc topDesc = (TableScanDesc) childMap.getConf();
+                        if (topDesc == null)
+                            mapChildren.add(childMap);
+                        else {
+                            rootOps.add(childMap);
+                        }
+                    } else {
+                        // if not table scan, add the child
+                        mapChildren.add(childMap);
+                    }
+                }
+            }
+
+            if (mapChildren.size() > 0) {
+                int i = 0;
+                for (Operator leaf : leafs) {
+                    if (leaf.getChildOperators() == null || leaf.getChildOperators().size() == 0)
+                        leaf.setChildOperators(new ArrayList<Operator>());
+                    leaf.getChildOperators().add(mapChildren.get(i));
+                    i++;
+                }
+                i = 0;
+                for (Operator child : mapChildren) {
+                    if (child.getParentOperators() == null || child.getParentOperators().size() == 0)
+                        child.setParentOperators(new ArrayList<Operator>());
+                    child.getParentOperators().add(leafs.get(i));
+                    i++;
+                }
+            }
+        }
+
+        // mark this task as visited
+        this.tasksVisited.put(task, true);
+        return mapRootOps;
+    }
+
+    /**
+     * down to leaf nodes
+     * 
+     * @param ops
+     * @param leaves
+     */
+    private void downToLeaves(List<Operator> ops, List<Operator> leaves) {
+
+        // Operator currentOp;
+        for (Operator op : ops) {
+            if (op != null && op.getChildOperators() != null && op.getChildOperators().size() > 0) {
+                downToLeaves(op.getChildOperators(), leaves);
+            } else {
+                if (op != null && leaves.indexOf(op) < 0)
+                    leaves.add(op);
+            }
+        }
+    }
+
+    private void getLeaves(List<Operator> roots, List<Operator> currentLeaves) {
+        for (Operator op : roots) {
+            List<Operator> children = op.getChildOperators();
+            if (children == null || children.size() <= 0) {
+                currentLeaves.add(op);
+            } else {
+                getLeaves(children, currentLeaves);
+            }
+        }
+    }
+
+    private void executeHyracksJob(JobSpecification job) throws Exception {
+
+        /**
+         * load the properties file if it is not loaded
+         */
+        if (clusterProps == null) {
+            clusterProps = new Properties();
+            InputStream confIn = new FileInputStream(clusterPropertiesPath);
+            clusterProps.load(confIn);
+            confIn.close();
+        }
+
+        if (hcc == null) {
+            BufferedReader ipReader = new BufferedReader(new InputStreamReader(new FileInputStream(masterFilePath)));
+            String masterNode = ipReader.readLine();
+            ipReader.close();
+
+            InetAddress[] ips = InetAddress.getAllByName(masterNode);
+            int port = Integer.parseInt(clusterProps.getProperty("CC_CLIENTPORT"));
+            for (InetAddress ip : ips) {
+                if (ip.getAddress().length <= 4) {
+                    try {
+                        hcc = new HyracksConnection(ip.getHostAddress(), port);
+                        break;
+                    } catch (Exception e) {
+                        continue;
+                    }
+                }
+            }
+        }
+
+        String applicationName = "hivesterix";
+        long start = System.currentTimeMillis();
+        JobId jobId = hcc.startJob(applicationName, job);
+        hcc.waitForCompletion(jobId);
+
+        // System.out.println("job finished: " + jobId.toString());
+        // call all leave nodes to end
+        for (Operator leaf : leaveOps) {
+            jobClose(leaf);
+        }
+
+        long end = System.currentTimeMillis();
+        System.err.println(start + " " + end + " " + (end - start));
+    }
+
+    /**
+     * mv to final directory on hdfs (not real final)
+     * 
+     * @param leaf
+     * @throws Exception
+     */
+    private void jobClose(Operator leaf) throws Exception {
+        FileSinkOperator fsOp = (FileSinkOperator) leaf;
+        FileSinkDesc desc = fsOp.getConf();
+        boolean isNativeTable = !desc.getTableInfo().isNonNative();
+        if ((conf != null) && isNativeTable) {
+            String specPath = desc.getDirName();
+            DynamicPartitionCtx dpCtx = desc.getDynPartCtx();
+            // for 0.7.0
+            fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);
+            // for 0.8.0
+            // Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx,
+            // desc);
+        }
+    }
+}

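For reference, the following is a minimal usage sketch of the engine added above, not part of the patch itself. It assumes the hivesterix jars are on the classpath and that conf/cluster.properties and conf/master exist, as read by executeHyracksJob(); the class name EngineUsageSketch is illustrative, and the property names and default sizes are taken from HyracksExecutionEngine.init() and codeGen() above.

    // Illustrative sketch only; not introduced by this change.
    import java.io.Serializable;
    import java.util.List;

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.exec.Task;

    import edu.uci.ics.hivesterix.runtime.exec.HyracksExecutionEngine;
    import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;

    public class EngineUsageSketch {
        public static int runOnHyracks(HiveConf conf, List<Task<? extends Serializable>> rootTasks) {
            // Optional knobs read by HyracksExecutionEngine.init(); the values shown
            // are the defaults above (256MB group-by, 512MB sort, 32KB frames).
            conf.setLong("hive.algebricks.groupby.external.memory", 268435456L);
            conf.setLong("hive.algebricks.sort.memory", 536870912L);
            conf.setInt("hive.algebricks.framesize", 32768);
            // Connector policy consumed in codeGen(); defaults to PIPELINING.
            conf.set("hive.hyracks.connectorpolicy", "PIPELINING");

            IExecutionEngine engine = new HyracksExecutionEngine(conf);
            int rc = engine.compileJob(rootTasks); // translate and optimize the Hive plan
            if (rc != 0) {
                return rc; // 1 means compilation failed
            }
            return engine.executeJob(); // generate the Hyracks job spec and run it
        }
    }
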
diff --git a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java
new file mode 100644
index 0000000..c64a39b
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/IExecutionEngine.java
@@ -0,0 +1,25 @@
+package edu.uci.ics.hivesterix.runtime.exec;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Task;
+
+public interface IExecutionEngine {
+
+    /**
+     * compile the job
+     * 
+     * @param rootTasks
+     *            : Hive MapReduce plan
+     * @return 0 pass, 1 fail
+     */
+    public int compileJob(List<Task<? extends Serializable>> rootTasks);
+
+    /**
+     * execute the job with the latest compiled plan
+     * 
+     * @return 0 pass, 1 fail
+     */
+    public int executeJob();
+}

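To make the contract above concrete, here is a hypothetical no-op implementation, for illustration only; the class name NoOpExecutionEngine is invented, and the Driver below wires in the real HyracksExecutionEngine instead.

    // Hypothetical stub illustrating the IExecutionEngine contract (0 = pass, 1 = fail).
    import java.io.Serializable;
    import java.util.List;

    import org.apache.hadoop.hive.ql.exec.Task;

    import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;

    public class NoOpExecutionEngine implements IExecutionEngine {
        private List<Task<? extends Serializable>> lastPlan;

        @Override
        public int compileJob(List<Task<? extends Serializable>> rootTasks) {
            // A real engine translates the Hive MapReduce plan here; the stub
            // only remembers it.
            this.lastPlan = rootTasks;
            return 0; // 0 = pass
        }

        @Override
        public int executeJob() {
            // Execute the latest compiled plan; nothing to run in the stub.
            return lastPlan == null ? 1 : 0; // 1 = fail when nothing was compiled
        }
    }
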
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
new file mode 100644
index 0000000..a385742
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
@@ -0,0 +1,1310 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql;
+
+import java.io.DataInput;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.JavaUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.Schema;
+import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.ExecDriver;
+import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.StatsTask;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.exec.TaskResult;
+import org.apache.hadoop.hive.ql.exec.TaskRunner;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
+import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
+import org.apache.hadoop.hive.ql.hooks.Hook;
+import org.apache.hadoop.hive.ql.hooks.HookContext;
+import org.apache.hadoop.hive.ql.hooks.PostExecute;
+import org.apache.hadoop.hive.ql.hooks.PreExecute;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockManagerCtx;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockMode;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject;
+import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData;
+import org.apache.hadoop.hive.ql.lockmgr.LockException;
+import org.apache.hadoop.hive.ql.metadata.AuthorizationException;
+import org.apache.hadoop.hive.ql.metadata.DummyPartition;
+import org.apache.hadoop.hive.ql.metadata.Hive;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveUtils;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.AbstractSemanticAnalyzerHook;
+import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ErrorMsg;
+import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
+import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.ParseDriver;
+import org.apache.hadoop.hive.ql.parse.ParseException;
+import org.apache.hadoop.hive.ql.parse.ParseUtils;
+import org.apache.hadoop.hive.ql.parse.PrunedPartitionList;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.VariableSubstitution;
+import org.apache.hadoop.hive.ql.plan.ConditionalResolver;
+import org.apache.hadoop.hive.ql.plan.ConditionalResolverMergeFiles;
+import org.apache.hadoop.hive.ql.plan.HiveOperation;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.processors.CommandProcessor;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.mapred.ClusterStatus;
+import org.apache.hadoop.mapred.JobClient;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import edu.uci.ics.hivesterix.runtime.exec.HyracksExecutionEngine;
+import edu.uci.ics.hivesterix.runtime.exec.IExecutionEngine;
+
+@SuppressWarnings({ "deprecation", "unused" })
+public class Driver implements CommandProcessor {
+
+    static final private Log LOG = LogFactory.getLog(Driver.class.getName());
+    static final private LogHelper console = new LogHelper(LOG);
+
+    // hive-sterix
+    private IExecutionEngine engine;
+    private boolean hivesterix = false;
+
+    private int maxRows = 100;
+    ByteStream.Output bos = new ByteStream.Output();
+
+    private HiveConf conf;
+    private DataInput resStream;
+    private Context ctx;
+    private QueryPlan plan;
+    private Schema schema;
+    private HiveLockManager hiveLockMgr;
+
+    private String errorMessage;
+    private String SQLState;
+
+    // A limit on the number of threads that can be launched
+    private int maxthreads;
+    private final int sleeptime = 2000;
+
+    protected int tryCount = Integer.MAX_VALUE;
+
+    private int checkLockManager() {
+        boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
+        if (supportConcurrency && (hiveLockMgr == null)) {
+            try {
+                setLockManager();
+            } catch (SemanticException e) {
+                errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
+                SQLState = ErrorMsg.findSQLState(e.getMessage());
+                console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+                return (12);
+            }
+        }
+        return (0);
+    }
+
+    private void setLockManager() throws SemanticException {
+        boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
+        if (supportConcurrency) {
+            String lockMgr = conf.getVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER);
+            if ((lockMgr == null) || (lockMgr.isEmpty())) {
+                throw new SemanticException(ErrorMsg.LOCKMGR_NOT_SPECIFIED.getMsg());
+            }
+
+            try {
+                hiveLockMgr = (HiveLockManager) ReflectionUtils.newInstance(conf.getClassByName(lockMgr), conf);
+                hiveLockMgr.setContext(new HiveLockManagerCtx(conf));
+            } catch (Exception e) {
+                throw new SemanticException(ErrorMsg.LOCKMGR_NOT_INITIALIZED.getMsg() + e.getMessage());
+            }
+        }
+    }
+
+    public void init() {
+        Operator.resetId();
+    }
+
+    /**
+     * Return the status information about the Map-Reduce cluster
+     */
+    public ClusterStatus getClusterStatus() throws Exception {
+        ClusterStatus cs;
+        try {
+            JobConf job = new JobConf(conf, ExecDriver.class);
+            JobClient jc = new JobClient(job);
+            cs = jc.getClusterStatus();
+        } catch (Exception e) {
+            e.printStackTrace();
+            throw e;
+        }
+        LOG.info("Returning cluster status: " + cs.toString());
+        return cs;
+    }
+
+    public Schema getSchema() {
+        return schema;
+    }
+
+    /**
+     * Get a Schema with fields represented with native Hive types
+     */
+    public static Schema getSchema(BaseSemanticAnalyzer sem, HiveConf conf) {
+        Schema schema = null;
+
+        // If we have a plan, prefer its logical result schema if it's
+        // available; otherwise, try digging out a fetch task; failing that,
+        // give up.
+        if (sem == null) {
+            // can't get any info without a plan
+        } else if (sem.getResultSchema() != null) {
+            List<FieldSchema> lst = sem.getResultSchema();
+            schema = new Schema(lst, null);
+        } else if (sem.getFetchTask() != null) {
+            FetchTask ft = sem.getFetchTask();
+            TableDesc td = ft.getTblDesc();
+            // Partitioned tables don't have a tableDesc set on the FetchTask.
+            // Instead they have a list of PartitionDesc objects, each with a
+            // table desc. Let's try to fetch the desc for the first partition
+            // and use its deserializer.
+            if (td == null && ft.getWork() != null && ft.getWork().getPartDesc() != null) {
+                if (ft.getWork().getPartDesc().size() > 0) {
+                    td = ft.getWork().getPartDesc().get(0).getTableDesc();
+                }
+            }
+
+            if (td == null) {
+                LOG.info("No returning schema.");
+            } else {
+                String tableName = "result";
+                List<FieldSchema> lst = null;
+                try {
+                    lst = MetaStoreUtils.getFieldsFromDeserializer(tableName, td.getDeserializer());
+                } catch (Exception e) {
+                    LOG.warn("Error getting schema: " + org.apache.hadoop.util.StringUtils.stringifyException(e));
+                }
+                if (lst != null) {
+                    schema = new Schema(lst, null);
+                }
+            }
+        }
+        if (schema == null) {
+            schema = new Schema();
+        }
+        LOG.info("Returning Hive schema: " + schema);
+        return schema;
+    }
+
+    /**
+     * Get a Schema with fields represented with Thrift DDL types
+     */
+    public Schema getThriftSchema() throws Exception {
+        Schema schema;
+        try {
+            schema = getSchema();
+            if (schema != null) {
+                List<FieldSchema> lst = schema.getFieldSchemas();
+                // Go over the schema and convert type to thrift type
+                if (lst != null) {
+                    for (FieldSchema f : lst) {
+                        f.setType(MetaStoreUtils.typeToThriftType(f.getType()));
+                    }
+                }
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+            throw e;
+        }
+        LOG.info("Returning Thrift schema: " + schema);
+        return schema;
+    }
+
+    /**
+     * Return the maximum number of rows returned by getResults
+     */
+    public int getMaxRows() {
+        return maxRows;
+    }
+
+    /**
+     * Set the maximum number of rows returned by getResults
+     */
+    public void setMaxRows(int maxRows) {
+        this.maxRows = maxRows;
+    }
+
+    public boolean hasReduceTasks(List<Task<? extends Serializable>> tasks) {
+        if (tasks == null) {
+            return false;
+        }
+
+        boolean hasReduce = false;
+        for (Task<? extends Serializable> task : tasks) {
+            if (task.hasReduce()) {
+                return true;
+            }
+
+            hasReduce = (hasReduce || hasReduceTasks(task.getChildTasks()));
+        }
+        return hasReduce;
+    }
+
+    /**
+     * for backwards compatibility with current tests
+     */
+    public Driver(HiveConf conf) {
+        this.conf = conf;
+
+        // hivesterix
+        engine = new HyracksExecutionEngine(conf);
+    }
+
+    public Driver() {
+        if (SessionState.get() != null) {
+            conf = SessionState.get().getConf();
+        }
+
+        // hivesterix
+        engine = new HyracksExecutionEngine(conf);
+    }
+
+    // hivesterix: plan printer
+    public Driver(HiveConf conf, PrintWriter planPrinter) {
+        this.conf = conf;
+        engine = new HyracksExecutionEngine(conf, planPrinter);
+    }
+
+    public void clear() {
+        this.hivesterix = false;
+    }
+
+    /**
+     * Compile a new query. Any currently-planned query associated with this
+     * Driver is discarded.
+     * 
+     * @param command
+     *            The SQL query to compile.
+     */
+    public int compile(String command) {
+        if (plan != null) {
+            close();
+            plan = null;
+        }
+
+        TaskFactory.resetId();
+
+        try {
+            command = new VariableSubstitution().substitute(conf, command);
+            ctx = new Context(conf);
+
+            ParseDriver pd = new ParseDriver();
+            ASTNode tree = pd.parse(command, ctx);
+            tree = ParseUtils.findRootNonNullToken(tree);
+
+            BaseSemanticAnalyzer sem = SemanticAnalyzerFactory.get(conf, tree);
+            List<AbstractSemanticAnalyzerHook> saHooks = getSemanticAnalyzerHooks();
+
+            // Do semantic analysis and plan generation
+            if (saHooks != null) {
+                HiveSemanticAnalyzerHookContext hookCtx = new HiveSemanticAnalyzerHookContextImpl();
+                hookCtx.setConf(conf);
+                for (AbstractSemanticAnalyzerHook hook : saHooks) {
+                    tree = hook.preAnalyze(hookCtx, tree);
+                }
+                sem.analyze(tree, ctx);
+                for (AbstractSemanticAnalyzerHook hook : saHooks) {
+                    hook.postAnalyze(hookCtx, sem.getRootTasks());
+                }
+            } else {
+                sem.analyze(tree, ctx);
+            }
+
+            LOG.info("Semantic Analysis Completed");
+
+            // validate the plan
+            sem.validate();
+
+            plan = new QueryPlan(command, sem);
+            // initialize FetchTask right here
+            if (plan.getFetchTask() != null) {
+                plan.getFetchTask().initialize(conf, plan, null);
+            }
+
+            // get the output schema
+            schema = getSchema(sem, conf);
+
+            // test Only - serialize the query plan and deserialize it
+            if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
+
+                Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+
+                String queryPlanFileName = ctx.getLocalScratchDir(true) + Path.SEPARATOR_CHAR + "queryplan.xml";
+                LOG.info("query plan = " + queryPlanFileName);
+                queryPlanFileName = new Path(queryPlanFileName).toUri().getPath();
+
+                // serialize the queryPlan
+                FileOutputStream fos = new FileOutputStream(queryPlanFileName);
+                Utilities.serializeQueryPlan(plan, fos);
+                fos.close();
+
+                // deserialize the queryPlan
+                FileInputStream fis = new FileInputStream(queryPlanFileName);
+                QueryPlan newPlan = Utilities.deserializeQueryPlan(fis, conf);
+                fis.close();
+
+                // Use the deserialized plan
+                plan = newPlan;
+            }
+
+            // initialize FetchTask right here
+            if (plan.getFetchTask() != null) {
+                plan.getFetchTask().initialize(conf, plan, null);
+            }
+
+            // do the authorization check
+            if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_AUTHORIZATION_ENABLED)) {
+                try {
+                    // doAuthorization(sem);
+                } catch (AuthorizationException authExp) {
+                    console.printError("Authorization failed:" + authExp.getMessage()
+                            + ". Use show grant to get more details.");
+                    return 403;
+                }
+            }
+
+            // hyracks run
+            if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
+                hivesterix = true;
+                return engine.compileJob(sem.getRootTasks());
+            }
+
+            return 0;
+        } catch (SemanticException e) {
+            errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
+            SQLState = ErrorMsg.findSQLState(e.getMessage());
+            console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+            return (10);
+        } catch (ParseException e) {
+            errorMessage = "FAILED: Parse Error: " + e.getMessage();
+            SQLState = ErrorMsg.findSQLState(e.getMessage());
+            console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+            return (11);
+        } catch (Exception e) {
+            errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+            SQLState = ErrorMsg.findSQLState(e.getMessage());
+            console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+            return (12);
+        }
+    }
+
+    private void doAuthorization(BaseSemanticAnalyzer sem) throws HiveException, AuthorizationException {
+        HashSet<ReadEntity> inputs = sem.getInputs();
+        HashSet<WriteEntity> outputs = sem.getOutputs();
+        SessionState ss = SessionState.get();
+        HiveOperation op = ss.getHiveOperation();
+        Hive db = sem.getDb();
+        if (op != null) {
+            if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.CREATETABLE)) {
+                ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
+                        HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
+            } else {
+                // if (op.equals(HiveOperation.IMPORT)) {
+                // ImportSemanticAnalyzer isa = (ImportSemanticAnalyzer) sem;
+                // if (!isa.existsTable()) {
+                ss.getAuthorizer().authorize(db.getDatabase(db.getCurrentDatabase()), null,
+                        HiveOperation.CREATETABLE_AS_SELECT.getOutputRequiredPrivileges());
+                // }
+                // }
+            }
+            if (outputs != null && outputs.size() > 0) {
+                for (WriteEntity write : outputs) {
+
+                    if (write.getType() == WriteEntity.Type.PARTITION) {
+                        Partition part = db.getPartition(write.getTable(), write.getPartition().getSpec(), false);
+                        if (part != null) {
+                            ss.getAuthorizer().authorize(write.getPartition(), null, op.getOutputRequiredPrivileges());
+                            continue;
+                        }
+                    }
+
+                    if (write.getTable() != null) {
+                        ss.getAuthorizer().authorize(write.getTable(), null, op.getOutputRequiredPrivileges());
+                    }
+                }
+
+            }
+        }
+
+        if (inputs != null && inputs.size() > 0) {
+
+            Map<Table, List<String>> tab2Cols = new HashMap<Table, List<String>>();
+            Map<Partition, List<String>> part2Cols = new HashMap<Partition, List<String>>();
+
+            Map<String, Boolean> tableUsePartLevelAuth = new HashMap<String, Boolean>();
+            for (ReadEntity read : inputs) {
+                if (read.getPartition() != null) {
+                    Table tbl = read.getTable();
+                    String tblName = tbl.getTableName();
+                    if (tableUsePartLevelAuth.get(tblName) == null) {
+                        boolean usePartLevelPriv = (tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE") != null && ("TRUE"
+                                .equalsIgnoreCase(tbl.getParameters().get("PARTITION_LEVEL_PRIVILEGE"))));
+                        if (usePartLevelPriv) {
+                            tableUsePartLevelAuth.put(tblName, Boolean.TRUE);
+                        } else {
+                            tableUsePartLevelAuth.put(tblName, Boolean.FALSE);
+                        }
+                    }
+                }
+            }
+
+            if (op.equals(HiveOperation.CREATETABLE_AS_SELECT) || op.equals(HiveOperation.QUERY)) {
+                SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
+                ParseContext parseCtx = querySem.getParseContext();
+                Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();
+
+                for (Map.Entry<String, Operator<? extends Serializable>> topOpMap : querySem.getParseContext()
+                        .getTopOps().entrySet()) {
+                    Operator<? extends Serializable> topOp = topOpMap.getValue();
+                    if (topOp instanceof TableScanOperator && tsoTopMap.containsKey(topOp)) {
+                        TableScanOperator tableScanOp = (TableScanOperator) topOp;
+                        Table tbl = tsoTopMap.get(tableScanOp);
+                        List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
+                        List<FieldSchema> columns = tbl.getCols();
+                        List<String> cols = new ArrayList<String>();
+                        if (neededColumnIds != null && neededColumnIds.size() > 0) {
+                            for (int i = 0; i < neededColumnIds.size(); i++) {
+                                cols.add(columns.get(neededColumnIds.get(i)).getName());
+                            }
+                        } else {
+                            for (int i = 0; i < columns.size(); i++) {
+                                cols.add(columns.get(i).getName());
+                            }
+                        }
+                        if (tbl.isPartitioned() && tableUsePartLevelAuth.get(tbl.getTableName())) {
+                            String alias_id = topOpMap.getKey();
+                            PrunedPartitionList partsList = PartitionPruner.prune(parseCtx.getTopToTable().get(topOp),
+                                    parseCtx.getOpToPartPruner().get(topOp), parseCtx.getConf(), alias_id,
+                                    parseCtx.getPrunedPartitions());
+                            Set<Partition> parts = new HashSet<Partition>();
+                            parts.addAll(partsList.getConfirmedPartns());
+                            parts.addAll(partsList.getUnknownPartns());
+                            for (Partition part : parts) {
+                                List<String> existingCols = part2Cols.get(part);
+                                if (existingCols == null) {
+                                    existingCols = new ArrayList<String>();
+                                }
+                                existingCols.addAll(cols);
+                                part2Cols.put(part, existingCols);
+                            }
+                        } else {
+                            List<String> existingCols = tab2Cols.get(tbl);
+                            if (existingCols == null) {
+                                existingCols = new ArrayList<String>();
+                            }
+                            existingCols.addAll(cols);
+                            tab2Cols.put(tbl, existingCols);
+                        }
+                    }
+                }
+            }
+
+            // cache the results for table authorization
+            Set<String> tableAuthChecked = new HashSet<String>();
+            for (ReadEntity read : inputs) {
+                Table tbl = null;
+                if (read.getPartition() != null) {
+                    tbl = read.getPartition().getTable();
+                    // use partition level authorization
+                    if (tableUsePartLevelAuth.get(tbl.getTableName())) {
+                        List<String> cols = part2Cols.get(read.getPartition());
+                        if (cols != null && cols.size() > 0) {
+                            ss.getAuthorizer().authorize(read.getPartition().getTable(), read.getPartition(), cols,
+                                    op.getInputRequiredPrivileges(), null);
+                        } else {
+                            ss.getAuthorizer().authorize(read.getPartition(), op.getInputRequiredPrivileges(), null);
+                        }
+                        continue;
+                    }
+                } else if (read.getTable() != null) {
+                    tbl = read.getTable();
+                }
+
+                // If we reach here, the table needs a table-level authorization
+                // check; that check may have already happened because of other
+                // partitions of the same table.
+                if (tbl != null && !tableAuthChecked.contains(tbl.getTableName())) {
+                    List<String> cols = tab2Cols.get(tbl);
+                    if (cols != null && cols.size() > 0) {
+                        ss.getAuthorizer().authorize(tbl, null, cols, op.getInputRequiredPrivileges(), null);
+                    } else {
+                        ss.getAuthorizer().authorize(tbl, op.getInputRequiredPrivileges(), null);
+                    }
+                    tableAuthChecked.add(tbl.getTableName());
+                }
+            }
+
+        }
+    }
+
+    /**
+     * @return The current query plan associated with this Driver, if any.
+     */
+    public QueryPlan getPlan() {
+        return plan;
+    }
+
+    /**
+     * Get the list of objects to be locked. If a partition needs to be locked
+     * (in any mode), all its parents should also be locked in SHARED mode.
+     * 
+     * @param t
+     *            The table to be locked
+     * @param p
+     *            The partition to be locked
+     * @param mode
+     *            The mode of the lock (SHARED/EXCLUSIVE)
+     **/
+    private List<HiveLockObj> getLockObjects(Table t, Partition p, HiveLockMode mode) throws SemanticException {
+        List<HiveLockObj> locks = new LinkedList<HiveLockObj>();
+
+        HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
+                .currentTimeMillis()), "IMPLICIT");
+
+        if (t != null) {
+            locks.add(new HiveLockObj(new HiveLockObject(t, lockData), mode));
+            mode = HiveLockMode.SHARED;
+            locks.add(new HiveLockObj(new HiveLockObject(t.getDbName(), lockData), mode));
+            return locks;
+        }
+
+        if (p != null) {
+            if (!(p instanceof DummyPartition)) {
+                locks.add(new HiveLockObj(new HiveLockObject(p, lockData), mode));
+            }
+
+            // All the parents are locked in shared mode
+            mode = HiveLockMode.SHARED;
+
+            // For dummy partitions, only partition name is needed
+            String name = p.getName();
+
+            if (p instanceof DummyPartition) {
+                name = p.getName().split("@")[2];
+            }
+
+            String partName = name;
+            String partialName = "";
+            String[] partns = name.split("/");
+            int len = p instanceof DummyPartition ? partns.length : partns.length - 1;
+            for (int idx = 0; idx < len; idx++) {
+                String partn = partns[idx];
+                partialName += partn;
+                try {
+                    locks.add(new HiveLockObj(new HiveLockObject(new DummyPartition(p.getTable(), p.getTable()
+                            .getDbName() + "/" + p.getTable().getTableName() + "/" + partialName), lockData), mode));
+                    partialName += "/";
+                } catch (HiveException e) {
+                    throw new SemanticException(e.getMessage());
+                }
+            }
+
+            locks.add(new HiveLockObj(new HiveLockObject(p.getTable(), lockData), mode));
+            locks.add(new HiveLockObj(new HiveLockObject(p.getTable().getDbName(), lockData), mode));
+        }
+        return locks;
+    }
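+
+    // Illustrative sketch (hypothetical names, not from the original code): for a
+    // two-level partition "ds=1/hr=0" of table db.t, a call like
+    //   getLockObjects(null, p, HiveLockMode.EXCLUSIVE)
+    // is expected to yield the partition itself in EXCLUSIVE mode plus SHARED
+    // locks on the dummy parent "db/t/ds=1", the table db.t, and the database db.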
+
+    /**
+     * Acquire the read and write locks needed by the statement. The list of
+     * objects to be locked is obtained from the inputs and outputs populated
+     * by the compiler. The lock acquisition scheme is simple: if all the locks
+     * cannot be obtained, error out. Deadlock is avoided by making sure the
+     * locks are lexicographically sorted.
+     **/
+    public int acquireReadWriteLocks() {
+        try {
+            int sleepTime = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_SLEEP_BETWEEN_RETRIES) * 1000;
+            int numRetries = conf.getIntVar(HiveConf.ConfVars.HIVE_LOCK_NUMRETRIES);
+
+            boolean supportConcurrency = conf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
+            if (!supportConcurrency) {
+                return 0;
+            }
+
+            List<HiveLockObj> lockObjects = new ArrayList<HiveLockObj>();
+
+            // Sort all the inputs and outputs. If a lock needs to be acquired
+            // on any partition, a read lock also needs to be acquired on all
+            // of its parents.
+            for (ReadEntity input : plan.getInputs()) {
+                if (input.getType() == ReadEntity.Type.TABLE) {
+                    lockObjects.addAll(getLockObjects(input.getTable(), null, HiveLockMode.SHARED));
+                } else {
+                    lockObjects.addAll(getLockObjects(null, input.getPartition(), HiveLockMode.SHARED));
+                }
+            }
+
+            for (WriteEntity output : plan.getOutputs()) {
+                if (output.getTyp() == WriteEntity.Type.TABLE) {
+                    lockObjects.addAll(getLockObjects(output.getTable(), null,
+                            output.isComplete() ? HiveLockMode.EXCLUSIVE : HiveLockMode.SHARED));
+                } else if (output.getTyp() == WriteEntity.Type.PARTITION) {
+                    lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.EXCLUSIVE));
+                }
+                // In case of dynamic queries, it is possible to have incomplete
+                // dummy partitions
+                else if (output.getTyp() == WriteEntity.Type.DUMMYPARTITION) {
+                    lockObjects.addAll(getLockObjects(null, output.getPartition(), HiveLockMode.SHARED));
+                }
+            }
+
+            if (lockObjects.isEmpty() && !ctx.isNeedLockMgr()) {
+                return 0;
+            }
+
+            int ret = checkLockManager();
+            if (ret != 0) {
+                return ret;
+            }
+
+            HiveLockObjectData lockData = new HiveLockObjectData(plan.getQueryId(), String.valueOf(System
+                    .currentTimeMillis()), "IMPLICIT");
+
+            // Lock the database also
+            try {
+                Hive db = Hive.get(conf);
+                lockObjects.add(new HiveLockObj(new HiveLockObject(db.getCurrentDatabase(), lockData),
+                        HiveLockMode.SHARED));
+            } catch (HiveException e) {
+                throw new SemanticException(e.getMessage());
+            }
+
+            ctx.setHiveLockMgr(hiveLockMgr);
+            List<HiveLock> hiveLocks = null;
+
+            int tryNum = 1;
+            do {
+
+                // ctx.getHiveLockMgr();
+                // hiveLocks = ctx.getHiveLockMgr().lock(lockObjects, false);
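+                // hivesterix: the lock call above is disabled, so hiveLocks
+                // remains null and this loop only sleeps until the retry
+                // budget runs out (acquireReadWriteLocks() is itself not
+                // invoked from run() in this build).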
+
+                if (hiveLocks != null) {
+                    break;
+                }
+
+                tryNum++;
+                try {
+                    Thread.sleep(sleepTime);
+                } catch (InterruptedException e) {
+                }
+            } while (tryNum < numRetries);
+
+            if (hiveLocks == null) {
+                throw new SemanticException(ErrorMsg.LOCK_CANNOT_BE_ACQUIRED.getMsg());
+            } else {
+                ctx.setHiveLocks(hiveLocks);
+            }
+
+            return (0);
+        } catch (SemanticException e) {
+            errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
+            SQLState = ErrorMsg.findSQLState(e.getMessage());
+            console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+            return (10);
+        } catch (Exception e) {
+            errorMessage = "FAILED: Error in acquiring locks: " + e.getMessage();
+            SQLState = ErrorMsg.findSQLState(e.getMessage());
+            console.printError(errorMessage, "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+            return (10);
+        }
+    }
+
+    /**
+     * Release all the locks acquired implicitly by the statement. Note that the
+     * locks acquired with 'keepAlive' set to True are not released.
+     **/
+    private void releaseLocks() {
+        if (ctx != null && ctx.getHiveLockMgr() != null) {
+            try {
+                ctx.getHiveLockMgr().close();
+                ctx.setHiveLocks(null);
+            } catch (LockException e) {
+            }
+        }
+    }
+
+    /**
+     * Release all the locks specified. If some of the locks have already been
+     * released, ignore them.
+     * 
+     * @param hiveLocks
+     *            list of hive locks to be released
+     **/
+    private void releaseLocks(List<HiveLock> hiveLocks) {
+        if (hiveLocks != null) {
+            ctx.getHiveLockMgr().releaseLocks(hiveLocks);
+        }
+        ctx.setHiveLocks(null);
+    }
+
+    public CommandProcessorResponse run(String command) {
+        errorMessage = null;
+        SQLState = null;
+
+        int ret = compile(command);
+        if (ret != 0) {
+            // releaseLocks(ctx.getHiveLocks());
+            return new CommandProcessorResponse(ret, errorMessage, SQLState);
+        }
+
+        // ret = acquireReadWriteLocks();
+        if (ret != 0) {
+            // releaseLocks(ctx.getHiveLocks());
+            return new CommandProcessorResponse(ret, errorMessage, SQLState);
+        }
+
+        ret = execute();
+        if (ret != 0) {
+            // releaseLocks(ctx.getHiveLocks());
+            return new CommandProcessorResponse(ret, errorMessage, SQLState);
+        }
+
+        // releaseLocks(ctx.getHiveLocks());
+        return new CommandProcessorResponse(ret);
+    }
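+
+    // Usage sketch (assumed caller code, not part of this file):
+    //
+    //   CommandProcessorResponse resp = driver.run("SELECT count(1) FROM src");
+    //   if (resp.getResponseCode() == 0) {
+    //       ArrayList<String> rows = new ArrayList<String>();
+    //       while (driver.getResults(rows)) {
+    //           // consume the fetched batch, then clear it for the next one
+    //           rows.clear();
+    //       }
+    //   }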
+
+    private List<AbstractSemanticAnalyzerHook> getSemanticAnalyzerHooks() throws Exception {
+        ArrayList<AbstractSemanticAnalyzerHook> saHooks = new ArrayList<AbstractSemanticAnalyzerHook>();
+        String pestr = conf.getVar(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK);
+        if (pestr == null) {
+            return saHooks;
+        }
+        pestr = pestr.trim();
+        if (pestr.equals("")) {
+            return saHooks;
+        }
+
+        String[] peClasses = pestr.split(",");
+
+        for (String peClass : peClasses) {
+            try {
+                AbstractSemanticAnalyzerHook hook = HiveUtils.getSemanticAnalyzerHook(conf, peClass);
+                saHooks.add(hook);
+            } catch (HiveException e) {
+                console.printError("Pre Exec Hook Class not found:" + e.getMessage());
+                throw e;
+            }
+        }
+
+        return saHooks;
+    }
+
+    private List<Hook> getPreExecHooks() throws Exception {
+        ArrayList<Hook> pehooks = new ArrayList<Hook>();
+        String pestr = conf.getVar(HiveConf.ConfVars.PREEXECHOOKS);
+        pestr = pestr.trim();
+        if (pestr.equals("")) {
+            return pehooks;
+        }
+
+        String[] peClasses = pestr.split(",");
+
+        for (String peClass : peClasses) {
+            try {
+                pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
+            } catch (ClassNotFoundException e) {
+                console.printError("Pre Exec Hook Class not found:" + e.getMessage());
+                throw e;
+            }
+        }
+
+        return pehooks;
+    }
+
+    private List<Hook> getPostExecHooks() throws Exception {
+        ArrayList<Hook> pehooks = new ArrayList<Hook>();
+        String pestr = conf.getVar(HiveConf.ConfVars.POSTEXECHOOKS);
+        pestr = pestr.trim();
+        if (pestr.equals("")) {
+            return pehooks;
+        }
+
+        String[] peClasses = pestr.split(",");
+
+        for (String peClass : peClasses) {
+            try {
+                pehooks.add((Hook) Class.forName(peClass.trim(), true, JavaUtils.getClassLoader()).newInstance());
+            } catch (ClassNotFoundException e) {
+                console.printError("Post Exec Hook Class not found:" + e.getMessage());
+                throw e;
+            }
+        }
+
+        return pehooks;
+    }
+
+    public int execute() {
+        // execute hivesterix plan
+        if (hivesterix) {
+            hivesterix = false;
+            int ret = engine.executeJob();
+            if (ret != 0)
+                return ret;
+        }
+
+        boolean noName = StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HADOOPJOBNAME));
+        int maxlen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH);
+
+        String queryId = plan.getQueryId();
+        String queryStr = plan.getQueryStr();
+
+        conf.setVar(HiveConf.ConfVars.HIVEQUERYID, queryId);
+        conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, queryStr);
+        maxthreads = HiveConf.getIntVar(conf, HiveConf.ConfVars.EXECPARALLETHREADNUMBER);
+
+        try {
+            LOG.info("Starting command: " + queryStr);
+
+            plan.setStarted();
+
+            if (SessionState.get() != null) {
+                SessionState.get().getHiveHistory().startQuery(queryStr, conf.getVar(HiveConf.ConfVars.HIVEQUERYID));
+                SessionState.get().getHiveHistory().logPlanProgress(plan);
+            }
+            resStream = null;
+
+            HookContext hookContext = new HookContext(plan, conf);
+
+            for (Hook peh : getPreExecHooks()) {
+                if (peh instanceof ExecuteWithHookContext) {
+                    ((ExecuteWithHookContext) peh).run(hookContext);
+                } else if (peh instanceof PreExecute) {
+                    ((PreExecute) peh).run(SessionState.get(), plan.getInputs(), plan.getOutputs(), ShimLoader
+                            .getHadoopShims().getUGIForConf(conf));
+                }
+            }
+
+            int jobs = Utilities.getMRTasks(plan.getRootTasks()).size();
+            if (jobs > 0) {
+                console.printInfo("Total MapReduce jobs = " + jobs);
+            }
+            if (SessionState.get() != null) {
+                SessionState.get().getHiveHistory()
+                        .setQueryProperty(queryId, Keys.QUERY_NUM_TASKS, String.valueOf(jobs));
+                SessionState.get().getHiveHistory().setIdToTableMap(plan.getIdToTableNameMap());
+            }
+            String jobname = Utilities.abbreviate(queryStr, maxlen - 6);
+
+            // A runtime that launches runnable tasks as separate Threads
+            // through TaskRunners. As soon as a task is runnable, it is put in
+            // a queue. At any time, at most maxthreads tasks can be running.
+            // The main thread polls the TaskRunners to check whether they have
+            // finished.
+
+            Queue<Task<? extends Serializable>> runnable = new LinkedList<Task<? extends Serializable>>();
+            Map<TaskResult, TaskRunner> running = new HashMap<TaskResult, TaskRunner>();
+
+            DriverContext driverCxt = new DriverContext(runnable, ctx);
+
+            // Add root Tasks to runnable
+
+            for (Task<? extends Serializable> tsk : plan.getRootTasks()) {
+                driverCxt.addToRunnable(tsk);
+            }
+
+            // Loop while you either have tasks running, or tasks queued up
+
+            while (running.size() != 0 || runnable.peek() != null) {
+                // Launch up to maxthreads tasks
+                while (runnable.peek() != null && running.size() < maxthreads) {
+                    Task<? extends Serializable> tsk = runnable.remove();
+                    console.printInfo("executing task " + tsk.getName());
+                    launchTask(tsk, queryId, noName, running, jobname, jobs, driverCxt);
+                }
+
+                // poll the Tasks to see which one completed
+                TaskResult tskRes = pollTasks(running.keySet());
+                TaskRunner tskRun = running.remove(tskRes);
+                Task<? extends Serializable> tsk = tskRun.getTask();
+                hookContext.addCompleteTask(tskRun);
+
+                int exitVal = tskRes.getExitVal();
+                if (exitVal != 0) {
+                    Task<? extends Serializable> backupTask = tsk.getAndInitBackupTask();
+                    if (backupTask != null) {
+                        errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+                                + tsk.getClass().getName();
+                        console.printError(errorMessage);
+
+                        errorMessage = "ATTEMPT: Execute BackupTask: " + backupTask.getClass().getName();
+                        console.printError(errorMessage);
+
+                        // add backup task to runnable
+                        if (DriverContext.isLaunchable(backupTask)) {
+                            driverCxt.addToRunnable(backupTask);
+                        }
+                        continue;
+
+                    } else {
+                        // TODO: This error messaging is not very informative.
+                        // Fix that.
+                        errorMessage = "FAILED: Execution Error, return code " + exitVal + " from "
+                                + tsk.getClass().getName();
+                        SQLState = "08S01";
+                        console.printError(errorMessage);
+                        if (running.size() != 0) {
+                            taskCleanup();
+                        }
+                        // in case we decided to run everything in local mode,
+                        // restore the jobtracker setting to its initial value
+                        ctx.restoreOriginalTracker();
+                        return 9;
+                    }
+                }
+
+                if (SessionState.get() != null) {
+                    SessionState.get().getHiveHistory()
+                            .setTaskProperty(queryId, tsk.getId(), Keys.TASK_RET_CODE, String.valueOf(exitVal));
+                    SessionState.get().getHiveHistory().endTask(queryId, tsk);
+                }
+
+                if (tsk.getChildTasks() != null) {
+                    for (Task<? extends Serializable> child : tsk.getChildTasks()) {
+                        // hivesterix: don't check launchable condition
+                        // if (DriverContext.isLaunchable(child)) {
+                        driverCxt.addToRunnable(child);
+                        // }
+                    }
+                }
+            }
+
+            // in case we decided to run everything in local mode, restore the
+            // jobtracker setting to its initial value
+            ctx.restoreOriginalTracker();
+
+            // Remove incomplete outputs: some incomplete outputs may have been
+            // added at the beginning, e.g. for dynamic partitions.
+            HashSet<WriteEntity> remOutputs = new HashSet<WriteEntity>();
+            for (WriteEntity output : plan.getOutputs()) {
+                if (!output.isComplete()) {
+                    remOutputs.add(output);
+                }
+            }
+
+            for (WriteEntity output : remOutputs) {
+                plan.getOutputs().remove(output);
+            }
+
+            // Get all the post execution hooks and execute them.
+            for (Hook peh : getPostExecHooks()) {
+                if (peh instanceof ExecuteWithHookContext) {
+                    ((ExecuteWithHookContext) peh).run(hookContext);
+                } else if (peh instanceof PostExecute) {
+                    ((PostExecute) peh)
+                            .run(SessionState.get(), plan.getInputs(), plan.getOutputs(),
+                                    (SessionState.get() != null ? SessionState.get().getLineageState().getLineageInfo()
+                                            : null), ShimLoader.getHadoopShims().getUGIForConf(conf));
+                }
+            }
+
+            if (SessionState.get() != null) {
+                SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(0));
+                SessionState.get().getHiveHistory().printRowCount(queryId);
+            }
+        } catch (Exception e) {
+            if (SessionState.get() != null) {
+                SessionState.get().getHiveHistory().setQueryProperty(queryId, Keys.QUERY_RET_CODE, String.valueOf(12));
+            }
+            // TODO: do better with handling types of Exception here
+            errorMessage = "FAILED: Hive Internal Error: " + Utilities.getNameMessage(e);
+            SQLState = "08S01";
+            console.printError(errorMessage + "\n" + org.apache.hadoop.util.StringUtils.stringifyException(e));
+            return (12);
+        } finally {
+            if (SessionState.get() != null) {
+                SessionState.get().getHiveHistory().endQuery(queryId);
+            }
+            if (noName) {
+                conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, "");
+            }
+        }
+        plan.setDone();
+
+        if (SessionState.get() != null) {
+            try {
+                SessionState.get().getHiveHistory().logPlanProgress(plan);
+            } catch (Exception e) {
+            }
+        }
+        console.printInfo("OK");
+
+        return (0);
+    }
+
+    /**
+     * Launches a new task
+     * 
+     * @param tsk
+     *            task being launched
+     * @param queryId
+     *            Id of the query containing the task
+     * @param noName
+     *            whether the task has a name set
+     * @param running
+     *            map from taskresults to taskrunners
+     * @param jobname
+     *            name of the task, if it is a map-reduce job
+     * @param jobs
+     *            number of map-reduce jobs
+     * @param cxt
+     *            the driver context, which tracks the sequential number of the
+     *            current map-reduce job
+     */
+
+    public void launchTask(Task<? extends Serializable> tsk, String queryId, boolean noName,
+            Map<TaskResult, TaskRunner> running, String jobname, int jobs, DriverContext cxt) {
+
+        if (SessionState.get() != null) {
+            SessionState.get().getHiveHistory().startTask(queryId, tsk, tsk.getClass().getName());
+        }
+        if (tsk.isMapRedTask() && !(tsk instanceof ConditionalTask)) {
+            if (noName) {
+                conf.setVar(HiveConf.ConfVars.HADOOPJOBNAME, jobname + "(" + tsk.getId() + ")");
+            }
+            cxt.incCurJobNo(1);
+            console.printInfo("Launching Job " + cxt.getCurJobNo() + " out of " + jobs);
+        }
+        tsk.initialize(conf, plan, cxt);
+        TaskResult tskRes = new TaskResult();
+        TaskRunner tskRun = new TaskRunner(tsk, tskRes);
+
+        // HiveConf.getBoolVar(conf, HiveConf.ConfVars.EXECPARALLEL) &&
+        // Launch Task: hivesterix tweak
+        if (tsk instanceof MapRedTask || tsk instanceof StatsTask) {
+            // hivesterix tweak: MR and stats tasks are not launched as separate
+            // threads here; their results are marked as already finished (exit
+            // code 0), since the corresponding work is carried out by the
+            // Hyracks job submitted via engine.executeJob().
+            tskRes.setRunning(false);
+            tskRes.setExitVal(0);
+        } else if (tsk instanceof ConditionalTask) {
+            ConditionalTask condTask = (ConditionalTask) tsk;
+            ConditionalResolver crs = condTask.getResolver();
+            if (crs instanceof ConditionalResolverMergeFiles) {
+                tskRes.setRunning(false);
+                tskRes.setExitVal(0);
+
+                List<Task<? extends Serializable>> children = condTask.getListTasks();
+                for (Task<? extends Serializable> child : children)
+                    if (child instanceof MapRedTask)
+                        cxt.addToRunnable(child);
+            }
+        } else {
+            tskRun.runSequential();
+        }
+        running.put(tskRes, tskRun);
+        return;
+    }
+
+    /**
+     * Cleans up remaining tasks in case of failure
+     */
+
+    public void taskCleanup() {
+        // The currently existing Shutdown hooks will be automatically called,
+        // killing the map-reduce processes.
+        // The non MR processes will be killed as well.
+        System.exit(9);
+    }
+
+    /**
+     * Polls running tasks to see if a task has ended.
+     * 
+     * @param results
+     *            Set of result objects for running tasks
+     * @return The result object for any completed/failed task
+     */
+
+    public TaskResult pollTasks(Set<TaskResult> results) {
+        Iterator<TaskResult> resultIterator = results.iterator();
+        while (true) {
+            while (resultIterator.hasNext()) {
+                TaskResult tskRes = resultIterator.next();
+                if (!tskRes.isRunning()) {
+                    return tskRes;
+                }
+            }
+
+            // Nothing was found in this pass; sleep for the configured
+            // interval and check again.
+            try {
+                Thread.sleep(sleeptime);
+            } catch (InterruptedException ie) {
+                // Do Nothing
+                ;
+            }
+            resultIterator = results.iterator();
+        }
+    }
+
+    public boolean getResults(ArrayList<String> res) throws IOException {
+        if (plan != null && plan.getFetchTask() != null) {
+            FetchTask ft = plan.getFetchTask();
+            ft.setMaxRows(maxRows);
+            return ft.fetch(res);
+        }
+
+        if (resStream == null) {
+            resStream = ctx.getStream();
+        }
+        if (resStream == null) {
+            return false;
+        }
+
+        int numRows = 0;
+        String row = null;
+
+        while (numRows < maxRows) {
+            if (resStream == null) {
+                if (numRows > 0) {
+                    return true;
+                } else {
+                    return false;
+                }
+            }
+
+            bos.reset();
+            Utilities.StreamStatus ss;
+            try {
+                ss = Utilities.readColumn(resStream, bos);
+                if (bos.getCount() > 0) {
+                    row = new String(bos.getData(), 0, bos.getCount(), "UTF-8");
+                } else if (ss == Utilities.StreamStatus.TERMINATED) {
+                    row = new String();
+                }
+
+                if (row != null) {
+                    numRows++;
+                    res.add(row);
+                }
+            } catch (IOException e) {
+                console.printError("FAILED: Unexpected IO exception : " + e.getMessage());
+                res = null;
+                return false;
+            }
+
+            if (ss == Utilities.StreamStatus.EOF) {
+                resStream = ctx.getStream();
+            }
+        }
+        return true;
+    }
+
+    public int close() {
+        try {
+            if (plan != null) {
+                FetchTask fetchTask = plan.getFetchTask();
+                if (null != fetchTask) {
+                    try {
+                        fetchTask.clearFetch();
+                    } catch (Exception e) {
+                        LOG.debug(" Exception while clearing the Fetch task ", e);
+                    }
+                }
+            }
+            if (ctx != null) {
+                ctx.clear();
+            }
+            if (null != resStream) {
+                try {
+                    ((FSDataInputStream) resStream).close();
+                } catch (Exception e) {
+                    LOG.debug(" Exception while closing the resStream ", e);
+                }
+            }
+        } catch (Exception e) {
+            console.printError("FAILED: Hive Internal Error: " + Utilities.getNameMessage(e) + "\n"
+                    + org.apache.hadoop.util.StringUtils.stringifyException(e));
+            return 13;
+        }
+
+        return 0;
+    }
+
+    public void destroy() {
+        releaseLocks();
+    }
+
+    public org.apache.hadoop.hive.ql.plan.api.Query getQueryPlan() throws IOException {
+        return plan.getQueryPlan();
+    }
+
+    public int getTryCount() {
+        return tryCount;
+    }
+
+    public void setTryCount(int tryCount) {
+        this.tryCount = tryCount;
+    }
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
new file mode 100644
index 0000000..0f445f4
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFAverage.java
@@ -0,0 +1,233 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.util.StringUtils;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * GenericUDAFAverage.
+ */
+@Description(name = "avg", value = "_FUNC_(x) - Returns the mean of a set of numbers")
+public class GenericUDAFAverage extends AbstractGenericUDAFResolver {
+
+    static final Log LOG = LogFactory.getLog(GenericUDAFAverage.class.getName());
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+        if (parameters.length != 1) {
+            throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
+        }
+
+        if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+            throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+                    + parameters[0].getTypeName() + " is passed.");
+        }
+        switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+            case BYTE:
+            case SHORT:
+            case INT:
+            case LONG:
+            case FLOAT:
+            case DOUBLE:
+            case STRING:
+                return new GenericUDAFAverageEvaluator();
+            case BOOLEAN:
+            default:
+                throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but "
+                        + parameters[0].getTypeName() + " is passed.");
+        }
+    }
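+
+    // Note: avg over a STRING column is accepted; the evaluator's iterate()
+    // parses each value as a double and skips values that fail to parse,
+    // logging only a single warning.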
+
+    /**
+     * GenericUDAFAverageEvaluator.
+     */
+    public static class GenericUDAFAverageEvaluator extends GenericUDAFEvaluator {
+
+        // For PARTIAL1 and COMPLETE
+        PrimitiveObjectInspector inputOI;
+
+        // For PARTIAL2 and FINAL
+        StructObjectInspector soi;
+        StructField countField;
+        StructField sumField;
+        LongObjectInspector countFieldOI;
+        DoubleObjectInspector sumFieldOI;
+
+        // For PARTIAL1 and PARTIAL2
+        Object[] partialResult;
+
+        // For FINAL and COMPLETE
+        DoubleWritable result;
+
+        @Override
+        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+            assert (parameters.length == 1);
+            super.init(m, parameters);
+
+            // init input
+            if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+                inputOI = (PrimitiveObjectInspector) parameters[0];
+            } else {
+                soi = (StructObjectInspector) parameters[0];
+                countField = soi.getStructFieldRef("count");
+                sumField = soi.getStructFieldRef("sum");
+                countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+                sumFieldOI = (DoubleObjectInspector) sumField.getFieldObjectInspector();
+            }
+
+            // init output
+            if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+                // The output of a partial aggregation is a struct containing
+                // a "long" count and a "double" sum.
+
+                ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+                foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+                foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+                ArrayList<String> fname = new ArrayList<String>();
+                fname.add("count");
+                fname.add("sum");
+                partialResult = new Object[2];
+                partialResult[0] = new LongWritable(0);
+                partialResult[1] = new DoubleWritable(0);
+                return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+            } else {
+                result = new DoubleWritable(0);
+                return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+            }
+        }
+
+        static class AverageAgg implements SerializableBuffer {
+            long count;
+            double sum;
+
+            @Override
+            public void deSerializeAggBuffer(byte[] data, int start, int len) {
+                count = BufferSerDeUtil.getLong(data, start);
+                start += 8;
+                sum = BufferSerDeUtil.getDouble(data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(byte[] data, int start, int len) {
+                BufferSerDeUtil.writeLong(count, data, start);
+                start += 8;
+                BufferSerDeUtil.writeDouble(sum, data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(DataOutput output) throws IOException {
+                output.writeLong(count);
+                output.writeDouble(sum);
+            }
+        };
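+
+        // The fixed-size buffer layout assumed above is 16 bytes: an 8-byte
+        // long count followed by an 8-byte double sum.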
+
+        @Override
+        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+            AverageAgg result = new AverageAgg();
+            reset(result);
+            return result;
+        }
+
+        @Override
+        public void reset(AggregationBuffer agg) throws HiveException {
+            AverageAgg myagg = (AverageAgg) agg;
+            myagg.count = 0;
+            myagg.sum = 0;
+        }
+
+        boolean warned = false;
+
+        @Override
+        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+            assert (parameters.length == 1);
+            Object p = parameters[0];
+            if (p != null) {
+                AverageAgg myagg = (AverageAgg) agg;
+                try {
+                    double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI);
+                    myagg.count++;
+                    myagg.sum += v;
+                } catch (NumberFormatException e) {
+                    if (!warned) {
+                        warned = true;
+                        LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+                        LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
+                    }
+                }
+            }
+        }
+
+        @Override
+        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+            AverageAgg myagg = (AverageAgg) agg;
+            ((LongWritable) partialResult[0]).set(myagg.count);
+            ((DoubleWritable) partialResult[1]).set(myagg.sum);
+            return partialResult;
+        }
+
+        @Override
+        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+            if (partial != null) {
+                AverageAgg myagg = (AverageAgg) agg;
+                Object partialCount = soi.getStructFieldData(partial, countField);
+                Object partialSum = soi.getStructFieldData(partial, sumField);
+                myagg.count += countFieldOI.get(partialCount);
+                myagg.sum += sumFieldOI.get(partialSum);
+            }
+        }
+
+        @Override
+        public Object terminate(AggregationBuffer agg) throws HiveException {
+            AverageAgg myagg = (AverageAgg) agg;
+            if (myagg.count == 0) {
+                return null;
+            } else {
+                result.set(myagg.sum / myagg.count);
+                return result;
+            }
+        }
+    }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
new file mode 100644
index 0000000..2c4022e
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java
@@ -0,0 +1,392 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * Compute the Pearson correlation coefficient corr(x, y), using the following
+ * stable one-pass method, based on: "Formulas for Robust, One-Pass Parallel
+ * Computation of Covariances and Arbitrary-Order Statistical Moments", Philippe
+ * Pebay, Sandia Labs and
+ * "The Art of Computer Programming, volume 2: Seminumerical Algorithms", Donald
+ * Knuth.
+ * Incremental:
+ *   n : <count>
+ *   mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
+ *   my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>
+ *   c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
+ *   vx_n = vx_(n-1) + (x_n - mx_n)(x_n - mx_(n-1)) : <variance * n>
+ *   vy_n = vy_(n-1) + (y_n - my_n)(y_n - my_(n-1)) : <variance * n>
+ * Merge:
+ *   c_(A,B) = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+ *   vx_(A,B) = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B)
+ *   vy_(A,B) = vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+ */
+@Description(name = "corr", value = "_FUNC_(x,y) - Returns the Pearson coefficient of correlation\n"
+        + "between a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
+        + "Any pair with a NULL is ignored. If the function is applied to an empty set or\n"
+        + "a singleton set, NULL will be returned. Otherwise, it computes the following:\n"
+        + "   COVAR_POP(x,y)/(STDDEV_POP(x)*STDDEV_POP(y))\n"
+        + "where neither x nor y is null,\n"
+        + "COVAR_POP is the population covariance,\n" + "and STDDEV_POP is the population standard deviation.")
+public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver {
+
+    static final Log LOG = LogFactory.getLog(GenericUDAFCorrelation.class.getName());
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+        if (parameters.length != 2) {
+            throw new UDFArgumentTypeException(parameters.length - 1, "Exactly two arguments are expected.");
+        }
+
+        if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+            throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+                    + parameters[0].getTypeName() + " is passed.");
+        }
+
+        if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+            throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but "
+                    + parameters[1].getTypeName() + " is passed.");
+        }
+
+        switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+            case BYTE:
+            case SHORT:
+            case INT:
+            case LONG:
+            case FLOAT:
+            case DOUBLE:
+                switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
+                    case BYTE:
+                    case SHORT:
+                    case INT:
+                    case LONG:
+                    case FLOAT:
+                    case DOUBLE:
+                        return new GenericUDAFCorrelationEvaluator();
+                    case STRING:
+                    case BOOLEAN:
+                    default:
+                        throw new UDFArgumentTypeException(1, "Only numeric type arguments are accepted but "
+                                + parameters[1].getTypeName() + " is passed.");
+                }
+            case STRING:
+            case BOOLEAN:
+            default:
+                throw new UDFArgumentTypeException(0, "Only numeric type arguments are accepted but "
+                        + parameters[0].getTypeName() + " is passed.");
+        }
+    }
+
+    /**
+     * Evaluate the Pearson correlation coefficient using a stable one-pass
+     * algorithm, based on work by Philippe Pébay and Donald Knuth.
+     * Incremental:
+     *   n : <count>
+     *   mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>
+     *   my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>
+     *   c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
+     *   vx_n = vx_(n-1) + (x_n - mx_n)(x_n - mx_(n-1)) : <variance * n>
+     *   vy_n = vy_(n-1) + (y_n - my_n)(y_n - my_(n-1)) : <variance * n>
+     * Merge:
+     *   c_(A,B) = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+     *   vx_(A,B) = vx_A + vx_B + (mx_A - mx_B)*(mx_A - mx_B)*n_A*n_B/(n_A+n_B)
+     *   vy_(A,B) = vy_A + vy_B + (my_A - my_B)*(my_A - my_B)*n_A*n_B/(n_A+n_B)
+     */
+    public static class GenericUDAFCorrelationEvaluator extends GenericUDAFEvaluator {
+
+        // For PARTIAL1 and COMPLETE
+        private PrimitiveObjectInspector xInputOI;
+        private PrimitiveObjectInspector yInputOI;
+
+        // For PARTIAL2 and FINAL
+        private StructObjectInspector soi;
+        private StructField countField;
+        private StructField xavgField;
+        private StructField yavgField;
+        private StructField xvarField;
+        private StructField yvarField;
+        private StructField covarField;
+        private LongObjectInspector countFieldOI;
+        private DoubleObjectInspector xavgFieldOI;
+        private DoubleObjectInspector yavgFieldOI;
+        private DoubleObjectInspector xvarFieldOI;
+        private DoubleObjectInspector yvarFieldOI;
+        private DoubleObjectInspector covarFieldOI;
+
+        // For PARTIAL1 and PARTIAL2
+        private Object[] partialResult;
+
+        // For FINAL and COMPLETE
+        private DoubleWritable result;
+
+        @Override
+        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+            super.init(m, parameters);
+
+            // init input
+            if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+                assert (parameters.length == 2);
+                xInputOI = (PrimitiveObjectInspector) parameters[0];
+                yInputOI = (PrimitiveObjectInspector) parameters[1];
+            } else {
+                assert (parameters.length == 1);
+                soi = (StructObjectInspector) parameters[0];
+
+                countField = soi.getStructFieldRef("count");
+                xavgField = soi.getStructFieldRef("xavg");
+                yavgField = soi.getStructFieldRef("yavg");
+                xvarField = soi.getStructFieldRef("xvar");
+                yvarField = soi.getStructFieldRef("yvar");
+                covarField = soi.getStructFieldRef("covar");
+
+                countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+                xavgFieldOI = (DoubleObjectInspector) xavgField.getFieldObjectInspector();
+                yavgFieldOI = (DoubleObjectInspector) yavgField.getFieldObjectInspector();
+                xvarFieldOI = (DoubleObjectInspector) xvarField.getFieldObjectInspector();
+                yvarFieldOI = (DoubleObjectInspector) yvarField.getFieldObjectInspector();
+                covarFieldOI = (DoubleObjectInspector) covarField.getFieldObjectInspector();
+            }
+
+            // init output
+            if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+                // The output of a partial aggregation is a struct containing
+                // a long count, two double averages, two double variances,
+                // and a double covariance.
+
+                ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+                foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+                foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+                foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+                foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+                foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+                foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+                ArrayList<String> fname = new ArrayList<String>();
+                fname.add("count");
+                fname.add("xavg");
+                fname.add("yavg");
+                fname.add("xvar");
+                fname.add("yvar");
+                fname.add("covar");
+
+                partialResult = new Object[6];
+                partialResult[0] = new LongWritable(0);
+                partialResult[1] = new DoubleWritable(0);
+                partialResult[2] = new DoubleWritable(0);
+                partialResult[3] = new DoubleWritable(0);
+                partialResult[4] = new DoubleWritable(0);
+                partialResult[5] = new DoubleWritable(0);
+
+                return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+            } else {
+                setResult(new DoubleWritable(0));
+                return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+            }
+        }
+
+        static class StdAgg implements SerializableBuffer {
+            long count; // number n of elements
+            double xavg; // average of x elements
+            double yavg; // average of y elements
+            double xvar; // n times the variance of x elements
+            double yvar; // n times the variance of y elements
+            double covar; // n times the covariance
+
+            @Override
+            public void deSerializeAggBuffer(byte[] data, int start, int len) {
+                count = BufferSerDeUtil.getLong(data, start);
+                start += 8;
+                xavg = BufferSerDeUtil.getDouble(data, start);
+                start += 8;
+                yavg = BufferSerDeUtil.getDouble(data, start);
+                start += 8;
+                xvar = BufferSerDeUtil.getDouble(data, start);
+                start += 8;
+                yvar = BufferSerDeUtil.getDouble(data, start);
+                start += 8;
+                covar = BufferSerDeUtil.getDouble(data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(byte[] data, int start, int len) {
+                BufferSerDeUtil.writeLong(count, data, start);
+                start += 8;
+                BufferSerDeUtil.writeDouble(xavg, data, start);
+                start += 8;
+                BufferSerDeUtil.writeDouble(yavg, data, start);
+                start += 8;
+                BufferSerDeUtil.writeDouble(xvar, data, start);
+                start += 8;
+                BufferSerDeUtil.writeDouble(yvar, data, start);
+                start += 8;
+                BufferSerDeUtil.writeDouble(covar, data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(DataOutput output) throws IOException {
+                output.writeLong(count);
+                output.writeDouble(xavg);
+                output.writeDouble(yavg);
+                output.writeDouble(xvar);
+                output.writeDouble(yvar);
+                output.writeDouble(covar);
+            }
+        };
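+
+        // The fixed-size buffer layout assumed above is 48 bytes: an 8-byte
+        // long count followed by five 8-byte doubles (xavg, yavg, xvar, yvar,
+        // covar), in that order.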
+
+        @Override
+        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+            StdAgg result = new StdAgg();
+            reset(result);
+            return result;
+        }
+
+        @Override
+        public void reset(AggregationBuffer agg) throws HiveException {
+            StdAgg myagg = (StdAgg) agg;
+            myagg.count = 0;
+            myagg.xavg = 0;
+            myagg.yavg = 0;
+            myagg.xvar = 0;
+            myagg.yvar = 0;
+            myagg.covar = 0;
+        }
+
+        @Override
+        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+            assert (parameters.length == 2);
+            Object px = parameters[0];
+            Object py = parameters[1];
+            if (px != null && py != null) {
+                StdAgg myagg = (StdAgg) agg;
+                double vx = PrimitiveObjectInspectorUtils.getDouble(px, xInputOI);
+                double vy = PrimitiveObjectInspectorUtils.getDouble(py, yInputOI);
+                double xavgOld = myagg.xavg;
+                double yavgOld = myagg.yavg;
+                myagg.count++;
+                myagg.xavg += (vx - xavgOld) / myagg.count;
+                myagg.yavg += (vy - yavgOld) / myagg.count;
+                if (myagg.count > 1) {
+                    myagg.covar += (vx - xavgOld) * (vy - myagg.yavg);
+                    myagg.xvar += (vx - xavgOld) * (vx - myagg.xavg);
+                    myagg.yvar += (vy - yavgOld) * (vy - myagg.yavg);
+                }
+            }
+        }
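+
+        // Worked example (sketch): iterating over the points (1, 2) and (3, 6)
+        // yields count = 2, xavg = 2, yavg = 4, xvar = 2, yvar = 8, covar = 4,
+        // so terminate() returns 4 / (sqrt(2) * sqrt(8)) = 1.0, as expected for
+        // perfectly correlated data (y = 2x).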
+
+        @Override
+        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+            StdAgg myagg = (StdAgg) agg;
+            ((LongWritable) partialResult[0]).set(myagg.count);
+            ((DoubleWritable) partialResult[1]).set(myagg.xavg);
+            ((DoubleWritable) partialResult[2]).set(myagg.yavg);
+            ((DoubleWritable) partialResult[3]).set(myagg.xvar);
+            ((DoubleWritable) partialResult[4]).set(myagg.yvar);
+            ((DoubleWritable) partialResult[5]).set(myagg.covar);
+            return partialResult;
+        }
+
+        @Override
+        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+            if (partial != null) {
+                StdAgg myagg = (StdAgg) agg;
+
+                Object partialCount = soi.getStructFieldData(partial, countField);
+                Object partialXAvg = soi.getStructFieldData(partial, xavgField);
+                Object partialYAvg = soi.getStructFieldData(partial, yavgField);
+                Object partialXVar = soi.getStructFieldData(partial, xvarField);
+                Object partialYVar = soi.getStructFieldData(partial, yvarField);
+                Object partialCovar = soi.getStructFieldData(partial, covarField);
+
+                long nA = myagg.count;
+                long nB = countFieldOI.get(partialCount);
+
+                if (nA == 0) {
+                    // Just copy the information since there is nothing so far
+                    myagg.count = countFieldOI.get(partialCount);
+                    myagg.xavg = xavgFieldOI.get(partialXAvg);
+                    myagg.yavg = yavgFieldOI.get(partialYAvg);
+                    myagg.xvar = xvarFieldOI.get(partialXVar);
+                    myagg.yvar = yvarFieldOI.get(partialYVar);
+                    myagg.covar = covarFieldOI.get(partialCovar);
+                }
+
+                if (nA != 0 && nB != 0) {
+                    // Merge the two partials
+                    double xavgA = myagg.xavg;
+                    double yavgA = myagg.yavg;
+                    double xavgB = xavgFieldOI.get(partialXAvg);
+                    double yavgB = yavgFieldOI.get(partialYAvg);
+                    double xvarB = xvarFieldOI.get(partialXVar);
+                    double yvarB = yvarFieldOI.get(partialYVar);
+                    double covarB = covarFieldOI.get(partialCovar);
+
+                    myagg.count += nB;
+                    myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
+                    myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
+                    // Scale the correction terms by n_A*n_B/(n_A+n_B), as in
+                    // the documented merge formula.
+                    myagg.xvar += xvarB + (xavgA - xavgB) * (xavgA - xavgB) * ((double) (nA * nB) / myagg.count);
+                    myagg.yvar += yvarB + (yavgA - yavgB) * (yavgA - yavgB) * ((double) (nA * nB) / myagg.count);
+                    myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB) * ((double) (nA * nB) / myagg.count);
+                }
+            }
+        }
+
+        @Override
+        public Object terminate(AggregationBuffer agg) throws HiveException {
+            StdAgg myagg = (StdAgg) agg;
+
+            if (myagg.count < 2) { // SQL standard - return null for zero or one
+                                   // pair
+                return null;
+            } else {
+                getResult().set(myagg.covar / java.lang.Math.sqrt(myagg.xvar) / java.lang.Math.sqrt(myagg.yvar));
+                return getResult();
+            }
+        }
+
+        public void setResult(DoubleWritable result) {
+            this.result = result;
+        }
+
+        public DoubleWritable getResult() {
+            return result;
+        }
+    }
+
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
new file mode 100644
index 0000000..dc5eef0
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java
@@ -0,0 +1,170 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * This class implements the COUNT aggregation function as in SQL.
+ */
+@Description(name = "count", value = "_FUNC_(*) - Returns the total number of retrieved rows, including "
+        + "rows containing NULL values.\n"
+
+        + "_FUNC_(expr) - Returns the number of rows for which the supplied " + "expression is non-NULL.\n"
+
+        + "_FUNC_(DISTINCT expr[, expr...]) - Returns the number of rows for "
+        + "which the supplied expression(s) are unique and non-NULL.")
+public class GenericUDAFCount implements GenericUDAFResolver2 {
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+        // This method implementation is preserved for backward compatibility.
+        return new GenericUDAFCountEvaluator();
+    }
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo) throws SemanticException {
+
+        TypeInfo[] parameters = paramInfo.getParameters();
+
+        if (parameters.length == 0) {
+            if (!paramInfo.isAllColumns()) {
+                throw new UDFArgumentException("Argument expected");
+            }
+            assert !paramInfo.isDistinct() : "DISTINCT not supported with *";
+        } else {
+            if (parameters.length > 1 && !paramInfo.isDistinct()) {
+                throw new UDFArgumentException("DISTINCT keyword must be specified");
+            }
+            assert !paramInfo.isAllColumns() : "* not supported in expression list";
+        }
+
+        return new GenericUDAFCountEvaluator().setCountAllColumns(paramInfo.isAllColumns());
+    }
+
+    /**
+     * GenericUDAFCountEvaluator.
+     */
+    public static class GenericUDAFCountEvaluator extends GenericUDAFEvaluator {
+        private boolean countAllColumns = false;
+        private LongObjectInspector partialCountAggOI;
+        private LongWritable result;
+
+        @Override
+        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+            super.init(m, parameters);
+            partialCountAggOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+            result = new LongWritable(0);
+            return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+        }
+
+        private GenericUDAFCountEvaluator setCountAllColumns(boolean countAllCols) {
+            countAllColumns = countAllCols;
+            return this;
+        }
+
+        /** class for storing count value. */
+        static class CountAgg implements SerializableBuffer {
+            long value;
+
+            @Override
+            public void deSerializeAggBuffer(byte[] data, int start, int len) {
+                value = BufferSerDeUtil.getLong(data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(byte[] data, int start, int len) {
+                BufferSerDeUtil.writeLong(value, data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(DataOutput output) throws IOException {
+                output.writeLong(value);
+            }
+        }
+
+        @Override
+        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+            CountAgg buffer = new CountAgg();
+            reset(buffer);
+            return buffer;
+        }
+
+        @Override
+        public void reset(AggregationBuffer agg) throws HiveException {
+            ((CountAgg) agg).value = 0;
+        }
+
+        @Override
+        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+            // parameters == null means the input table/split is empty
+            if (parameters == null) {
+                return;
+            }
+            if (countAllColumns) {
+                assert parameters.length == 0;
+                ((CountAgg) agg).value++;
+            } else {
+                assert parameters.length > 0;
+                boolean countThisRow = true;
+                for (Object nextParam : parameters) {
+                    if (nextParam == null) {
+                        countThisRow = false;
+                        break;
+                    }
+                }
+                if (countThisRow) {
+                    ((CountAgg) agg).value++;
+                }
+            }
+        }
+
+        @Override
+        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+            if (partial != null) {
+                long p = partialCountAggOI.get(partial);
+                ((CountAgg) agg).value += p;
+            }
+        }
+
+        @Override
+        public Object terminate(AggregationBuffer agg) throws HiveException {
+            result.set(((CountAgg) agg).value);
+            return result;
+        }
+
+        @Override
+        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+            return terminate(agg);
+        }
+    }
+}
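
A small standalone sketch, not part of the patch above, of the counting rules that iterate() encodes: COUNT(*) counts every row, while COUNT(expr[, expr...]) counts a row only when all of the supplied expressions are non-NULL. The class and method names are illustrative.

// Standalone sketch, not part of the patch: the row-counting rules of iterate() above,
// stripped of the Hive evaluator plumbing.
public class CountSketch {

    // COUNT(*): every row is counted, regardless of NULLs.
    static long countAll(Object[][] rows) {
        return rows.length;
    }

    // COUNT(expr[, expr...]): a row is counted only if all supplied expressions are non-NULL.
    static long countNonNull(Object[][] rows) {
        long count = 0;
        for (Object[] row : rows) {
            boolean countThisRow = true;
            for (Object col : row) {
                if (col == null) {
                    countThisRow = false;
                    break;
                }
            }
            if (countThisRow) {
                count++;
            }
        }
        return count;
    }

    public static void main(String[] args) {
        Object[][] rows = { { 1, "a" }, { null, "b" }, { 3, null } };
        System.out.println(countAll(rows));      // 3
        System.out.println(countNonNull(rows));  // 1
    }
}
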
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
new file mode 100644
index 0000000..0c4448b
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java
@@ -0,0 +1,341 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * Compute the covariance covar_pop(x, y), using the following one-pass method
+ * (ref. "Formulas for Robust, One-Pass Parallel Computation of Covariances and
+ * Arbitrary-Order Statistical Moments", Philippe Pebay, Sandia Labs):
+ * Incremental: n : <count>; mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>;
+ * my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>;
+ * c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
+ * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
+ */
+@Description(name = "covariance,covar_pop", value = "_FUNC_(x,y) - Returns the population covariance of a set of number pairs", extended = "The function takes as arguments any pair of numeric types and returns a double.\n"
+        + "Any pair with a NULL is ignored. If the function is applied to an empty set, NULL\n"
+        + "will be returned. Otherwise, it computes the following:\n"
+        + "   (SUM(x*y)-SUM(x)*SUM(y)/COUNT(x,y))/COUNT(x,y)\n" + "where neither x nor y is null.")
+public class GenericUDAFCovariance extends AbstractGenericUDAFResolver {
+
+    static final Log LOG = LogFactory.getLog(GenericUDAFCovariance.class.getName());
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+        if (parameters.length != 2) {
+            throw new UDFArgumentTypeException(parameters.length - 1, "Exactly two arguments are expected.");
+        }
+
+        if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+            throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+                    + parameters[0].getTypeName() + " is passed.");
+        }
+
+        if (parameters[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+            throw new UDFArgumentTypeException(1, "Only primitive type arguments are accepted but "
+                    + parameters[1].getTypeName() + " is passed.");
+        }
+
+        switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+            case BYTE:
+            case SHORT:
+            case INT:
+            case LONG:
+            case FLOAT:
+            case DOUBLE:
+                switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) {
+                    case BYTE:
+                    case SHORT:
+                    case INT:
+                    case LONG:
+                    case FLOAT:
+                    case DOUBLE:
+                        return new GenericUDAFCovarianceEvaluator();
+                    case STRING:
+                    case BOOLEAN:
+                    default:
+                        throw new UDFArgumentTypeException(1, "Only numeric type arguments are accepted but "
+                                + parameters[1].getTypeName() + " is passed.");
+                }
+            case STRING:
+            case BOOLEAN:
+            default:
+                throw new UDFArgumentTypeException(0, "Only numeric type arguments are accepted but "
+                        + parameters[0].getTypeName() + " is passed.");
+        }
+    }
+
+    /**
+     * Evaluate the covariance using the algorithm described in
+     * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance,
+     * presumably by Pébay, Philippe (2008), in "Formulas for Robust, One-Pass
+     * Parallel Computation of Covariances and Arbitrary-Order Statistical
+     * Moments", Technical Report SAND2008-6212, Sandia National Laboratories,
+     * http://infoserve.sandia.gov/sand_doc/2008/086212.pdf
+     * Incremental: n : <count>; mx_n = mx_(n-1) + [x_n - mx_(n-1)]/n : <xavg>;
+     * my_n = my_(n-1) + [y_n - my_(n-1)]/n : <yavg>;
+     * c_n = c_(n-1) + (x_n - mx_(n-1))*(y_n - my_n) : <covariance * n>
+     * Merge: c_X = c_A + c_B + (mx_A - mx_B)*(my_A - my_B)*n_A*n_B/n_X
+     * This one-pass algorithm is stable.
+     */
+    public static class GenericUDAFCovarianceEvaluator extends GenericUDAFEvaluator {
+
+        // For PARTIAL1 and COMPLETE
+        private PrimitiveObjectInspector xInputOI;
+        private PrimitiveObjectInspector yInputOI;
+
+        // For PARTIAL2 and FINAL
+        private StructObjectInspector soi;
+        private StructField countField;
+        private StructField xavgField;
+        private StructField yavgField;
+        private StructField covarField;
+        private LongObjectInspector countFieldOI;
+        private DoubleObjectInspector xavgFieldOI;
+        private DoubleObjectInspector yavgFieldOI;
+        private DoubleObjectInspector covarFieldOI;
+
+        // For PARTIAL1 and PARTIAL2
+        private Object[] partialResult;
+
+        // For FINAL and COMPLETE
+        private DoubleWritable result;
+
+        @Override
+        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+            super.init(m, parameters);
+
+            // init input
+            if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+                assert (parameters.length == 2);
+                xInputOI = (PrimitiveObjectInspector) parameters[0];
+                yInputOI = (PrimitiveObjectInspector) parameters[1];
+            } else {
+                assert (parameters.length == 1);
+                soi = (StructObjectInspector) parameters[0];
+
+                countField = soi.getStructFieldRef("count");
+                xavgField = soi.getStructFieldRef("xavg");
+                yavgField = soi.getStructFieldRef("yavg");
+                covarField = soi.getStructFieldRef("covar");
+
+                countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+                xavgFieldOI = (DoubleObjectInspector) xavgField.getFieldObjectInspector();
+                yavgFieldOI = (DoubleObjectInspector) yavgField.getFieldObjectInspector();
+                covarFieldOI = (DoubleObjectInspector) covarField.getFieldObjectInspector();
+            }
+
+            // init output
+            if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+                // The output of a partial aggregation is a struct containing
+                // a long count, two double averages, and a double covariance.
+
+                ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+                foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+                foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+                foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+                foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+                ArrayList<String> fname = new ArrayList<String>();
+                fname.add("count");
+                fname.add("xavg");
+                fname.add("yavg");
+                fname.add("covar");
+
+                partialResult = new Object[4];
+                partialResult[0] = new LongWritable(0);
+                partialResult[1] = new DoubleWritable(0);
+                partialResult[2] = new DoubleWritable(0);
+                partialResult[3] = new DoubleWritable(0);
+
+                return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+            } else {
+                setResult(new DoubleWritable(0));
+                return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+            }
+        }
+
+        static class StdAgg implements SerializableBuffer {
+            long count; // number n of elements
+            double xavg; // average of x elements
+            double yavg; // average of y elements
+            double covar; // n times the covariance
+
+            @Override
+            public void deSerializeAggBuffer(byte[] data, int start, int len) {
+                count = BufferSerDeUtil.getLong(data, start);
+                start += 8;
+                xavg = BufferSerDeUtil.getDouble(data, start);
+                start += 8;
+                yavg = BufferSerDeUtil.getDouble(data, start);
+                start += 8;
+                covar = BufferSerDeUtil.getDouble(data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(byte[] data, int start, int len) {
+                BufferSerDeUtil.writeLong(count, data, start);
+                start += 8;
+                BufferSerDeUtil.writeDouble(xavg, data, start);
+                start += 8;
+                BufferSerDeUtil.writeDouble(yavg, data, start);
+                start += 8;
+                BufferSerDeUtil.writeDouble(covar, data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(DataOutput output) throws IOException {
+                output.writeLong(count);
+                output.writeDouble(xavg);
+                output.writeDouble(yavg);
+                output.writeDouble(covar);
+            }
+        };
+
+        @Override
+        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+            StdAgg result = new StdAgg();
+            reset(result);
+            return result;
+        }
+
+        @Override
+        public void reset(AggregationBuffer agg) throws HiveException {
+            StdAgg myagg = (StdAgg) agg;
+            myagg.count = 0;
+            myagg.xavg = 0;
+            myagg.yavg = 0;
+            myagg.covar = 0;
+        }
+
+        @Override
+        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+            assert (parameters.length == 2);
+            Object px = parameters[0];
+            Object py = parameters[1];
+            if (px != null && py != null) {
+                StdAgg myagg = (StdAgg) agg;
+                double vx = PrimitiveObjectInspectorUtils.getDouble(px, xInputOI);
+                double vy = PrimitiveObjectInspectorUtils.getDouble(py, yInputOI);
+                myagg.count++;
+                myagg.yavg = myagg.yavg + (vy - myagg.yavg) / myagg.count;
+                if (myagg.count > 1) {
+                    myagg.covar += (vx - myagg.xavg) * (vy - myagg.yavg);
+                }
+                myagg.xavg = myagg.xavg + (vx - myagg.xavg) / myagg.count;
+            }
+        }
+
+        @Override
+        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+            StdAgg myagg = (StdAgg) agg;
+            ((LongWritable) partialResult[0]).set(myagg.count);
+            ((DoubleWritable) partialResult[1]).set(myagg.xavg);
+            ((DoubleWritable) partialResult[2]).set(myagg.yavg);
+            ((DoubleWritable) partialResult[3]).set(myagg.covar);
+            return partialResult;
+        }
+
+        @Override
+        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+            if (partial != null) {
+                StdAgg myagg = (StdAgg) agg;
+
+                Object partialCount = soi.getStructFieldData(partial, countField);
+                Object partialXAvg = soi.getStructFieldData(partial, xavgField);
+                Object partialYAvg = soi.getStructFieldData(partial, yavgField);
+                Object partialCovar = soi.getStructFieldData(partial, covarField);
+
+                long nA = myagg.count;
+                long nB = countFieldOI.get(partialCount);
+
+                if (nA == 0) {
+                    // Just copy the information since there is nothing so far
+                    myagg.count = countFieldOI.get(partialCount);
+                    myagg.xavg = xavgFieldOI.get(partialXAvg);
+                    myagg.yavg = yavgFieldOI.get(partialYAvg);
+                    myagg.covar = covarFieldOI.get(partialCovar);
+                }
+
+                if (nA != 0 && nB != 0) {
+                    // Merge the two partials
+                    double xavgA = myagg.xavg;
+                    double yavgA = myagg.yavg;
+                    double xavgB = xavgFieldOI.get(partialXAvg);
+                    double yavgB = yavgFieldOI.get(partialYAvg);
+                    double covarB = covarFieldOI.get(partialCovar);
+
+                    myagg.count += nB;
+                    myagg.xavg = (xavgA * nA + xavgB * nB) / myagg.count;
+                    myagg.yavg = (yavgA * nA + yavgB * nB) / myagg.count;
+                    myagg.covar += covarB + (xavgA - xavgB) * (yavgA - yavgB) * ((double) (nA * nB) / myagg.count);
+                }
+            }
+        }
+
+        @Override
+        public Object terminate(AggregationBuffer agg) throws HiveException {
+            StdAgg myagg = (StdAgg) agg;
+
+            if (myagg.count == 0) { // SQL standard - return null for zero
+                                    // elements
+                return null;
+            } else {
+                getResult().set(myagg.covar / (myagg.count));
+                return getResult();
+            }
+        }
+
+        public void setResult(DoubleWritable result) {
+            this.result = result;
+        }
+
+        public DoubleWritable getResult() {
+            return result;
+        }
+    }
+
+}
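
A standalone sketch, not part of the patch above: the one-pass update that iterate() applies per (x, y) pair, restated with plain doubles so the update order (yavg first, covar using the old xavg and the new yavg, xavg last) is easy to see. Names are illustrative.

// Standalone sketch, not part of the patch: the one-pass covariance update of iterate() above.
public class CovarianceSketch {
    long count;
    double xavg, yavg;
    double covar; // n times the covariance, as in StdAgg above

    void add(double vx, double vy) {
        count++;
        yavg += (vy - yavg) / count;            // update yavg first, as iterate() does
        if (count > 1) {
            covar += (vx - xavg) * (vy - yavg); // uses the old xavg and the new yavg
        }
        xavg += (vx - xavg) / count;            // update xavg last
    }

    // covar_pop: n times the covariance divided by n; terminate() above returns NULL for empty input.
    double covarPop() {
        return covar / count;
    }

    public static void main(String[] args) {
        CovarianceSketch s = new CovarianceSketch();
        double[][] pairs = { { 1, 2 }, { 2, 4 }, { 3, 6 } };
        for (double[] p : pairs) {
            s.add(p[0], p[1]);
        }
        System.out.println(s.covarPop()); // ≈ 1.3333 (population covariance 4/3 for this linear sample)
    }
}
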
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
new file mode 100644
index 0000000..afdc397
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java
@@ -0,0 +1,272 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.util.StringUtils;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * GenericUDAFSum.
+ */
+@Description(name = "sum", value = "_FUNC_(x) - Returns the sum of a set of numbers")
+public class GenericUDAFSum extends AbstractGenericUDAFResolver {
+
+    static final Log LOG = LogFactory.getLog(GenericUDAFSum.class.getName());
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+        if (parameters.length != 1) {
+            throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
+        }
+
+        if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+            throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+                    + parameters[0].getTypeName() + " is passed.");
+        }
+        switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+            case BYTE:
+            case SHORT:
+            case INT:
+            case LONG:
+                return new GenericUDAFSumLong();
+            case FLOAT:
+            case DOUBLE:
+            case STRING:
+                return new GenericUDAFSumDouble();
+            case BOOLEAN:
+            default:
+                throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but "
+                        + parameters[0].getTypeName() + " is passed.");
+        }
+    }
+
+    /**
+     * GenericUDAFSumDouble.
+     */
+    public static class GenericUDAFSumDouble extends GenericUDAFEvaluator {
+        private PrimitiveObjectInspector inputOI;
+        private DoubleWritable result;
+
+        @Override
+        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+            assert (parameters.length == 1);
+            super.init(m, parameters);
+            result = new DoubleWritable(0);
+            inputOI = (PrimitiveObjectInspector) parameters[0];
+            return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+        }
+
+        /** class for storing double sum value. */
+        static class SumDoubleAgg implements SerializableBuffer {
+            boolean empty;
+            double sum;
+
+            @Override
+            public void deSerializeAggBuffer(byte[] data, int start, int len) {
+                empty = BufferSerDeUtil.getBoolean(data, start);
+                start += 1;
+                sum = BufferSerDeUtil.getDouble(data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(byte[] data, int start, int len) {
+                BufferSerDeUtil.writeBoolean(empty, data, start);
+                start += 1;
+                BufferSerDeUtil.writeDouble(sum, data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(DataOutput output) throws IOException {
+                output.writeBoolean(empty);
+                output.writeDouble(sum);
+            }
+        }
+
+        @Override
+        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+            SumDoubleAgg result = new SumDoubleAgg();
+            reset(result);
+            return result;
+        }
+
+        @Override
+        public void reset(AggregationBuffer agg) throws HiveException {
+            SumDoubleAgg myagg = (SumDoubleAgg) agg;
+            myagg.empty = true;
+            myagg.sum = 0;
+        }
+
+        boolean warned = false;
+
+        @Override
+        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+            assert (parameters.length == 1);
+            try {
+                merge(agg, parameters[0]);
+            } catch (NumberFormatException e) {
+                if (!warned) {
+                    warned = true;
+                    LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+                    LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
+                }
+            }
+        }
+
+        @Override
+        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+            return terminate(agg);
+        }
+
+        @Override
+        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+            if (partial != null) {
+                SumDoubleAgg myagg = (SumDoubleAgg) agg;
+                myagg.empty = false;
+                myagg.sum += PrimitiveObjectInspectorUtils.getDouble(partial, inputOI);
+            }
+        }
+
+        @Override
+        public Object terminate(AggregationBuffer agg) throws HiveException {
+            SumDoubleAgg myagg = (SumDoubleAgg) agg;
+            if (myagg.empty) {
+                return null;
+            }
+            result.set(myagg.sum);
+            return result;
+        }
+
+    }
+
+    /**
+     * GenericUDAFSumLong.
+     */
+    public static class GenericUDAFSumLong extends GenericUDAFEvaluator {
+        private PrimitiveObjectInspector inputOI;
+        private LongWritable result;
+
+        @Override
+        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+            assert (parameters.length == 1);
+            super.init(m, parameters);
+            result = new LongWritable(0);
+            inputOI = (PrimitiveObjectInspector) parameters[0];
+            return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+        }
+
+        /** class for storing long sum value. */
+        static class SumLongAgg implements SerializableBuffer {
+            boolean empty;
+            long sum;
+
+            @Override
+            public void deSerializeAggBuffer(byte[] data, int start, int len) {
+                empty = BufferSerDeUtil.getBoolean(data, start);
+                start += 1;
+                sum = BufferSerDeUtil.getLong(data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(byte[] data, int start, int len) {
+                BufferSerDeUtil.writeBoolean(empty, data, start);
+                start += 1;
+                BufferSerDeUtil.writeLong(sum, data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(DataOutput output) throws IOException {
+                output.writeBoolean(empty);
+                output.writeLong(sum);
+            }
+        }
+
+        @Override
+        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+            SumLongAgg result = new SumLongAgg();
+            reset(result);
+            return result;
+        }
+
+        @Override
+        public void reset(AggregationBuffer agg) throws HiveException {
+            SumLongAgg myagg = (SumLongAgg) agg;
+            myagg.empty = true;
+            myagg.sum = 0;
+        }
+
+        private boolean warned = false;
+
+        @Override
+        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+            assert (parameters.length == 1);
+            try {
+                merge(agg, parameters[0]);
+            } catch (NumberFormatException e) {
+                if (!warned) {
+                    warned = true;
+                    LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+                }
+            }
+        }
+
+        @Override
+        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+            return terminate(agg);
+        }
+
+        @Override
+        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+            if (partial != null) {
+                SumLongAgg myagg = (SumLongAgg) agg;
+                myagg.sum += PrimitiveObjectInspectorUtils.getLong(partial, inputOI);
+                myagg.empty = false;
+            }
+        }
+
+        @Override
+        public Object terminate(AggregationBuffer agg) throws HiveException {
+            SumLongAgg myagg = (SumLongAgg) agg;
+            if (myagg.empty) {
+                return null;
+            }
+            result.set(myagg.sum);
+            return result;
+        }
+
+    }
+
+}
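
A standalone sketch, not part of the patch above, of the NULL semantics both sum evaluators implement through the empty flag: SUM over zero non-NULL values is NULL, while NULL inputs are otherwise skipped. Names are illustrative.

// Standalone sketch, not part of the patch: the empty/NULL handling of the sum evaluators above.
public class SumSketch {

    static Long sumLong(Long[] values) {
        boolean empty = true;
        long sum = 0;
        for (Long v : values) {
            if (v != null) { // NULLs are skipped, as in merge()/iterate() above
                sum += v;
                empty = false;
            }
        }
        if (empty) { // no non-NULL input at all: SQL NULL
            return null;
        }
        return sum;
    }

    public static void main(String[] args) {
        System.out.println(sumLong(new Long[] { 1L, null, 3L })); // 4
        System.out.println(sumLong(new Long[] { null, null }));   // null
    }
}
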
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
new file mode 100644
index 0000000..e839008
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java
@@ -0,0 +1,305 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.util.StringUtils;
+
+import edu.uci.ics.hivesterix.runtime.evaluator.BufferSerDeUtil;
+import edu.uci.ics.hivesterix.runtime.evaluator.SerializableBuffer;
+
+/**
+ * Compute the variance. This class is extended by GenericUDAFVarianceSample,
+ * GenericUDAFStd, and GenericUDAFStdSample.
+ */
+@Description(name = "variance,var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers")
+public class GenericUDAFVariance extends AbstractGenericUDAFResolver {
+
+    static final Log LOG = LogFactory.getLog(GenericUDAFVariance.class.getName());
+
+    @Override
+    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+        if (parameters.length != 1) {
+            throw new UDFArgumentTypeException(parameters.length - 1, "Exactly one argument is expected.");
+        }
+
+        if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+            throw new UDFArgumentTypeException(0, "Only primitive type arguments are accepted but "
+                    + parameters[0].getTypeName() + " is passed.");
+        }
+        switch (((PrimitiveTypeInfo) parameters[0]).getPrimitiveCategory()) {
+            case BYTE:
+            case SHORT:
+            case INT:
+            case LONG:
+            case FLOAT:
+            case DOUBLE:
+            case STRING:
+                return new GenericUDAFVarianceEvaluator();
+            case BOOLEAN:
+            default:
+                throw new UDFArgumentTypeException(0, "Only numeric or string type arguments are accepted but "
+                        + parameters[0].getTypeName() + " is passed.");
+        }
+    }
+
+    /**
+     * Evaluate the variance using the algorithm described by Chan, Golub, and
+     * LeVeque in
+     * "Algorithms for computing the sample variance: analysis and recommendations"
+     * The American Statistician, 37 (1983) pp. 242--247.
+     * variance = variance1 + variance2 + n/(m*(m+n)) * pow(((m/n)*t1 - t2),2)
+     * where: variance is sum[x-avg^2] (this is actually n times the variance)
+     * and is updated at every step; n is the count of elements in chunk1;
+     * m is the count of elements in chunk2; t1 = sum of elements in chunk1;
+     * t2 = sum of elements in chunk2.
+     * This algorithm was proven to be numerically stable by J.L. Barlow in
+     * "Error analysis of a pairwise summation algorithm to compute sample variance"
+     * Numer. Math, 58 (1991) pp. 583--590
+     */
+    public static class GenericUDAFVarianceEvaluator extends GenericUDAFEvaluator {
+
+        // For PARTIAL1 and COMPLETE
+        private PrimitiveObjectInspector inputOI;
+
+        // For PARTIAL2 and FINAL
+        private StructObjectInspector soi;
+        private StructField countField;
+        private StructField sumField;
+        private StructField varianceField;
+        private LongObjectInspector countFieldOI;
+        private DoubleObjectInspector sumFieldOI;
+
+        // For PARTIAL1 and PARTIAL2
+        private Object[] partialResult;
+
+        // For FINAL and COMPLETE
+        private DoubleWritable result;
+
+        @Override
+        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
+            assert (parameters.length == 1);
+            super.init(m, parameters);
+
+            // init input
+            if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {
+                inputOI = (PrimitiveObjectInspector) parameters[0];
+            } else {
+                soi = (StructObjectInspector) parameters[0];
+
+                countField = soi.getStructFieldRef("count");
+                sumField = soi.getStructFieldRef("sum");
+                varianceField = soi.getStructFieldRef("variance");
+
+                countFieldOI = (LongObjectInspector) countField.getFieldObjectInspector();
+                sumFieldOI = (DoubleObjectInspector) sumField.getFieldObjectInspector();
+            }
+
+            // init output
+            if (mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2) {
+                // The output of a partial aggregation is a struct containing
+                // a long count and doubles sum and variance.
+
+                ArrayList<ObjectInspector> foi = new ArrayList<ObjectInspector>();
+
+                foi.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);
+                foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+                foi.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
+
+                ArrayList<String> fname = new ArrayList<String>();
+                fname.add("count");
+                fname.add("sum");
+                fname.add("variance");
+
+                partialResult = new Object[3];
+                partialResult[0] = new LongWritable(0);
+                partialResult[1] = new DoubleWritable(0);
+                partialResult[2] = new DoubleWritable(0);
+
+                return ObjectInspectorFactory.getStandardStructObjectInspector(fname, foi);
+
+            } else {
+                setResult(new DoubleWritable(0));
+                return PrimitiveObjectInspectorFactory.writableDoubleObjectInspector;
+            }
+        }
+
+        static class StdAgg implements SerializableBuffer {
+            long count; // number of elements
+            double sum; // sum of elements
+            double variance; // sum[x-avg^2] (this is actually n times the
+                             // variance)
+
+            @Override
+            public void deSerializeAggBuffer(byte[] data, int start, int len) {
+                count = BufferSerDeUtil.getLong(data, start);
+                start += 8;
+                sum = BufferSerDeUtil.getDouble(data, start);
+                start += 8;
+                variance = BufferSerDeUtil.getDouble(data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(byte[] data, int start, int len) {
+                BufferSerDeUtil.writeLong(count, data, start);
+                start += 8;
+                BufferSerDeUtil.writeDouble(sum, data, start);
+                start += 8;
+                BufferSerDeUtil.writeDouble(variance, data, start);
+            }
+
+            @Override
+            public void serializeAggBuffer(DataOutput output) throws IOException {
+                output.writeLong(count);
+                output.writeDouble(sum);
+                output.writeDouble(variance);
+            }
+        };
+
+        @Override
+        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+            StdAgg result = new StdAgg();
+            reset(result);
+            return result;
+        }
+
+        @Override
+        public void reset(AggregationBuffer agg) throws HiveException {
+            StdAgg myagg = (StdAgg) agg;
+            myagg.count = 0;
+            myagg.sum = 0;
+            myagg.variance = 0;
+        }
+
+        private boolean warned = false;
+
+        @Override
+        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
+            assert (parameters.length == 1);
+            Object p = parameters[0];
+            if (p != null) {
+                StdAgg myagg = (StdAgg) agg;
+                try {
+                    double v = PrimitiveObjectInspectorUtils.getDouble(p, inputOI);
+                    myagg.count++;
+                    myagg.sum += v;
+                    if (myagg.count > 1) {
+                        double t = myagg.count * v - myagg.sum;
+                        myagg.variance += (t * t) / ((double) myagg.count * (myagg.count - 1));
+                    }
+                } catch (NumberFormatException e) {
+                    if (!warned) {
+                        warned = true;
+                        LOG.warn(getClass().getSimpleName() + " " + StringUtils.stringifyException(e));
+                        LOG.warn(getClass().getSimpleName() + " ignoring similar exceptions.");
+                    }
+                }
+            }
+        }
+
+        @Override
+        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+            StdAgg myagg = (StdAgg) agg;
+            ((LongWritable) partialResult[0]).set(myagg.count);
+            ((DoubleWritable) partialResult[1]).set(myagg.sum);
+            ((DoubleWritable) partialResult[2]).set(myagg.variance);
+            return partialResult;
+        }
+
+        @Override
+        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+            if (partial != null) {
+                StdAgg myagg = (StdAgg) agg;
+
+                Object partialCount = soi.getStructFieldData(partial, countField);
+                Object partialSum = soi.getStructFieldData(partial, sumField);
+                Object partialVariance = soi.getStructFieldData(partial, varianceField);
+
+                long n = myagg.count;
+                long m = countFieldOI.get(partialCount);
+
+                if (n == 0) {
+                    // Just copy the information since there is nothing so far
+                    myagg.variance = sumFieldOI.get(partialVariance);
+                    myagg.count = countFieldOI.get(partialCount);
+                    myagg.sum = sumFieldOI.get(partialSum);
+                }
+
+                if (m != 0 && n != 0) {
+                    // Merge the two partials
+
+                    double a = myagg.sum;
+                    double b = sumFieldOI.get(partialSum);
+
+                    myagg.count += m;
+                    myagg.sum += b;
+                    double t = (m / (double) n) * a - b;
+                    myagg.variance += sumFieldOI.get(partialVariance) + ((n / (double) m) / ((double) n + m)) * t * t;
+                }
+            }
+        }
+
+        @Override
+        public Object terminate(AggregationBuffer agg) throws HiveException {
+            StdAgg myagg = (StdAgg) agg;
+
+            if (myagg.count == 0) { // SQL standard - return null for zero
+                                    // elements
+                return null;
+            } else {
+                if (myagg.count > 1) {
+                    getResult().set(myagg.variance / (myagg.count));
+                } else { // for one element the variance is always 0
+                    getResult().set(0);
+                }
+                return getResult();
+            }
+        }
+
+        public void setResult(DoubleWritable result) {
+            this.result = result;
+        }
+
+        public DoubleWritable getResult() {
+            return result;
+        }
+    }
+
+}
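
A standalone sketch, not part of the patch above: the Chan/Golub/LeVeque pairwise merge performed by merge(), with the field names of StdAgg, where the variance field holds the sum of squared deviations (n times the variance). Class and method names are illustrative.

// Standalone sketch, not part of the patch: the pairwise merge of (count, sum, variance) partials.
public class VarianceMergeSketch {

    static class VarPartial {
        long count;
        double sum;
        double variance; // sum[(x - avg)^2], i.e. n times the variance
    }

    static void mergeInto(VarPartial a, VarPartial b) {
        long n = a.count;
        long m = b.count;
        if (n == 0) { // nothing accumulated yet: just copy b
            a.count = m;
            a.sum = b.sum;
            a.variance = b.variance;
            return;
        }
        if (m == 0) {
            return;
        }
        double t = (m / (double) n) * a.sum - b.sum;            // (m/n)*t1 - t2, using the old sum
        a.count += m;
        a.sum += b.sum;
        a.variance += b.variance + ((n / (double) m) / ((double) n + m)) * t * t;
    }

    // var_pop as in terminate() above: one element gives 0; an empty input would yield NULL there.
    static double varPop(VarPartial p) {
        return p.count > 1 ? p.variance / p.count : 0;
    }

    public static void main(String[] args) {
        VarPartial a = new VarPartial(); // partial for {1, 2}: count 2, sum 3, squared deviations 0.5
        a.count = 2; a.sum = 3; a.variance = 0.5;
        VarPartial b = new VarPartial(); // partial for {3}: count 1, sum 3, squared deviations 0
        b.count = 1; b.sum = 3; b.variance = 0;
        mergeInto(a, b);
        System.out.println(a.variance); // ≈ 2.0, the squared deviations of {1, 2, 3}
        System.out.println(varPop(a));  // ≈ 0.6667, i.e. 2/3
    }
}
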
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
new file mode 100644
index 0000000..95b999e
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.typeinfo;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+/**
+ * TypeInfoFactory can be used to create the TypeInfo object for any type.
+ * TypeInfo objects are all read-only, so we can reuse them easily.
+ * TypeInfoFactory keeps an internal cache to make sure we don't create two
+ * TypeInfo objects that represent the same type.
+ */
+public final class TypeInfoFactory {
+
+    static ConcurrentHashMap<String, TypeInfo> cachedPrimitiveTypeInfo = new ConcurrentHashMap<String, TypeInfo>();
+
+    private TypeInfoFactory() {
+        // prevent instantiation
+    }
+
+    public static TypeInfo getPrimitiveTypeInfo(String typeName) {
+        if (null == PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(typeName)) {
+            throw new RuntimeException("Cannot getPrimitiveTypeInfo for " + typeName);
+        }
+        TypeInfo result = cachedPrimitiveTypeInfo.get(typeName);
+        if (result == null) {
+            result = new PrimitiveTypeInfo(typeName);
+            cachedPrimitiveTypeInfo.put(typeName, result);
+        }
+        return result;
+    }
+
+    public static final TypeInfo voidTypeInfo = getPrimitiveTypeInfo(Constants.VOID_TYPE_NAME);
+    public static final TypeInfo booleanTypeInfo = getPrimitiveTypeInfo(Constants.BOOLEAN_TYPE_NAME);
+    public static final TypeInfo intTypeInfo = getPrimitiveTypeInfo(Constants.INT_TYPE_NAME);
+    public static final TypeInfo longTypeInfo = getPrimitiveTypeInfo(Constants.BIGINT_TYPE_NAME);
+    public static final TypeInfo stringTypeInfo = getPrimitiveTypeInfo(Constants.STRING_TYPE_NAME);
+    public static final TypeInfo floatTypeInfo = getPrimitiveTypeInfo(Constants.FLOAT_TYPE_NAME);
+    public static final TypeInfo doubleTypeInfo = getPrimitiveTypeInfo(Constants.DOUBLE_TYPE_NAME);
+    public static final TypeInfo byteTypeInfo = getPrimitiveTypeInfo(Constants.TINYINT_TYPE_NAME);
+    public static final TypeInfo shortTypeInfo = getPrimitiveTypeInfo(Constants.SMALLINT_TYPE_NAME);
+
+    public static final TypeInfo unknownTypeInfo = getPrimitiveTypeInfo("unknown");
+
+    public static TypeInfo getPrimitiveTypeInfoFromPrimitiveWritable(Class<?> clazz) {
+        String typeName = PrimitiveObjectInspectorUtils.getTypeNameFromPrimitiveWritable(clazz);
+        if (typeName == null) {
+            throw new RuntimeException("Internal error: Cannot get typeName for " + clazz);
+        }
+        return getPrimitiveTypeInfo(typeName);
+    }
+
+    public static TypeInfo getPrimitiveTypeInfoFromJavaPrimitive(Class<?> clazz) {
+        return getPrimitiveTypeInfo(PrimitiveObjectInspectorUtils.getTypeNameFromPrimitiveJava(clazz));
+    }
+
+    static ConcurrentHashMap<ArrayList<List<?>>, TypeInfo> cachedStructTypeInfo = new ConcurrentHashMap<ArrayList<List<?>>, TypeInfo>();
+
+    public static TypeInfo getStructTypeInfo(List<String> names, List<TypeInfo> typeInfos) {
+        ArrayList<List<?>> signature = new ArrayList<List<?>>(2);
+        signature.add(names);
+        signature.add(typeInfos);
+        TypeInfo result = cachedStructTypeInfo.get(signature);
+        if (result == null) {
+            result = new StructTypeInfo(names, typeInfos);
+            cachedStructTypeInfo.put(signature, result);
+        }
+        return result;
+    }
+
+    static ConcurrentHashMap<List<?>, TypeInfo> cachedUnionTypeInfo = new ConcurrentHashMap<List<?>, TypeInfo>();
+
+    public static TypeInfo getUnionTypeInfo(List<TypeInfo> typeInfos) {
+        TypeInfo result = cachedUnionTypeInfo.get(typeInfos);
+        if (result == null) {
+            result = new UnionTypeInfo(typeInfos);
+            cachedUnionTypeInfo.put(typeInfos, result);
+        }
+        return result;
+    }
+
+    static ConcurrentHashMap<TypeInfo, TypeInfo> cachedListTypeInfo = new ConcurrentHashMap<TypeInfo, TypeInfo>();
+
+    public static TypeInfo getListTypeInfo(TypeInfo elementTypeInfo) {
+        TypeInfo result = cachedListTypeInfo.get(elementTypeInfo);
+        if (result == null) {
+            result = new ListTypeInfo(elementTypeInfo);
+            cachedListTypeInfo.put(elementTypeInfo, result);
+        }
+        return result;
+    }
+
+    static ConcurrentHashMap<ArrayList<TypeInfo>, TypeInfo> cachedMapTypeInfo = new ConcurrentHashMap<ArrayList<TypeInfo>, TypeInfo>();
+
+    public static TypeInfo getMapTypeInfo(TypeInfo keyTypeInfo, TypeInfo valueTypeInfo) {
+        ArrayList<TypeInfo> signature = new ArrayList<TypeInfo>(2);
+        signature.add(keyTypeInfo);
+        signature.add(valueTypeInfo);
+        TypeInfo result = cachedMapTypeInfo.get(signature);
+        if (result == null) {
+            result = new MapTypeInfo(keyTypeInfo, valueTypeInfo);
+            cachedMapTypeInfo.put(signature, result);
+        }
+        return result;
+    };
+
+}
\ No newline at end of file
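
A note on the pattern above, with a standalone sketch that is not part of the patch: the factory methods use a non-atomic get-then-put on a ConcurrentHashMap, so two threads can race and each construct an instance for the same key. Because the cached objects are read-only this is usually harmless, though callers relying on instance identity would briefly see two different objects; a putIfAbsent-based variant of the same get-or-create pattern keeps a single canonical instance. The names below (NameCache, intern, createFor) are illustrative.

// Standalone sketch, not part of the patch: get-or-create with a single canonical instance per key.
import java.util.concurrent.ConcurrentHashMap;

public class NameCache {
    private final ConcurrentHashMap<String, Object> cache = new ConcurrentHashMap<String, Object>();

    public Object intern(String name) {
        Object existing = cache.get(name);
        if (existing != null) {
            return existing;
        }
        Object created = createFor(name);                // build outside the map, as the factory above does
        Object raced = cache.putIfAbsent(name, created); // keep whichever instance won the race
        return raced != null ? raced : created;
    }

    protected Object createFor(String name) {
        return new Object(); // stand-in for constructing a new, immutable descriptor
    }
}
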
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties
new file mode 100644
index 0000000..2d2401a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/cluster.properties
@@ -0,0 +1,37 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME=../../../../hyracks
+
+#The tmp directory for cc to install jars
+CCTMP_DIR=/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="/tmp/t3,/tmp/t4"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
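
A standalone sketch, not part of the patch above: cluster.properties is written in shell-style key=value form — note the $CCTMP_DIR reference and the quoted *_OPTS strings — so it is presumably meant to be sourced by the launch scripts rather than parsed as a Java properties file. The sketch below, with an illustrative path, only shows that reading it with java.util.Properties returns those values verbatim, unexpanded and still quoted.

// Standalone sketch, not part of the patch: inspecting cluster.properties from Java.
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;

public class ClusterPropertiesSketch {
    public static void main(String[] args) throws IOException {
        Properties props = new Properties();
        FileInputStream in = new FileInputStream("conf/cluster.properties"); // path is illustrative
        try {
            props.load(in);
        } finally {
            in.close();
        }
        System.out.println(props.getProperty("CC_CLIENTPORT")); // 3099
        System.out.println(props.getProperty("CCLOGS_DIR"));    // $CCTMP_DIR/logs (not expanded here)
    }
}
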
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/configuration.xsl b/hivesterix/hivesterix-dist/src/main/resources/conf/configuration.xsl
new file mode 100644
index 0000000..377cdbe
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/configuration.xsl
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
+<xsl:output method="html"/>
+<xsl:template match="configuration">
+<html>
+<body>
+<table border="1">
+<tr>
+ <td>name</td>
+ <td>value</td>
+ <td>description</td>
+</tr>
+<xsl:for-each select="property">
+<tr>
+  <td><a name="{name}"><xsl:value-of select="name"/></a></td>
+  <td><xsl:value-of select="value"/></td>
+  <td><xsl:value-of select="description"/></td>
+</tr>
+</xsl:for-each>
+</table>
+</body>
+</html>
+</xsl:template>
+</xsl:stylesheet>
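
A standalone sketch, not part of the patch above: configuration.xsl renders a configuration file as an HTML table with one name/value/description row per property, and hive-default.xml below points at it through its xml-stylesheet processing instruction so a browser can display the settings. The sketch applies the same transform with the JDK's built-in XSLT support; the file paths are illustrative.

// Standalone sketch, not part of the patch: render a configuration file to HTML with configuration.xsl.
import java.io.File;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

public class ConfToHtmlSketch {
    public static void main(String[] args) throws Exception {
        Transformer t = TransformerFactory.newInstance()
                .newTransformer(new StreamSource(new File("conf/configuration.xsl")));
        t.transform(new StreamSource(new File("conf/hive-default.xml")),
                new StreamResult(new File("hive-default.html")));
    }
}
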
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/debugnc.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/debugnc.properties
new file mode 100755
index 0000000..27afa26
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/debugnc.properties
@@ -0,0 +1,12 @@
+#The tmp directory for nc to install jars
+NCTMP_DIR2=/tmp/t-1
+
+#The directory to put nc logs
+NCLOGS_DIR2=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS2="/tmp/t-2,/tmp/t-3"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS2="-Xdebug -Xrunjdwp:transport=dt_socket,address=7003,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
new file mode 100644
index 0000000..587eede
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-default.xml
@@ -0,0 +1,758 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+
+<configuration>
+
+	<!-- Hive Configuration can either be stored in this file or in the hadoop 
+		configuration files -->
+	<!-- that are implied by Hadoop setup variables. -->
+	<!-- Aside from Hadoop setup variables - this file is provided as a convenience 
+		so that Hive -->
+	<!-- users do not have to edit hadoop configuration files (that may be managed 
+		as a centralized -->
+	<!-- resource). -->
+
+	<!-- Hive Execution Parameters -->
+	<property>
+		<name>mapred.reduce.tasks</name>
+		<value>-1</value>
+		<description>The default number of reduce tasks per job. Typically set
+			to a prime close to the number of available hosts. Ignored when
+			mapred.job.tracker is "local". Hadoop sets this to 1 by default,
+			whereas hive uses -1 as its default value.
+			By setting this property to -1, Hive will automatically figure out
+			the number of reducers.</description>
+	</property>
+
+	<property>
+		<name>hive.hyracks.connectorpolicy</name>
+		<value>PIPELINING</value>
+	</property>
+
+	<property>
+		<name>hive.hyracks.parrallelism</name>
+		<value>4</value>
+	</property>
+
+	<property>
+		<name>hive.algebricks.groupby.external</name>
+		<value>true</value>
+	</property>
+	
+	<property>
+		<name>hive.algebricks.groupby.external.memory</name>
+		<value>33554432</value>
+	</property>
+	
+	<property>
+		<name>hive.algebricks.sort.memory</name>
+		<value>33554432</value>
+	</property>
+
+	<property>
+		<name>hive.exec.reducers.bytes.per.reducer</name>
+		<value>1000000000</value>
+		<description>Size per reducer. The default is 1G, i.e. if the input size
+			is 10G, it will use 10 reducers.</description>
+	</property>
+
+	<property>
+		<name>hive.exec.reducers.max</name>
+		<value>999</value>
+		<description>The maximum number of reducers that will be used. If the
+			value specified in the configuration parameter mapred.reduce.tasks is
+			negative, Hive will use this as the maximum number of reducers when
+			automatically determining the number of reducers.</description>
+	</property>
+
+	<property>
+		<name>hive.exec.scratchdir</name>
+		<value>/hive-${user.name}</value>
+		<description>Scratch space for Hive jobs</description>
+	</property>
+
+	<property>
+		<name>hive.test.mode</name>
+		<value>false</value>
+		<description>Whether Hive is running in test mode. If yes, it turns on
+			sampling and prefixes the output table name.</description>
+	</property>
+
+	<property>
+		<name>hive.test.mode.prefix</name>
+		<value>test_</value>
+		<description>If Hive is running in test mode, prefixes the output
+			table name with this string.</description>
+	</property>
+
+	<!-- If the input table is not bucketed, the denominator of the tablesample
+		is determined by the parameter below -->
+	<!-- For example, the following query: -->
+	<!-- INSERT OVERWRITE TABLE dest -->
+	<!-- SELECT col1 from src -->
+	<!-- would be converted to -->
+	<!-- INSERT OVERWRITE TABLE test_dest -->
+	<!-- SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1)) -->
+	<property>
+		<name>hive.test.mode.samplefreq</name>
+		<value>32</value>
+		<description>If Hive is running in test mode and the table is not
+			bucketed, the sampling frequency.</description>
+	</property>
+
+	<property>
+		<name>hive.test.mode.nosamplelist</name>
+		<value></value>
+		<description>If Hive is running in test mode, don't sample the above
+			comma-separated list of tables.</description>
+	</property>
+
+	<property>
+		<name>hive.metastore.local</name>
+		<value>true</value>
+		<description>Controls whether to connect to a remote metastore server or
+			open a new metastore server in the Hive Client JVM.</description>
+	</property>
+
+	<property>
+		<name>javax.jdo.option.ConnectionURL</name>
+		<value>jdbc:derby:;databaseName=metastore_db;create=true</value>
+		<description>JDBC connect string for a JDBC metastore</description>
+	</property>
+
+	<property>
+		<name>javax.jdo.option.ConnectionDriverName</name>
+		<value>org.apache.derby.jdbc.EmbeddedDriver</value>
+		<description>Driver class name for a JDBC metastore</description>
+	</property>
+
+	<property>
+		<name>javax.jdo.PersistenceManagerFactoryClass</name>
+		<value>org.datanucleus.jdo.JDOPersistenceManagerFactory</value>
+		<description>class implementing the jdo persistence</description>
+	</property>
+
+	<property>
+		<name>datanucleus.connectionPoolingType</name>
+		<value>DBCP</value>
+		<description>Uses a DBCP connection pool for JDBC metastore
+		</description>
+	</property>
+
+	<property>
+		<name>javax.jdo.option.DetachAllOnCommit</name>
+		<value>true</value>
+		<description>detaches all objects from session so that they can be
+			used after transaction is committed</description>
+	</property>
+
+	<property>
+		<name>javax.jdo.option.NonTransactionalRead</name>
+		<value>true</value>
+		<description>reads outside of transactions</description>
+	</property>
+
+	<property>
+		<name>javax.jdo.option.ConnectionUserName</name>
+		<value>APP</value>
+		<description>username to use against metastore database</description>
+	</property>
+
+	<property>
+		<name>javax.jdo.option.ConnectionPassword</name>
+		<value>mine</value>
+		<description>password to use against metastore database</description>
+	</property>
+
+	<property>
+		<name>datanucleus.validateTables</name>
+		<value>false</value>
+		<description>validates existing schema against code. turn this on if
+			you want to verify existing schema </description>
+	</property>
+
+	<property>
+		<name>datanucleus.validateColumns</name>
+		<value>false</value>
+		<description>validates existing schema against code. turn this on if
+			you want to verify existing schema </description>
+	</property>
+
+	<property>
+		<name>datanucleus.validateConstraints</name>
+		<value>false</value>
+		<description>validates existing schema against code. turn this on if
+			you want to verify existing schema </description>
+	</property>
+
+	<property>
+		<name>datanucleus.storeManagerType</name>
+		<value>rdbms</value>
+		<description>metadata store type</description>
+	</property>
+
+	<property>
+		<name>datanucleus.autoCreateSchema</name>
+		<value>true</value>
+		<description>Creates the necessary schema on startup if one doesn't
+			exist. Set this to false after creating it once.</description>
+	</property>
+
+	<property>
+		<name>datanucleus.autoStartMechanismMode</name>
+		<value>checked</value>
+		<description>throw exception if metadata tables are incorrect
+		</description>
+	</property>
+
+	<property>
+		<name>datanucleus.transactionIsolation</name>
+		<value>read-committed</value>
+		<description>Default transaction isolation level for identity
+			generation. </description>
+	</property>
+
+	<property>
+		<name>datanucleus.cache.level2</name>
+		<value>false</value>
+		<description>Use a level 2 cache. Turn this off if metadata is changed
+			independently of hive metastore server</description>
+	</property>
+
+	<property>
+		<name>datanucleus.cache.level2.type</name>
+		<value>SOFT</value>
+		<description>SOFT=soft reference based cache, WEAK=weak reference
+			based cache.</description>
+	</property>
+
+	<property>
+		<name>datanucleus.identifierFactory</name>
+		<value>datanucleus</value>
+		<description>Name of the identifier factory to use when generating
+			table/column names etc. 'datanucleus' is used for backward
+			compatibility</description>
+	</property>
+
+	<property>
+		<name>hive.metastore.warehouse.dir</name>
+		<value>/user/hivesterix</value>
+		<description>location of default database for the warehouse
+		</description>
+	</property>
+
+	<property>
+		<name>hive.metastore.connect.retries</name>
+		<value>5</value>
+		<description>Number of retries while opening a connection to metastore
+		</description>
+	</property>
+
+	<property>
+		<name>hive.metastore.rawstore.impl</name>
+		<value>org.apache.hadoop.hive.metastore.ObjectStore</value>
+		<description>Name of the class that implements the
+			org.apache.hadoop.hive.metastore.rawstore interface. This class is
+			used to store and retrieve raw metadata objects such as tables and
+			databases.</description>
+	</property>
+
+	<property>
+		<name>hive.default.fileformat</name>
+		<value>TextFile</value>
+		<description>Default file format for CREATE TABLE statement. Options
+			are TextFile and SequenceFile. Users can explicitly say CREATE TABLE
+			... STORED AS &lt;TEXTFILE|SEQUENCEFILE&gt; to override</description>
+	</property>
+
+	<property>
+		<name>hive.fileformat.check</name>
+		<value>true</value>
+		<description>Whether to check file format or not when loading data
+			files</description>
+	</property>
+
+	<property>
+		<name>hive.map.aggr</name>
+		<value>true</value>
+		<description>Whether to use map-side aggregation in Hive Group By
+			queries</description>
+	</property>
+
+	<property>
+		<name>hive.groupby.skewindata</name>
+		<value>false</value>
+		<description>Whether there is skew in data to optimize group by
+			queries</description>
+	</property>
+
+	<property>
+		<name>hive.groupby.mapaggr.checkinterval</name>
+		<value>100000</value>
+		<description>Number of rows after which the size of the grouping
+			keys/aggregation classes is checked.</description>
+	</property>
+
+	<property>
+		<name>hive.mapred.local.mem</name>
+		<value>0</value>
+		<description>For local mode, memory of the mappers/reducers
+		</description>
+	</property>
+
+	<property>
+		<name>hive.map.aggr.hash.percentmemory</name>
+		<value>0.5</value>
+		<description>Portion of total memory to be used by the map-side group
+			aggregation hash table.</description>
+	</property>
+
+	<property>
+		<name>hive.map.aggr.hash.min.reduction</name>
+		<value>0.5</value>
+		<description>Hash aggregation will be turned off if the ratio between
+			hash
+			table size and input rows is bigger than this number. Set to 1 to make
+			sure
+			hash aggregation is never turned off.</description>
+	</property>
+
+	<property>
+		<name>hive.optimize.cp</name>
+		<value>true</value>
+		<description>Whether to enable column pruner</description>
+	</property>
+
+	<property>
+		<name>hive.optimize.ppd</name>
+		<value>true</value>
+		<description>Whether to enable predicate pushdown</description>
+	</property>
+
+	<property>
+		<name>hive.optimize.pruner</name>
+		<value>true</value>
+		<description>Whether to enable the new partition pruner which depends
+			on predicate pushdown. If this is disabled,
+			the old partition pruner which is based on AST will be enabled.
+		</description>
+	</property>
+
+	<property>
+		<name>hive.optimize.groupby</name>
+		<value>true</value>
+		<description>Whether to enable the bucketed group by from bucketed
+			partitions/tables.</description>
+	</property>
+
+	<property>
+		<name>hive.join.emit.interval</name>
+		<value>1000</value>
+		<description>How many rows in the right-most join operand Hive should
+			buffer before emitting the join result. </description>
+	</property>
+
+	<property>
+		<name>hive.join.cache.size</name>
+		<value>25000</value>
+		<description>How many rows in the joining tables (except the streaming
+			table) should be cached in memory. </description>
+	</property>
+
+	<property>
+		<name>hive.mapjoin.bucket.cache.size</name>
+		<value>100</value>
+		<description>How many values in each keys in the map-joined table
+			should be cached in memory. </description>
+	</property>
+
+	<property>
+		<name>hive.mapjoin.maxsize</name>
+		<value>100000</value>
+		<description>Maximum # of rows of the small table that can be handled
+			by map-side join. If the size is reached and hive.task.progress is
+			set, a fatal error counter is set and the job will be killed.
+		</description>
+	</property>
+
+	<property>
+		<name>hive.mapjoin.cache.numrows</name>
+		<value>25000</value>
+		<description>How many rows should be cached by jdbm for map join.
+		</description>
+	</property>
+
+	<property>
+		<name>hive.optimize.skewjoin</name>
+		<value>false</value>
+		<description>Whether to enable skew join optimization. </description>
+	</property>
+
+	<property>
+		<name>hive.skewjoin.key</name>
+		<value>100000</value>
+		<description>Determines if we get a skew key in a join. If we see more
+			than the specified number of rows with the same key in the join
+			operator, we treat the key as a skew join key. </description>
+	</property>
+
+	<property>
+		<name>hive.skewjoin.mapjoin.map.tasks</name>
+		<value>10000</value>
+		<description> Determines the number of map tasks used in the follow-up
+			map join job for a skew join. It should be used together with
+			hive.skewjoin.mapjoin.min.split to perform fine-grained control.</description>
+	</property>
+
+	<property>
+		<name>hive.skewjoin.mapjoin.min.split</name>
+		<value>33554432</value>
+		<description> Determines the maximum number of map tasks used in the
+			follow-up map join job for a skew join by specifying the minimum split
+			size. It should be used together with hive.skewjoin.mapjoin.map.tasks
+			to perform fine-grained control.</description>
+	</property>
+
+	<property>
+		<name>hive.mapred.mode</name>
+		<value>nonstrict</value>
+		<description>The mode in which the hive operations are being
+			performed. In strict mode, some risky queries are not allowed to run
+		</description>
+	</property>
+
+	<property>
+		<name>hive.exec.script.maxerrsize</name>
+		<value>100000</value>
+		<description>Maximum number of bytes a script is allowed to emit to
+			standard error (per map-reduce task). This prevents runaway scripts
+			from filling logs partitions to capacity </description>
+	</property>
+
+	<property>
+		<name>hive.exec.script.allow.partial.consumption</name>
+		<value>false</value>
+		<description> When enabled, this option allows a user script to exit
+			successfully without consuming all the data from the standard input.
+		</description>
+	</property>
+
+	<property>
+		<name>hive.script.operator.id.env.var</name>
+		<value>HIVE_SCRIPT_OPERATOR_ID</value>
+		<description> Name of the environment variable that holds the unique
+			script operator ID in the user's transform function (the custom
+			mapper/reducer that the user has specified in the query)
+		</description>
+	</property>
+
+	<property>
+		<name>hive.exec.compress.output</name>
+		<value>false</value>
+		<description> This controls whether the final outputs of a query (to a
+			local/HDFS file or a Hive table) are compressed. The compression codec
+			and other options are determined from hadoop config variables
+			mapred.output.compress* </description>
+	</property>
+
+	<property>
+		<name>hive.exec.compress.intermediate</name>
+		<value>false</value>
+		<description> This controls whether intermediate files produced by
+			hive between multiple map-reduce jobs are compressed. The compression
+			codec and other options are determined from hadoop config variables
+			mapred.output.compress* </description>
+	</property>
+
+	<property>
+		<name>hive.exec.parallel</name>
+		<value>false</value>
+		<description>Whether to execute jobs in parallel</description>
+	</property>
+
+	<property>
+		<name>hive.exec.parallel.thread.number</name>
+		<value>8</value>
+		<description>How many jobs at most can be executed in parallel
+		</description>
+	</property>
+
+	<property>
+		<name>hive.hwi.war.file</name>
+		<value>lib\hive-hwi-0.7.0.war</value>
+		<description>This sets the path to the HWI war file, relative to
+			${HIVE_HOME}. </description>
+	</property>
+
+	<property>
+		<name>hive.hwi.listen.host</name>
+		<value>0.0.0.0</value>
+		<description>This is the host address the Hive Web Interface will
+			listen on</description>
+	</property>
+
+	<property>
+		<name>hive.hwi.listen.port</name>
+		<value>9999</value>
+		<description>This is the port the Hive Web Interface will listen on
+		</description>
+	</property>
+
+	<property>
+		<name>hive.exec.pre.hooks</name>
+		<value></value>
+		<description>Pre Execute Hook for Tests</description>
+	</property>
+
+	<property>
+		<name>hive.merge.mapfiles</name>
+		<value>true</value>
+		<description>Merge small files at the end of a map-only job
+		</description>
+	</property>
+
+	<property>
+		<name>hive.merge.mapredfiles</name>
+		<value>false</value>
+		<description>Merge small files at the end of a map-reduce job
+		</description>
+	</property>
+
+	<property>
+		<name>hive.heartbeat.interval</name>
+		<value>1000</value>
+		<description>Send a heartbeat after this interval - used by mapjoin
+			and filter operators</description>
+	</property>
+
+	<property>
+		<name>hive.merge.size.per.task</name>
+		<value>256000000</value>
+		<description>Size of merged files at the end of the job</description>
+	</property>
+
+	<property>
+		<name>hive.merge.size.smallfiles.avgsize</name>
+		<value>16000000</value>
+		<description>When the average output file size of a job is less than
+			this number, Hive will start an additional map-reduce job to merge
+			the output files into bigger files. This is only done for map-only
+			jobs if hive.merge.mapfiles is true, and for map-reduce jobs if
+			hive.merge.mapredfiles is true.</description>
+	</property>
+
+	<property>
+		<name>hive.script.auto.progress</name>
+		<value>false</value>
+		<description>Whether Hive Transform/Map/Reduce clauses should
+			automatically send progress information to TaskTracker to avoid the
+			task getting killed because of inactivity. Hive sends progress
+			information when the script is outputting to stderr. This option
+			removes the need to periodically produce stderr messages, but users
+			should be cautious because this may prevent infinite loops in the
+			scripts from being killed by TaskTracker.  </description>
+	</property>
+
+	<property>
+		<name>hive.script.serde</name>
+		<value>org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe</value>
+		<description>The default serde for transmitting input data to and
+			reading output data from the user scripts. </description>
+	</property>
+
+	<property>
+		<name>hive.script.recordreader</name>
+		<value>org.apache.hadoop.hive.ql.exec.TextRecordReader</value>
+		<description>The default record reader for reading data from the user
+			scripts. </description>
+	</property>
+
+	<property>
+		<name>hive.script.recordwriter</name>
+		<value>org.apache.hadoop.hive.ql.exec.TextRecordWriter</value>
+		<description>The default record writer for writing data to the user
+			scripts. </description>
+	</property>
+
+	<property>
+		<name>hive.input.format</name>
+		<value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
+		<description>The default input format, if it is not specified, the
+			system assigns it. It is set to HiveInputFormat for hadoop versions
+			17, 18 and 19, whereas it is set to CombinedHiveInputFormat for
+			hadoop 20. The user can always overwrite it - if there is a bug in
+			CombinedHiveInputFormat, it can always be manually set to
+			HiveInputFormat. </description>
+	</property>
+
+	<property>
+		<name>hive.udtf.auto.progress</name>
+		<value>false</value>
+		<description>Whether Hive should automatically send progress
+			information to TaskTracker when using UDTF's to prevent the task
+			getting killed because of inactivity. Users should be cautious
+			because this may prevent TaskTracker from killing tasks with infinite
+			loops.  </description>
+	</property>
+
+	<property>
+		<name>hive.mapred.reduce.tasks.speculative.execution</name>
+		<value>true</value>
+		<description>Whether speculative execution for reducers should be
+			turned on. </description>
+	</property>
+
+	<property>
+		<name>hive.exec.counters.pull.interval</name>
+		<value>1000</value>
+		<description>The interval with which to poll the JobTracker for the
+			counters of the running job. The smaller it is, the more load there
+			will be on the JobTracker; the higher it is, the less granular the
+			data caught will be.</description>
+	</property>
+
+	<property>
+		<name>hive.enforce.bucketing</name>
+		<value>false</value>
+		<description>Whether bucketing is enforced. If true, while inserting
+			into the table, bucketing is enforced. </description>
+	</property>
+
+	<property>
+		<name>hive.enforce.sorting</name>
+		<value>false</value>
+		<description>Whether sorting is enforced. If true, while inserting
+			into the table, sorting is enforced. </description>
+	</property>
+
+	<property>
+		<name>hive.metastore.ds.connection.url.hook</name>
+		<value></value>
+		<description>Name of the hook to use for retrieving the JDO connection
+			URL. If empty, the value in javax.jdo.option.ConnectionURL is used
+		</description>
+	</property>
+
+	<property>
+		<name>hive.metastore.ds.retry.attempts</name>
+		<value>1</value>
+		<description>The number of times to retry a metastore call if there
+			is a connection error.</description>
+	</property>
+
+	<property>
+		<name>hive.metastore.ds.retry.interval</name>
+		<value>1000</value>
+		<description>The number of milliseconds between metastore retry
+			attempts</description>
+	</property>
+
+	<property>
+		<name>hive.metastore.server.min.threads</name>
+		<value>200</value>
+		<description>Minimum number of worker threads in the Thrift server's
+			pool.</description>
+	</property>
+
+	<property>
+		<name>hive.metastore.server.max.threads</name>
+		<value>100000</value>
+		<description>Maximum number of worker threads in the Thrift server's
+			pool.</description>
+	</property>
+
+	<property>
+		<name>hive.metastore.server.tcp.keepalive</name>
+		<value>true</value>
+		<description>Whether to enable TCP keepalive for the metastore server.
+			Keepalive will prevent accumulation of half-open connections.
+		</description>
+	</property>
+
+	<property>
+		<name>hive.optimize.reducededuplication</name>
+		<value>true</value>
+		<description>Remove extra map-reduce jobs if the data is already
+			clustered by the same key which needs to be used again. This should
+			always be set to true. Since it is a new feature, it has been made
+			configurable.</description>
+	</property>
+
+	<property>
+		<name>hive.exec.dynamic.partition</name>
+		<value>false</value>
+		<description>Whether or not to allow dynamic partitions in DML/DDL.
+		</description>
+	</property>
+
+	<property>
+		<name>hive.exec.dynamic.partition.mode</name>
+		<value>strict</value>
+		<description>In strict mode, the user must specify at least one static
+			partition in case the user accidentally overwrites all partitions.
+		</description>
+	</property>
+
+	<property>
+		<name>hive.exec.max.dynamic.partitions</name>
+		<value>1000</value>
+		<description>Maximum number of dynamic partitions allowed to be
+			created in total.</description>
+	</property>
+
+	<property>
+		<name>hive.exec.max.dynamic.partitions.pernode</name>
+		<value>100</value>
+		<description>Maximum number of dynamic partitions allowed to be
+			created in each mapper/reducer node.</description>
+	</property>
+
+	<property>
+		<name>hive.default.partition.name</name>
+		<value>__HIVE_DEFAULT_PARTITION__</value>
+		<description>The default partition name in case the dynamic partition
+			column value is a null/empty string or any other value that cannot be
+			escaped. This value must not contain any special character used in
+			HDFS URIs (e.g., ':', '%', '/' etc.). The user has to be aware that the
+			dynamic partition value should not contain this value to avoid
+			confusion.</description>
+	</property>
+
+	<property>
+		<name>fs.har.impl</name>
+		<value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
+		<description>The implementation for accessing Hadoop Archives. Note
+			that this won't be applicable to Hadoop versions less than 0.20
+		</description>
+	</property>
+
+	<property>
+		<name>hive.archive.enabled</name>
+		<value>false</value>
+		<description>Whether archiving operations are permitted</description>
+	</property>
+
+	<property>
+		<name>hive.archive.har.parentdir.settable</name>
+		<value>false</value>
+		<description>In new Hadoop versions, the parent directory must be set
+			while
+			creating a HAR. Because this functionality is hard to detect with just
+			version
+			numbers, this conf var needs to be set manually.</description>
+	</property>
+
+	<!-- HBase Storage Handler Parameters -->
+
+	<property>
+		<name>hive.hbase.wal.enabled</name>
+		<value>true</value>
+		<description>Whether writes to HBase should be forced to the
+			write-ahead log. Disabling this improves HBase write performance at
+			the risk of lost writes in case of a crash.</description>
+	</property>
+
+</configuration>
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
new file mode 100644
index 0000000..784a274
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/hive-log4j.properties
@@ -0,0 +1,58 @@
+#------------------------------------------------------------------------------
+#
+#  The following properties set the logging levels and log appender.  The
+#  log4j.rootCategory variable defines the default log level and one or more
+#  appenders.  For the console, use 'S'.  For the daily rolling file, use 'R'.
+#  For an HTML formatted log, use 'H'.
+#
+#  To override the default (rootCategory) log level, define a property of the
+#  form (see below for available values):
+#
+#        log4j.logger. =
+#
+#    Available logger names:
+#      TODO
+#
+#    Possible Log Levels:
+#      FATAL, ERROR, WARN, INFO, DEBUG
+#
+#------------------------------------------------------------------------------
+log4j.rootCategory=INFO, S
+
+log4j.logger.com.dappit.Dapper.parser=ERROR
+log4j.logger.org.w3c.tidy=FATAL
+
+#------------------------------------------------------------------------------
+#
+#  The following properties configure the console (stdout) appender.
+#  See http://logging.apache.org/log4j/docs/api/index.html for details.
+#
+#------------------------------------------------------------------------------
+log4j.appender.S = org.apache.log4j.ConsoleAppender
+log4j.appender.S.layout = org.apache.log4j.PatternLayout
+log4j.appender.S.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %c{1} [%p] %m%n
+
+#------------------------------------------------------------------------------
+#
+#  The following properties configure the Daily Rolling File appender.
+#  See http://logging.apache.org/log4j/docs/api/index.html for details.
+#
+#------------------------------------------------------------------------------
+log4j.appender.R = org.apache.log4j.DailyRollingFileAppender
+log4j.appender.R.File = logs/bensApps.log
+log4j.appender.R.Append = true
+log4j.appender.R.DatePattern = '.'yyyy-MM-dd
+log4j.appender.R.layout = org.apache.log4j.PatternLayout
+log4j.appender.R.layout.ConversionPattern = %d{yyyy-MM-dd HH:mm:ss} %c{1} [%p] %m%n
+
+#------------------------------------------------------------------------------
+#
+#  The following properties configure the Rolling File appender in HTML.
+#  See http://logging.apache.org/log4j/docs/api/index.html for details.
+#
+#------------------------------------------------------------------------------
+log4j.appender.H = org.apache.log4j.RollingFileAppender
+log4j.appender.H.File = logs/bensApps.html
+log4j.appender.H.MaxFileSize = 100KB
+log4j.appender.H.Append = false
+log4j.appender.H.layout = org.apache.log4j.HTMLLayout
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/master b/hivesterix/hivesterix-dist/src/main/resources/conf/master
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/master
@@ -0,0 +1 @@
+localhost
diff --git a/hivesterix/hivesterix-dist/src/main/resources/conf/slaves b/hivesterix/hivesterix-dist/src/main/resources/conf/slaves
new file mode 100644
index 0000000..2fbb50c
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/conf/slaves
@@ -0,0 +1 @@
+localhost
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/cli.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/cli.sh
new file mode 100644
index 0000000..914aae3
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/cli.sh
@@ -0,0 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+THISSERVICE=cli
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+cli () {
+  CLASS=org.apache.hadoop.hive.cli.CliDriver
+  execHiveCmd $CLASS "$@"
+}
+
+cli_help () {
+  CLASS=org.apache.hadoop.hive.cli.CliDriver
+  execHiveCmd $CLASS "--help"
+} 
+
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/help.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/help.sh
new file mode 100644
index 0000000..432859a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/help.sh
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+THISSERVICE=help
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+help() {
+  echo "Usage ./hive <parameters> --service serviceName <service parameters>"
+  echo "Service List: $SERVICE_LIST"
+  echo "Parameters parsed:"
+  echo "  --auxpath : Auxiliary jars "
+  echo "  --config : Hive configuration directory"
+  echo "  --service : Starts specific service/component. cli is default"
+  echo "Parameters used:"
+  echo "  HADOOP_HOME : Hadoop install directory"
+  echo "  HIVE_OPT : Hive options"
+  echo "For help on a particular service:"
+  echo "  ./hive --service serviceName --help"
+}
+
+help_help(){
+  help
+}
+
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hiveserver.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hiveserver.sh
new file mode 100644
index 0000000..b5edce4
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hiveserver.sh
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+THISSERVICE=hiveserver
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+hiveserver() {
+  echo "Starting Hive Thrift Server"
+  CLASS=org.apache.hadoop.hive.service.HiveServer
+  if $cygwin; then
+    HIVE_LIB=`cygpath -w "$HIVE_LIB"`
+  fi
+  JAR=${HIVE_LIB}/hive-service-*.jar
+
+  # hadoop 20 or newer - skip the aux_jars option and hiveconf
+  exec $HADOOP jar $JAR $CLASS $HIVE_PORT "$@"
+}
+
+hiveserver_help() {
+  echo "usage HIVE_PORT=xxxx ./hive --service hiveserver" 
+  echo "  HIVE_PORT : Specify the server port"
+}
+
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hwi.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hwi.sh
new file mode 100644
index 0000000..f9cd8ec
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/hwi.sh
@@ -0,0 +1,50 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+THISSERVICE=hwi
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+hwi() {
+
+  if $cygwin; then
+    HIVE_LIB=`cygpath -w "$HIVE_LIB"`
+  fi
+
+  CLASS=org.apache.hadoop.hive.hwi.HWIServer
+  # The ls hack forces the * to be expanded which is required because 
+  # System.getenv doesn't do globbing
+  export HWI_JAR_FILE=$(ls ${HIVE_LIB}/hive-hwi-*.jar)
+  export HWI_WAR_FILE=$(ls ${HIVE_LIB}/hive-hwi-*.war)
+
+  #hwi requires ant jars
+  if [ "$ANT_LIB" = "" ] ; then
+    ANT_LIB=/opt/ant/lib
+  fi
+  for f in ${ANT_LIB}/*.jar; do
+    if [[ ! -f $f ]]; then
+      continue;
+    fi
+    HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:$f
+  done
+
+  export HADOOP_CLASSPATH
+  
+  # hadoop 20 or newer - skip the aux_jars option and hiveconf
+  exec $HADOOP jar ${HWI_JAR_FILE} $CLASS $HIVE_OPTS "$@"
+}
+
+hwi_help(){
+  echo "Usage ANT_LIB=XXXX hive --service hwi"	
+}
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/jar.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/jar.sh
new file mode 100644
index 0000000..b52f9a7
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/jar.sh
@@ -0,0 +1,47 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+THISSERVICE=jar
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+jar () {
+  RUNJAR=$1
+  shift
+
+  RUNCLASS=$1
+  shift
+
+  if $cygwin; then
+    HIVE_LIB=`cygpath -w "$HIVE_LIB"`
+  fi
+
+  if [ -z "$RUNJAR" ] ; then
+    echo "RUNJAR not specified"
+    exit 3
+  fi
+
+  if [ -z "$RUNCLASS" ] ; then
+    echo "RUNCLASS not specified"
+    exit 3
+  fi
+
+  # hadoop 20 or newer - skip the aux_jars option and hiveconf
+  exec $HADOOP jar $RUNJAR $RUNCLASS $HIVE_OPTS "$@"
+}
+
+jar_help () {
+  echo "Used for applications that require Hadoop and Hive classpath and environment."
+  echo "./hive --service jar <yourjar> <yourclass> HIVE_OPTS <your_args>"
+}
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/lineage.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/lineage.sh
new file mode 100644
index 0000000..993bc8d
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/lineage.sh
@@ -0,0 +1,38 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+THISSERVICE=lineage
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+lineage () {
+  CLASS=org.apache.hadoop.hive.ql.tools.LineageInfo
+
+  # cli specific code
+  if [ ! -f ${HIVE_LIB}/hive-exec-*.jar ]; then
+    echo "Missing Hive exec Jar"
+    exit 3;
+  fi
+
+  if $cygwin; then
+    HIVE_LIB=`cygpath -w "$HIVE_LIB"`
+  fi
+
+  exec $HADOOP jar ${HIVE_LIB}/hive-exec-*.jar $CLASS  "$@"
+}
+
+lineage_help () {
+  echo "usage ./hive 'hql' "
+} 
+
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/metastore.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/metastore.sh
new file mode 100644
index 0000000..db15f6e
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/metastore.sh
@@ -0,0 +1,35 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+THISSERVICE=metastore
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+metastore() {
+  echo "Starting Hive Metastore Server"
+  CLASS=org.apache.hadoop.hive.metastore.HiveMetaStore
+  if $cygwin; then
+    HIVE_LIB=`cygpath -w "$HIVE_LIB"`
+  fi
+  JAR=${HIVE_LIB}/hive-service-*.jar
+
+  # hadoop 20 or newer - skip the aux_jars option and hiveconf
+  exec $HADOOP jar $JAR $CLASS $METASTORE_PORT "$@"
+}
+
+metastore_help() {
+  echo "usage METASTORE_PORT=xxxx ./hive --service metastore"
+  echo "  METASTORE_PORT : Specify the metastore server port"
+}
+
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/rcfilecat.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/rcfilecat.sh
new file mode 100644
index 0000000..3a9264b
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/rcfilecat.sh
@@ -0,0 +1,27 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+THISSERVICE=rcfilecat
+export SERVICE_LIST="${SERVICE_LIST}${THISSERVICE} "
+
+rcfilecat () {
+  CLASS=org.apache.hadoop.hive.cli.RCFileCat
+  HIVE_OPTS=''
+  execHiveCmd $CLASS "$@"
+}
+
+rcfilecat_help () {
+  echo "usage ./hive rcfilecat [--start='startoffset'] [--length='len'] "
+} 
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/util/execHiveCmd.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/util/execHiveCmd.sh
new file mode 100644
index 0000000..167cc40
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/ext/util/execHiveCmd.sh
@@ -0,0 +1,32 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+execHiveCmd () {
+  CLASS=$1;
+  shift;
+
+  # cli specific code
+  if [ ! -f ${HIVE_LIB}/hive-cli-*.jar ]; then
+    echo "Missing Hive CLI Jar"
+    exit 3;
+  fi
+
+  if $cygwin; then
+    HIVE_LIB=`cygpath -w "$HIVE_LIB"`
+  fi
+
+  # hadoop 20 or newer - skip the aux_jars option. picked up from hiveconf
+  exec $HADOOP jar ${HIVE_LIB}/hive-cli-*.jar $CLASS $HIVE_OPTS "$@"
+}
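+
+# Example (sketch): the service wrappers under ext/ call this helper, e.g.
+# cli.sh runs `execHiveCmd org.apache.hadoop.hive.cli.CliDriver "$@"`, so every
+# service shares the same classpath check and hadoop-jar invocation.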
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/getip.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/getip.sh
new file mode 100755
index 0000000..8c9ae76
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/getip.sh
@@ -0,0 +1,25 @@
+#get the OS
+OS_NAME=`uname -a|awk '{print $1}'`
+LINUX_OS='Linux'
+
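+# Probe a list of common interface names and return the first inet address
+# found: eth0 -> em1 -> lo on Linux, en1 -> lo0 otherwise (e.g. Mac OS X).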
+if [ $OS_NAME = $LINUX_OS ];
+then
+        #Get IP Address
+        IPADDR=`/sbin/ifconfig eth0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+	if [ "$IPADDR" = "" ]
+        then
+                IPADDR=`/sbin/ifconfig em1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        fi
+	if [ "$IPADDR" = "" ]
+        then
+		IPADDR=`/sbin/ifconfig lo | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        fi 
+else
+        IPADDR=`/sbin/ifconfig en1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+	if [ "$IPADDR" = "" ]
+        then
+                IPADDR=`/sbin/ifconfig lo0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        fi
+
+fi
+echo $IPADDR
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/hive b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive
new file mode 100755
index 0000000..f98f340
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive
@@ -0,0 +1,213 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cygwin=false
+case "`uname`" in
+   CYGWIN*) cygwin=true;;
+esac
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+. "$bin"/hive-config.sh
+
+SERVICE=""
+HELP=""
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --service)
+      shift
+      SERVICE=$1
+      shift
+      ;;
+    --rcfilecat)
+      SERVICE=rcfilecat
+      shift
+      ;;
+    --help)
+      HELP=_help
+      shift
+      ;;
+    *)
+      break
+      ;;
+  esac
+done
+
+if [ "$SERVICE" = "" ] ; then
+  if [ "$HELP" = "_help" ] ; then
+    SERVICE="help"
+  else
+    SERVICE="cli"
+  fi
+fi
+
+if [ -f "${HIVE_CONF_DIR}/hive-env.sh" ]; then
+  . "${HIVE_CONF_DIR}/hive-env.sh"
+fi
+
+CLASSPATH="${HIVE_CONF_DIR}"
+
+HIVE_LIB=${HIVE_HOME}/lib
+
+# needed for execution
+if [ ! -f ${HIVE_LIB}/hive-exec-*.jar ]; then
+  echo "Missing Hive Execution Jar: ${HIVE_LIB}/hive-exec-*.jar"
+  exit 1;
+fi
+
+if [ ! -f ${HIVE_LIB}/hive-metastore-*.jar ]; then
+  echo "Missing Hive MetaStore Jar"
+  exit 2;
+fi
+
+# cli specific code
+if [ ! -f ${HIVE_LIB}/hive-cli-*.jar ]; then
+  echo "Missing Hive CLI Jar"
+  exit 3;
+fi
+
+CLASSPATH=${CLASSPATH}:${HIVE_LIB}/a-hive-path.jar
+
+for f in ${HIVE_LIB}/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add the auxiliary jars such as serdes
+if [ -d "${HIVE_AUX_JARS_PATH}" ]; then
+  for f in ${HIVE_AUX_JARS_PATH}/*.jar; do
+    if [[ ! -f $f ]]; then
+        continue;
+    fi
+    if $cygwin; then
+	f=`cygpath -w "$f"`
+    fi
+    AUX_CLASSPATH=${AUX_CLASSPATH}:$f
+    if [ "${AUX_PARAM}" == "" ]; then
+        AUX_PARAM=file://$f
+    else
+        AUX_PARAM=${AUX_PARAM},file://$f;
+    fi
+  done
+elif [ "${HIVE_AUX_JARS_PATH}" != "" ]; then 
+  if $cygwin; then
+      HIVE_AUX_JARS_PATH=`echo $HIVE_AUX_JARS_PATH | sed 's/,/:/g'`
+      HIVE_AUX_JARS_PATH=`cygpath -p -w "$HIVE_AUX_JARS_PATH"`
+      HIVE_AUX_JARS_PATH=`echo $HIVE_AUX_JARS_PATH | sed 's/;/,/g'`
+  fi
+  AUX_CLASSPATH=${HIVE_AUX_JARS_PATH}
+  AUX_PARAM=file://${HIVE_AUX_JARS_PATH}
+  AUX_PARAM=`echo $AUX_PARAM | sed 's/,/,file:\/\//g'`
+fi
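+
+# Note: AUX_CLASSPATH is appended to the JVM classpath below, while AUX_PARAM
+# collects the same jars as comma-separated file:// URIs for the -libjars /
+# hive.aux.jars.path options set further down.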
+
+# adding jars from auxlib directory
+for f in ${HIVE_HOME}/auxlib/*.jar; do
+  if [[ ! -f $f ]]; then
+      continue;
+  fi
+  if $cygwin; then
+      f=`cygpath -w "$f"`
+  fi
+  AUX_CLASSPATH=${AUX_CLASSPATH}:$f
+  if [ "${AUX_PARAM}" == "" ]; then
+    AUX_PARAM=file://$f
+  else
+    AUX_PARAM=${AUX_PARAM},file://$f;
+  fi
+done
+if $cygwin; then
+    CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+    CLASSPATH=${CLASSPATH};${AUX_CLASSPATH}
+else
+    CLASSPATH=${CLASSPATH}:${AUX_CLASSPATH}
+fi
+
+# pass classpath to hadoop
+export HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${CLASSPATH}"
+
+# check for hadoop in the path
+HADOOP_IN_PATH=`which hadoop 2>/dev/null`
+if [ -f ${HADOOP_IN_PATH} ]; then
+  HADOOP_DIR=`dirname "$HADOOP_IN_PATH"`/..
+fi
+# HADOOP_HOME env variable overrides hadoop in the path
+HADOOP_HOME=${HADOOP_HOME:-$HADOOP_DIR}
+if [ "$HADOOP_HOME" == "" ]; then
+  echo "Cannot find hadoop installation: \$HADOOP_HOME must be set or hadoop must be in the path";
+  exit 4;
+fi
+
+HADOOP=$HADOOP_HOME/bin/hadoop
+if [ ! -f ${HADOOP} ]; then
+  echo "Cannot find hadoop installation: \$HADOOP_HOME must be set or hadoop must be in the path";
+  exit 4;
+fi
+
+# Make sure we're using a compatible version of Hadoop
+hadoop_version=$($HADOOP version | awk '{if (NR == 1) {print $2;}}');
+
+# Save the regex to a var to work around quoting incompatibilities
+# between Bash 3.1 and 3.2
+hadoop_version_re="^([[:digit:]]+)\.([[:digit:]]+)(\.([[:digit:]]+))?.*$"
+
+if [[ "$hadoop_version" =~ $hadoop_version_re ]]; then
+    hadoop_major_ver=${BASH_REMATCH[1]}
+    hadoop_minor_ver=${BASH_REMATCH[2]}
+    hadoop_patch_ver=${BASH_REMATCH[4]}
+else
+    echo "Unable to determine Hadoop version information."
+    echo "'hadoop version' returned:"
+    echo `$HADOOP version`
+    exit 5
+fi
+
+if [ $hadoop_minor_ver -ne 20 -o $hadoop_patch_ver -eq 0 ]; then
+    echo "Hive requires Hadoop 0.20.x (x >= 1)."
+    echo "'hadoop version' returned:"
+    echo `$HADOOP version`
+    exit 6
+fi
+
+if [ "${AUX_PARAM}" != "" ]; then
+  HIVE_OPTS="$HIVE_OPTS -hiveconf hive.aux.jars.path=${AUX_PARAM}"
+  AUX_JARS_CMD_LINE="-libjars ${AUX_PARAM}"
+fi
+
+SERVICE_LIST=""
+
+for i in "$bin"/ext/*.sh ; do
+  . $i
+done
+
+for i in "$bin"/ext/util/*.sh ; do
+  . $i
+done
+
+TORUN=""
+for j in $SERVICE_LIST ; do
+  if [ "$j" = "$SERVICE" ] ; then
+    TORUN=${j}$HELP
+  fi
+done
+
+if [ "$TORUN" = "" ] ; then
+  echo "Service $SERVICE not found"
+  echo "Available Services: $SERVICE_LIST"
+  exit 7
+else
+  $TORUN "$@"
+fi
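+
+# Example: `./hive` starts the cli service by default; `./hive --service
+# metastore` or `./hive --service hwi` start the services defined under ext/.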
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/hive-config.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive-config.sh
new file mode 100755
index 0000000..2524bbc
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/hive-config.sh
@@ -0,0 +1,68 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# processes --config option from command line
+#
+
+this="$0"
+while [ -h "$this" ]; do
+  ls=`ls -ld "$this"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '.*/.*' > /dev/null; then
+    this="$link"
+  else
+    this=`dirname "$this"`/"$link"
+  fi
+done
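+
+# (The loop above follows symlinks so that HIVE_HOME is derived from the real
+# location of this script rather than from a link pointing at it.)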
+
+# convert relative path to absolute path
+bin=`dirname "$this"`
+script=`basename "$this"`
+bin=`cd "$bin"; pwd`
+this="$bin/$script"
+
+# the root of the Hadoop installation
+export HIVE_HOME=`dirname "$bin"`
+
+#check to see if the conf dir is given as an optional argument
+while [ $# -gt 0 ]; do    # Until you run out of parameters . . .
+  case "$1" in
+    --config)
+        shift
+        confdir=$1
+        shift
+        HIVE_CONF_DIR=$confdir
+        ;;
+    --auxpath)
+        shift
+        HIVE_AUX_JARS_PATH=$1
+        shift
+        ;;
+    *)
+        break;
+        ;;
+  esac
+done
+
+
+# Allow alternate conf dir location.
+HIVE_CONF_DIR="${HIVE_CONF_DIR:-$HIVE_HOME/conf}"
+
+export HIVE_CONF_DIR=$HIVE_CONF_DIR
+export HIVE_AUX_JARS_PATH=$HIVE_AUX_JARS_PATH
+
+# Default to use 256MB 
+export HADOOP_HEAPSIZE=${HADOOP_HEAPSIZE:-256}
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/init-hive-dfs.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/init-hive-dfs.sh
new file mode 100755
index 0000000..ec3997a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/init-hive-dfs.sh
@@ -0,0 +1,107 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+# The purpose of this script is to set up the warehouse directories on HDFS
+
+DEFAULT_WAREHOUSE_DIR="/user/hive/warehouse"
+DEFAULT_TMP_DIR="/tmp"
+
+WAREHOUSE_DIR=${DEFAULT_WAREHOUSE_DIR}
+TMP_DIR=${DEFAULT_TMP_DIR}
+HELP=""
+while [ $# -gt 0 ]; do
+  case "$1" in
+    --warehouse-dir)
+      shift
+      WAREHOUSE_DIR=$1
+      shift
+      ;;
+    --tmp-dir)
+      shift
+      TMP_DIR=$1
+      shift
+      ;;
+    --help)
+      HELP=_help
+      shift
+      ;;
+    *)
+      echo "Invalid parameter: $1"
+      HELP=_help
+      break
+      ;;
+  esac
+done
+
+if [ "$HELP" = "_help" ] ; then
+  echo "Usage $0 [--warehouse-dir <Warehouse dir>] [--tmp-dir <Tmp dir>]"
+  echo "Default value of warehouse directory is: [$DEFAULT_WAREHOUSE_DIR]"
+  echo "Default value of the temporary directory is: [$DEFAULT_TMP_DIR]"
+  exit -1
+fi
+
+
+# check for hadoop in the path
+HADOOP_IN_PATH=`which hadoop 2>/dev/null`
+if [ -f ${HADOOP_IN_PATH} ]; then
+  HADOOP_DIR=`dirname "$HADOOP_IN_PATH"`/..
+fi
+# HADOOP_HOME env variable overrides hadoop in the path
+HADOOP_HOME=${HADOOP_HOME:-$HADOOP_DIR}
+if [ "$HADOOP_HOME" == "" ]; then
+  echo "Cannot find hadoop installation: \$HADOOP_HOME must be set or hadoop must be in the path";
+  exit 4;
+fi
+
+HADOOP_EXEC=$HADOOP_HOME/bin/hadoop
+if [ ! -f ${HADOOP_EXEC} ]; then
+  echo "Cannot find hadoop installation: \$HADOOP_HOME must be set or hadoop must be in the path";
+  exit 4;
+fi
+
+
+# Ensure /tmp exist
+$HADOOP_EXEC fs -test -d ${TMP_DIR} > /dev/null 2>&1
+if [ $? -ne 0 ] 
+then
+  echo "Creating directory [${TMP_DIR}]"
+  $HADOOP_EXEC fs -mkdir ${TMP_DIR}
+fi
+
+echo "Setting writeable group rights for directory [${TMP_DIR}]"
+$HADOOP_EXEC fs -chmod g+w ${TMP_DIR}
+
+
+# Ensure warehouse dir exist
+$HADOOP_EXEC fs -test -d ${WAREHOUSE_DIR} > /dev/null 2>&1
+if [ $? -ne 0 ] 
+then
+  echo "Creating directory [${WAREHOUSE_DIR}]"
+  $HADOOP_EXEC fs -mkdir ${WAREHOUSE_DIR}
+fi
+
+echo "Setting writeable group rights for directory [${WAREHOUSE_DIR}]"
+$HADOOP_EXEC fs -chmod g+w ${WAREHOUSE_DIR}
+
+echo "Initialization done."
+echo
+echo "Please, do not forget to set the following configuration properties in hive-site.xml:"
+echo "hive.metastore.warehouse.dir=${WAREHOUSE_DIR}"
+echo "hive.exec.scratchdir=${TMP_DIR}"
+
+exit 0
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startAllNCs.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startAllNCs.sh
new file mode 100644
index 0000000..d30da26
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startAllNCs.sh
@@ -0,0 +1,6 @@
+PREGELIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+   ssh $i "cd ${PREGELIX_PATH}; export JAVA_HOME=${JAVA_HOME}; bin/startnc.sh"
+done
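+
+# Note: assumes passwordless ssh to every host in conf/slaves and that this
+# distribution is installed at the same path on each of them.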
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startCluster.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startCluster.sh
new file mode 100644
index 0000000..6aa9161
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startCluster.sh
@@ -0,0 +1,19 @@
+bin/startcc.sh
+sleep 5
+bin/startAllNCs.sh
+
+. conf/cluster.properties
+# do we need to specify the version somewhere?
+hyrackcmd=`ls ${HYRACKS_HOME}/hyracks-cli/target/hyracks-cli-*-binary-assembly/bin/hyrackscli`
+# find zip file
+appzip=`ls $PWD/../hivesterix-dist-*-binary-assembly.zip`
+
+[ -f $hyrackcmd ] || { echo "Hyracks commandline is missing"; exit -1;}
+[ -f $appzip ] || { echo "hivesterix binary-assembly.zip is missing"; exit -1;}
+
+CCHOST_NAME=`cat conf/master`
+
+IPADDR=`bin/getip.sh`
+echo "connect to \"${IPADDR}:${CC_CLIENTPORT}\"; create application hivesterix \"$appzip\";" | $hyrackcmd 
+echo ""
+
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startDebugNc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startDebugNc.sh
new file mode 100755
index 0000000..fe6cf27
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startDebugNc.sh
@@ -0,0 +1,50 @@
+hostname
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+. conf/debugnc.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR2
+mkdir $NCTMP_DIR2
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR2
+mkdir $NCLOGS_DIR2
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS2 | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir
+	mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+#Get OS
+IPADDR=`bin/getip.sh`
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+NODEID=${NODEID}2
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS2
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR2
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR  -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS2}" &> $NCLOGS_DIR2/$NODEID.log &
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startcc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startcc.sh
new file mode 100644
index 0000000..efb79ce
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startcc.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+hostname
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CCHOST=`bin/getip.sh`
+
+#Remove the temp dir
+rm -rf $CCTMP_DIR
+mkdir $CCTMP_DIR
+
+#Remove the logs dir
+rm -rf $CCLOGS_DIR
+mkdir $CCLOGS_DIR
+
+#Export JAVA_HOME and JAVA_OPTS
+export JAVA_HOME=$JAVA_HOME
+export JAVA_OPTS=$CCJAVA_OPTS
+
+#Launch hyracks cc script
+chmod -R 755 $HYRACKS_HOME
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 0 &> $CCLOGS_DIR/cc.log &
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/startnc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/startnc.sh
new file mode 100644
index 0000000..6e0f90e
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/startnc.sh
@@ -0,0 +1,49 @@
+hostname
+
+MY_NAME=`hostname`
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR
+mkdir $NCTMP_DIR
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR
+mkdir $NCLOGS_DIR
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir
+	mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+IPADDR=`bin/getip.sh`
+#echo $IPADDR
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR  -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS}" &> $NCLOGS_DIR/$NODEID.log &
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopAllNCs.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopAllNCs.sh
new file mode 100644
index 0000000..12367c1
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopAllNCs.sh
@@ -0,0 +1,6 @@
+PREGELIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+   ssh $i "cd ${PREGELIX_PATH}; bin/stopnc.sh"
+done
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopCluster.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopCluster.sh
new file mode 100644
index 0000000..4889934
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopCluster.sh
@@ -0,0 +1,3 @@
+bin/stopAllNCs.sh
+sleep 2
+bin/stopcc.sh
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopcc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopcc.sh
new file mode 100644
index 0000000..c2f525a
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopcc.sh
@@ -0,0 +1,10 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep hyracks|awk '{print $2}'`
+echo $PID
+kill -9 $PID
+
+#Clean up CC temp dir
+rm -rf $CCTMP_DIR/*
diff --git a/hivesterix/hivesterix-dist/src/main/resources/scripts/stopnc.sh b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopnc.sh
new file mode 100644
index 0000000..03ce4e7
--- /dev/null
+++ b/hivesterix/hivesterix-dist/src/main/resources/scripts/stopnc.sh
@@ -0,0 +1,23 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+
+if [ "$PID" == "" ]; then
+  USERID=`id | sed 's/^uid=//;s/(.*$//'`
+  PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+fi
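+
+# (Fallback above: on systems where ps prints a numeric UID instead of the user
+# name, retry the lookup with the UID extracted from `id`.)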
+
+echo $PID
+kill -9 $PID
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir/*
+done
+
+#Clean up NC temp dir
+rm -rf $NCTMP_DIR/*