Merged fullstack_staging_bigmerge_target -r 2735:2760

git-svn-id: https://hyracks.googlecode.com/svn/branches/fullstack_staging@2768 123451ca-8445-de46-9d55-352943316053
diff --git a/algebricks/algebricks-compiler/src/main/java/edu/uci/ics/hyracks/algebricks/compiler/api/AbstractCompilerFactoryBuilder.java b/algebricks/algebricks-compiler/src/main/java/edu/uci/ics/hyracks/algebricks/compiler/api/AbstractCompilerFactoryBuilder.java
index f1e7acb..dde4443 100644
--- a/algebricks/algebricks-compiler/src/main/java/edu/uci/ics/hyracks/algebricks/compiler/api/AbstractCompilerFactoryBuilder.java
+++ b/algebricks/algebricks-compiler/src/main/java/edu/uci/ics/hyracks/algebricks/compiler/api/AbstractCompilerFactoryBuilder.java
@@ -30,6 +30,7 @@
 import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspectorFactory;
 import edu.uci.ics.hyracks.algebricks.data.IBinaryComparatorFactoryProvider;
 import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFamilyProvider;
 import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspectorFactory;
 import edu.uci.ics.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider;
 import edu.uci.ics.hyracks.algebricks.data.IPrinterFactoryProvider;
@@ -44,6 +45,7 @@
     protected ITypeTraitProvider typeTraitProvider;
     protected ISerializerDeserializerProvider serializerDeserializerProvider;
     protected IBinaryHashFunctionFactoryProvider hashFunctionFactoryProvider;
+    protected IBinaryHashFunctionFamilyProvider hashFunctionFamilyProvider;
     protected IBinaryComparatorFactoryProvider comparatorFactoryProvider;
     protected IBinaryBooleanInspectorFactory binaryBooleanInspectorFactory;
     protected IBinaryIntegerInspectorFactory binaryIntegerInspectorFactory;
@@ -94,6 +96,14 @@
         return hashFunctionFactoryProvider;
     }
 
+    public void setHashFunctionFamilyProvider(IBinaryHashFunctionFamilyProvider hashFunctionFamilyProvider) {
+        this.hashFunctionFamilyProvider = hashFunctionFamilyProvider;
+    }
+
+    public IBinaryHashFunctionFamilyProvider getHashFunctionFamilyProvider() {
+        return hashFunctionFamilyProvider;
+    }
+
     public void setComparatorFactoryProvider(IBinaryComparatorFactoryProvider comparatorFactoryProvider) {
         this.comparatorFactoryProvider = comparatorFactoryProvider;
     }
diff --git a/algebricks/algebricks-compiler/src/main/java/edu/uci/ics/hyracks/algebricks/compiler/api/HeuristicCompilerFactoryBuilder.java b/algebricks/algebricks-compiler/src/main/java/edu/uci/ics/hyracks/algebricks/compiler/api/HeuristicCompilerFactoryBuilder.java
index 1d21463..edc1b66 100644
--- a/algebricks/algebricks-compiler/src/main/java/edu/uci/ics/hyracks/algebricks/compiler/api/HeuristicCompilerFactoryBuilder.java
+++ b/algebricks/algebricks-compiler/src/main/java/edu/uci/ics/hyracks/algebricks/compiler/api/HeuristicCompilerFactoryBuilder.java
@@ -84,11 +84,12 @@
                     public JobSpecification createJob(Object appContext) throws AlgebricksException {
                         AlgebricksConfig.ALGEBRICKS_LOGGER.fine("Starting Job Generation.\n");
                         JobGenContext context = new JobGenContext(null, metadata, appContext,
-                                serializerDeserializerProvider, hashFunctionFactoryProvider, comparatorFactoryProvider,
-                                typeTraitProvider, binaryBooleanInspectorFactory, binaryIntegerInspectorFactory,
-                                printerProvider, nullWriterFactory, normalizedKeyComputerFactoryProvider,
-                                expressionRuntimeProvider, expressionTypeComputer, nullableTypeComputer, oc,
-                                expressionEvalSizeComputer, partialAggregationTypeComputer, frameSize, clusterLocations);
+                                serializerDeserializerProvider, hashFunctionFactoryProvider,
+                                hashFunctionFamilyProvider, comparatorFactoryProvider, typeTraitProvider,
+                                binaryBooleanInspectorFactory, binaryIntegerInspectorFactory, printerProvider,
+                                nullWriterFactory, normalizedKeyComputerFactoryProvider, expressionRuntimeProvider,
+                                expressionTypeComputer, nullableTypeComputer, oc, expressionEvalSizeComputer,
+                                partialAggregationTypeComputer, frameSize, clusterLocations);
                         PlanCompiler pc = new PlanCompiler(context);
                         return pc.compilePlan(plan, null);
                     }
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/ILogicalOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/ILogicalOperator.java
index 165fccd..6701385 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/ILogicalOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/ILogicalOperator.java
@@ -89,4 +89,9 @@
     public IPhysicalPropertiesVector getDeliveredPhysicalProperties();
 
     public void computeDeliveredPhysicalProperties(IOptimizationContext context) throws AlgebricksException;
+    
+    /**
+     * Indicates whether the expressions used by this operator must be variable reference expressions.
+     */
+    public boolean requiresVariableReferenceExpressions();
 }
\ No newline at end of file
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/IOptimizationContext.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/IOptimizationContext.java
index 468d25c..0aa1ff6 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/IOptimizationContext.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/base/IOptimizationContext.java
@@ -51,6 +51,8 @@
      * returns true if op1 and op2 have already been compared
      */
     public abstract boolean checkAndAddToAlreadyCompared(ILogicalOperator op1, ILogicalOperator op2);
+    
+    public abstract void removeFromAlreadyCompared(ILogicalOperator op1);
 
     public abstract void addNotToBeInlinedVar(LogicalVariable var);
 
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/expressions/ScalarFunctionCallExpression.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/expressions/ScalarFunctionCallExpression.java
index bf3f82b..b284b22 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/expressions/ScalarFunctionCallExpression.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/expressions/ScalarFunctionCallExpression.java
@@ -39,9 +39,11 @@
 
     @Override
     public ScalarFunctionCallExpression cloneExpression() {
-        cloneAnnotations();
         List<Mutable<ILogicalExpression>> clonedArgs = cloneArguments();
-        return new ScalarFunctionCallExpression(finfo, clonedArgs);
+        ScalarFunctionCallExpression funcExpr = new ScalarFunctionCallExpression(finfo, clonedArgs);
+        funcExpr.getAnnotations().putAll(cloneAnnotations());
+        funcExpr.setOpaqueParameters(this.getOpaqueParameters());
+        return funcExpr;
     }
 
     @Override
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/expressions/UnnestingFunctionCallExpression.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/expressions/UnnestingFunctionCallExpression.java
index 71932d8..652f9b0 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/expressions/UnnestingFunctionCallExpression.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/expressions/UnnestingFunctionCallExpression.java
@@ -45,6 +45,7 @@
         List<Mutable<ILogicalExpression>> clonedArgs = cloneArguments();
         UnnestingFunctionCallExpression ufce = new UnnestingFunctionCallExpression(finfo, clonedArgs);
         ufce.setReturnsUniqueValues(returnsUniqueValues);
+        ufce.setOpaqueParameters(this.getOpaqueParameters());
         return ufce;
     }
 
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/AbstractLogicalOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/AbstractLogicalOperator.java
index dc0edfe..64dbdef 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/AbstractLogicalOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/AbstractLogicalOperator.java
@@ -182,4 +182,9 @@
         return new PropagatingTypeEnvironment(ctx.getExpressionTypeComputer(), ctx.getNullableTypeComputer(),
                 ctx.getMetadataProvider(), TypePropagationPolicy.ALL, envPointers);
     }
+    
+    @Override
+    public boolean requiresVariableReferenceExpressions() {
+        return true;
+    }
 }
\ No newline at end of file
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java
index a4dc2e0..4543997 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/AssignOperator.java
@@ -89,4 +89,8 @@
         return env;
     }
 
+    @Override
+    public boolean requiresVariableReferenceExpressions() {
+        return false;
+    }
 }
\ No newline at end of file
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DataSourceScanOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DataSourceScanOperator.java
index 3227f3d..3c21c8f 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DataSourceScanOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DataSourceScanOperator.java
@@ -106,5 +106,4 @@
         }
         return env;
     }
-
 }
\ No newline at end of file
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DieOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DieOperator.java
index 20aa574..03bfcba 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DieOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DieOperator.java
@@ -81,4 +81,8 @@
         return createPropagatingAllInputsTypeEnvironment(ctx);
     }
 
+    @Override
+    public boolean requiresVariableReferenceExpressions() {
+        return false;
+    }
 }
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DistinctOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DistinctOperator.java
index ee0dcf6..b1da831 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DistinctOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/DistinctOperator.java
@@ -28,6 +28,7 @@
 import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;

 import edu.uci.ics.hyracks.algebricks.core.algebra.properties.VariablePropagationPolicy;

 import edu.uci.ics.hyracks.algebricks.core.algebra.typing.ITypingContext;

+import edu.uci.ics.hyracks.algebricks.core.algebra.typing.NonPropagatingTypeEnvironment;

 import edu.uci.ics.hyracks.algebricks.core.algebra.visitors.ILogicalExpressionReferenceTransform;

 import edu.uci.ics.hyracks.algebricks.core.algebra.visitors.ILogicalOperatorVisitor;

 

@@ -49,12 +50,34 @@
 

     @Override

     public void recomputeSchema() {

-        schema = new ArrayList<LogicalVariable>(inputs.get(0).getValue().getSchema());

+        if (schema == null) {

+            schema = new ArrayList<LogicalVariable>();

+        }

+        schema.clear();

+        for (Mutable<ILogicalExpression> eRef : expressions) {

+            ILogicalExpression e = eRef.getValue();

+            if (e.getExpressionTag() == LogicalExpressionTag.VARIABLE) {

+                VariableReferenceExpression v = (VariableReferenceExpression) e;

+                schema.add(v.getVariableReference());

+            }

+        }

     }

 

     @Override

     public VariablePropagationPolicy getVariablePropagationPolicy() {

-        return VariablePropagationPolicy.ALL;

+        return new VariablePropagationPolicy() {

+            @Override

+            public void propagateVariables(IOperatorSchema target, IOperatorSchema... sources)

+                    throws AlgebricksException {

+                for (Mutable<ILogicalExpression> eRef : expressions) {

+                    ILogicalExpression e = eRef.getValue();

+                    if (e.getExpressionTag() == LogicalExpressionTag.VARIABLE) {

+                        VariableReferenceExpression v = (VariableReferenceExpression) e;

+                        target.addVariable(v.getVariableReference());

+                    }

+                }

+            }

+        };

     }

 

     @Override

@@ -105,7 +128,16 @@
 

     @Override

     public IVariableTypeEnvironment computeOutputTypeEnvironment(ITypingContext ctx) throws AlgebricksException {

-        return createPropagatingAllInputsTypeEnvironment(ctx);

+        IVariableTypeEnvironment env = new NonPropagatingTypeEnvironment(ctx.getExpressionTypeComputer(), ctx.getMetadataProvider());

+        IVariableTypeEnvironment childEnv = ctx.getOutputTypeEnvironment(inputs.get(0).getValue());

+        for (Mutable<ILogicalExpression> exprRef : expressions) {

+            ILogicalExpression expr = exprRef.getValue();

+            if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {

+                VariableReferenceExpression varRefExpr = (VariableReferenceExpression) expr;

+                env.setVarType(varRefExpr.getVariableReference(), childEnv.getType(expr));

+            }

+        }

+        return env;

     }

 

 }

diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/ExtensionOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/ExtensionOperator.java
index 101b6f5..5aa858f 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/ExtensionOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/ExtensionOperator.java
@@ -51,7 +51,7 @@
 
     @Override
     public boolean acceptExpressionTransform(ILogicalExpressionReferenceTransform transform) throws AlgebricksException {
-        return false;
+        return delegate.acceptExpressionTransform(transform);
     }
 
     @Override
@@ -108,4 +108,13 @@
         return this.createPropagatingAllInputsTypeEnvironment(ctx);
     }
 
+    @Override
+    public String toString() {
+        return delegate.toString();
+    }
+    
+    public IOperatorExtension getDelegate() {
+        return delegate;
+    }
+
 }
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/IOperatorExtension.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/IOperatorExtension.java
index 98c3301..0a80337 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/IOperatorExtension.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/IOperatorExtension.java
@@ -14,6 +14,7 @@
  */
 package edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical;
 
+import java.util.Collection;
 import java.util.List;
 
 import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -42,5 +43,8 @@
     void setPhysicalOperator(IPhysicalOperator physicalOperator);
 
     ExecutionMode getExecutionMode();
-
+    
+    public void getUsedVariables(Collection<LogicalVariable> usedVars);
+    
+    public void getProducedVariables(Collection<LogicalVariable> producedVars);
 }
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/LimitOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/LimitOperator.java
index 3c6a699..8e67ed9 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/LimitOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/LimitOperator.java
@@ -109,5 +109,4 @@
     public IVariableTypeEnvironment computeOutputTypeEnvironment(ITypingContext ctx) throws AlgebricksException {
         return createPropagatingAllInputsTypeEnvironment(ctx);
     }
-
 }
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java
index 8c611b0..fda920a 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/SelectOperator.java
@@ -102,4 +102,8 @@
         return env;
     }
 
+    @Override
+    public boolean requiresVariableReferenceExpressions() {
+        return false;
+    }
 }
\ No newline at end of file
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/ProducedVariableVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/ProducedVariableVisitor.java
index 994c6cb..78b6801 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/ProducedVariableVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/ProducedVariableVisitor.java
@@ -249,6 +249,7 @@
 

     @Override

     public Void visitExtensionOperator(ExtensionOperator op, Void arg) throws AlgebricksException {

+        op.getDelegate().getProducedVariables(producedVariables);

         return null;

     }

 }

diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SchemaVariableVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SchemaVariableVisitor.java
index 9295179..a759e35 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SchemaVariableVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/SchemaVariableVisitor.java
@@ -85,7 +85,13 @@
 

     @Override

     public Void visitDistinctOperator(DistinctOperator op, Void arg) throws AlgebricksException {

-        standardLayout(op);

+        for (Mutable<ILogicalExpression> exprRef : op.getExpressions()) {

+            ILogicalExpression expr = exprRef.getValue();

+            if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {

+                VariableReferenceExpression varRefExpr = (VariableReferenceExpression) expr;

+                schemaVariables.add(varRefExpr.getVariableReference());

+            }

+        }

         return null;

     }

 

diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/UsedVariableVisitor.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/UsedVariableVisitor.java
index 3a82ccd..0ea9367 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/UsedVariableVisitor.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/UsedVariableVisitor.java
@@ -25,6 +25,7 @@
 import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;

 import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;

+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IPhysicalOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;

@@ -33,6 +34,7 @@
 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.DistinctOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.EmptyTupleSourceOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExchangeOperator;

+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.InnerJoinOperator;

@@ -49,13 +51,17 @@
 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ScriptOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SinkOperator;

-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ExtensionOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SubplanOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.WriteOperator;

 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.WriteResultOperator;

+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.HashPartitionExchangePOperator;

+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.HashPartitionMergeExchangePOperator;

+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.RangePartitionPOperator;

+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.physical.SortMergeExchangePOperator;

+import edu.uci.ics.hyracks.algebricks.core.algebra.properties.OrderColumn;

 import edu.uci.ics.hyracks.algebricks.core.algebra.visitors.ILogicalOperatorVisitor;

 

 public class UsedVariableVisitor implements ILogicalOperatorVisitor<Void, Void> {

@@ -106,8 +112,49 @@
     }

 

     @Override

-    public Void visitExchangeOperator(ExchangeOperator op, Void arg) {

-        // does not use any variable

+    public Void visitExchangeOperator(ExchangeOperator op, Void arg) throws AlgebricksException {

+        // Used variables depend on the physical operator.

+        if (op.getPhysicalOperator() != null) {

+            IPhysicalOperator physOp = op.getPhysicalOperator();

+            switch (physOp.getOperatorTag()) {

+                case BROADCAST_EXCHANGE:

+                case ONE_TO_ONE_EXCHANGE:

+                case RANDOM_MERGE_EXCHANGE: {

+                    // No variables used.

+                    break;

+                }

+                case HASH_PARTITION_EXCHANGE: {

+                    HashPartitionExchangePOperator concreteOp = (HashPartitionExchangePOperator) physOp;

+                    usedVariables.addAll(concreteOp.getHashFields());

+                    break;

+                }

+                case HASH_PARTITION_MERGE_EXCHANGE: {

+                    HashPartitionMergeExchangePOperator concreteOp = (HashPartitionMergeExchangePOperator) physOp;

+                    usedVariables.addAll(concreteOp.getPartitionFields());

+                    for (OrderColumn orderCol : concreteOp.getOrderColumns()) {

+                        usedVariables.add(orderCol.getColumn());

+                    }

+                    break;

+                }

+                case SORT_MERGE_EXCHANGE: {

+                    SortMergeExchangePOperator concreteOp = (SortMergeExchangePOperator) physOp;

+                    for (OrderColumn orderCol : concreteOp.getSortColumns()) {

+                        usedVariables.add(orderCol.getColumn());

+                    }

+                    break;

+                }

+                case RANGE_PARTITION_EXCHANGE: {

+                    RangePartitionPOperator concreteOp = (RangePartitionPOperator) physOp;

+                    for (OrderColumn partCol : concreteOp.getPartitioningFields()) {

+                        usedVariables.add(partCol.getColumn());

+                    }

+                    break;

+                }

+                default: {

+                    throw new AlgebricksException("Unhandled physical operator tag '" + physOp.getOperatorTag() + "'.");

+                }

+            }

+        }

         return null;

     }

 

@@ -297,6 +344,7 @@
 

     @Override

     public Void visitExtensionOperator(ExtensionOperator op, Void arg) throws AlgebricksException {

+        op.getDelegate().getUsedVariables(usedVariables);

         return null;

     }

 

diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/VariableUtilities.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/VariableUtilities.java
index 25f22e7..f66f99b 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/VariableUtilities.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/logical/visitors/VariableUtilities.java
@@ -61,14 +61,22 @@
             ITypingContext ctx) throws AlgebricksException {

         substituteVariables(op, v1, v2, true, ctx);

     }

-

+    

+    public static void substituteVariablesInDescendantsAndSelf(ILogicalOperator op, LogicalVariable v1,

+            LogicalVariable v2, ITypingContext ctx) throws AlgebricksException {

+        for (Mutable<ILogicalOperator> childOp : op.getInputs()) {

+            substituteVariablesInDescendantsAndSelf(childOp.getValue(), v1, v2, ctx);

+        }

+        substituteVariables(op, v1, v2, true, ctx);

+    }

+    

     public static void substituteVariables(ILogicalOperator op, LogicalVariable v1, LogicalVariable v2,

             boolean goThroughNts, ITypingContext ctx) throws AlgebricksException {

         ILogicalOperatorVisitor<Void, Pair<LogicalVariable, LogicalVariable>> visitor = new SubstituteVariableVisitor(

                 goThroughNts, ctx);

         op.accept(visitor, new Pair<LogicalVariable, LogicalVariable>(v1, v2));

     }

-

+    

     public static <T> boolean varListEqualUnordered(List<T> var, List<T> varArg) {

         Set<T> varSet = new HashSet<T>();

         Set<T> varArgSet = new HashSet<T>();

diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/HashPartitionMergeExchangePOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/HashPartitionMergeExchangePOperator.java
index f3c9e5a..61d4880 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/HashPartitionMergeExchangePOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/HashPartitionMergeExchangePOperator.java
@@ -158,5 +158,13 @@
                 comparatorFactories);
         return new Pair<IConnectorDescriptor, TargetConstraint>(conn, null);
     }
+    
+    public List<LogicalVariable> getPartitionFields() {
+        return partitionFields;
+    }
+    
+    public List<OrderColumn> getOrderColumns() {
+        return orderColumns;
+    }
 
 }
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/HybridHashJoinPOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/HybridHashJoinPOperator.java
index c737cc4..6da42b4 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/HybridHashJoinPOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/HybridHashJoinPOperator.java
@@ -32,14 +32,21 @@
 import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenContext;
 import edu.uci.ics.hyracks.algebricks.core.jobgen.impl.JobGenHelper;
 import edu.uci.ics.hyracks.algebricks.data.IBinaryComparatorFactoryProvider;
+import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
 import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
 import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
 import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
 import edu.uci.ics.hyracks.api.dataflow.value.INullWriterFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePairComparator;
+import edu.uci.ics.hyracks.api.dataflow.value.ITuplePairComparatorFactory;
 import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.api.job.IOperatorDescriptorRegistry;
 import edu.uci.ics.hyracks.dataflow.std.join.HybridHashJoinOperatorDescriptor;
+import edu.uci.ics.hyracks.dataflow.std.join.OptimizedHybridHashJoinOperatorDescriptor;
 
 public class HybridHashJoinPOperator extends AbstractHashJoinPOperator {
 
@@ -90,6 +97,8 @@
         IVariableTypeEnvironment env = context.getTypeEnvironment(op);
         IBinaryHashFunctionFactory[] hashFunFactories = JobGenHelper.variablesToBinaryHashFunctionFactories(
                 keysLeftBranch, env, context);
+        IBinaryHashFunctionFamily[] hashFunFamilies = JobGenHelper.variablesToBinaryHashFunctionFamilies(
+                keysLeftBranch, env, context);
         IBinaryComparatorFactory[] comparatorFactories = new IBinaryComparatorFactory[keysLeft.length];
         int i = 0;
         IBinaryComparatorFactoryProvider bcfp = context.getBinaryComparatorFactoryProvider();
@@ -97,33 +106,75 @@
             Object t = env.getVarType(v);
             comparatorFactories[i++] = bcfp.getBinaryComparatorFactory(t, true);
         }
-        RecordDescriptor recDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), propagatedSchema, context);
+        RecordDescriptor recDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op),
+                propagatedSchema, context);
         IOperatorDescriptorRegistry spec = builder.getJobSpec();
         IOperatorDescriptor opDesc = null;
-        try {
-            switch (kind) {
-                case INNER: {
-                    opDesc = new HybridHashJoinOperatorDescriptor(spec, getMemSizeInFrames(),
-                            maxInputBuildSizeInFrames, aveRecordsPerFrame, getFudgeFactor(), keysLeft, keysRight,
-                            hashFunFactories, comparatorFactories, recDescriptor);
-                    break;
-                }
-                case LEFT_OUTER: {
-                    INullWriterFactory[] nullWriterFactories = new INullWriterFactory[inputSchemas[1].getSize()];
-                    for (int j = 0; j < nullWriterFactories.length; j++) {
-                        nullWriterFactories[j] = context.getNullWriterFactory();
-                    }
-                    opDesc = new HybridHashJoinOperatorDescriptor(spec, getMemSizeInFrames(),
-                            maxInputBuildSizeInFrames, aveRecordsPerFrame, getFudgeFactor(), keysLeft, keysRight,
-                            hashFunFactories, comparatorFactories, recDescriptor, true, nullWriterFactories);
-                    break;
-                }
-                default: {
-                    throw new NotImplementedException();
-                }
+
+        boolean optimizedHashJoin = true;
+        for (IBinaryHashFunctionFamily family : hashFunFamilies) {
+            if (family == null) {
+                optimizedHashJoin = false;
+                break;
             }
-        } catch (HyracksDataException e) {
-            throw new AlgebricksException(e);
+        }
+
+        if (!optimizedHashJoin) {
+            try {
+                switch (kind) {
+                    case INNER: {
+                        opDesc = new HybridHashJoinOperatorDescriptor(spec, getMemSizeInFrames(),
+                                maxInputBuildSizeInFrames, aveRecordsPerFrame, getFudgeFactor(), keysLeft, keysRight,
+                                hashFunFactories, comparatorFactories, recDescriptor);
+                        break;
+                    }
+                    case LEFT_OUTER: {
+                        INullWriterFactory[] nullWriterFactories = new INullWriterFactory[inputSchemas[1].getSize()];
+                        for (int j = 0; j < nullWriterFactories.length; j++) {
+                            nullWriterFactories[j] = context.getNullWriterFactory();
+                        }
+                        opDesc = new HybridHashJoinOperatorDescriptor(spec, getMemSizeInFrames(),
+                                maxInputBuildSizeInFrames, aveRecordsPerFrame, getFudgeFactor(), keysLeft, keysRight,
+                                hashFunFactories, comparatorFactories, recDescriptor, true, nullWriterFactories);
+                        break;
+                    }
+                    default: {
+                        throw new NotImplementedException();
+                    }
+                }
+            } catch (HyracksDataException e) {
+                throw new AlgebricksException(e);
+            }
+        } else {
+            try {
+                switch (kind) {
+                    case INNER: {
+                        opDesc = new OptimizedHybridHashJoinOperatorDescriptor(spec, getMemSizeInFrames(),
+                                maxInputBuildSizeInFrames, getFudgeFactor(), keysLeft, keysRight, hashFunFamilies,
+                                comparatorFactories, recDescriptor, new JoinMultiComparatorFactory(comparatorFactories,
+                                        keysLeft, keysRight), new JoinMultiComparatorFactory(comparatorFactories,
+                                        keysRight, keysLeft));
+                        break;
+                    }
+                    case LEFT_OUTER: {
+                        INullWriterFactory[] nullWriterFactories = new INullWriterFactory[inputSchemas[1].getSize()];
+                        for (int j = 0; j < nullWriterFactories.length; j++) {
+                            nullWriterFactories[j] = context.getNullWriterFactory();
+                        }
+                        opDesc = new OptimizedHybridHashJoinOperatorDescriptor(spec, getMemSizeInFrames(),
+                                maxInputBuildSizeInFrames, getFudgeFactor(), keysLeft, keysRight, hashFunFamilies,
+                                comparatorFactories, recDescriptor, new JoinMultiComparatorFactory(comparatorFactories,
+                                        keysLeft, keysRight), new JoinMultiComparatorFactory(comparatorFactories,
+                                        keysRight, keysLeft), true, nullWriterFactories);
+                        break;
+                    }
+                    default: {
+                        throw new NotImplementedException();
+                    }
+                }
+            } catch (HyracksDataException e) {
+                throw new AlgebricksException(e);
+            }
         }
         contributeOpDesc(builder, (AbstractLogicalOperator) op, opDesc);
 
@@ -140,3 +191,72 @@
     }
 
 }
+
+/**
+ * {@ ITuplePairComparatorFactory} implementation for optimized hybrid hash join.
+ */
+class JoinMultiComparatorFactory implements ITuplePairComparatorFactory {
+    private static final long serialVersionUID = 1L;
+
+    private final IBinaryComparatorFactory[] binaryComparatorFactories;
+    private final int[] keysLeft;
+    private final int[] keysRight;
+
+    public JoinMultiComparatorFactory(IBinaryComparatorFactory[] binaryComparatorFactory, int[] keysLeft,
+            int[] keysRight) {
+        this.binaryComparatorFactories = binaryComparatorFactory;
+        this.keysLeft = keysLeft;
+        this.keysRight = keysRight;
+    }
+
+    @Override
+    public ITuplePairComparator createTuplePairComparator(IHyracksTaskContext ctx) {
+        IBinaryComparator[] binaryComparators = new IBinaryComparator[binaryComparatorFactories.length];
+        for (int i = 0; i < binaryComparators.length; i++) {
+            binaryComparators[i] = binaryComparatorFactories[i].createBinaryComparator();
+        }
+        return new JoinMultiComparator(binaryComparators, keysLeft, keysRight);
+    }
+}
+
+/**
+ * {@ ITuplePairComparator} implementation for optimized hybrid hash join.
+ * The comparator applies multiple binary comparators, one for each key pairs
+ */
+class JoinMultiComparator implements ITuplePairComparator {
+    private final IBinaryComparator[] binaryComparators;
+    private final int[] keysLeft;
+    private final int[] keysRight;
+
+    public JoinMultiComparator(IBinaryComparator[] bComparator, int[] keysLeft, int[] keysRight) {
+        this.binaryComparators = bComparator;
+        this.keysLeft = keysLeft;
+        this.keysRight = keysRight;
+    }
+
+    @Override
+    public int compare(IFrameTupleAccessor accessor0, int tIndex0, IFrameTupleAccessor accessor1, int tIndex1) {
+        int tStart0 = accessor0.getTupleStartOffset(tIndex0);
+        int fStartOffset0 = accessor0.getFieldSlotsLength() + tStart0;
+
+        int tStart1 = accessor1.getTupleStartOffset(tIndex1);
+        int fStartOffset1 = accessor1.getFieldSlotsLength() + tStart1;
+
+        for (int i = 0; i < binaryComparators.length; i++) {
+            int fStart0 = accessor0.getFieldStartOffset(tIndex0, keysLeft[i]);
+            int fEnd0 = accessor0.getFieldEndOffset(tIndex0, keysLeft[i]);
+            int fLen0 = fEnd0 - fStart0;
+
+            int fStart1 = accessor1.getFieldStartOffset(tIndex1, keysRight[i]);
+            int fEnd1 = accessor1.getFieldEndOffset(tIndex1, keysRight[i]);
+            int fLen1 = fEnd1 - fStart1;
+
+            int c = binaryComparators[i].compare(accessor0.getBuffer().array(), fStart0 + fStartOffset0, fLen0,
+                    accessor1.getBuffer().array(), fStart1 + fStartOffset1, fLen1);
+            if (c != 0) {
+                return c;
+            }
+        }
+        return 0;
+    }
+}
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/NLJoinPOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/NLJoinPOperator.java
index 8cbd2d8..d153f90 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/NLJoinPOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/NLJoinPOperator.java
@@ -44,6 +44,7 @@
 import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
 import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
 import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
+import edu.uci.ics.hyracks.api.dataflow.value.INullWriterFactory;
 import edu.uci.ics.hyracks.api.dataflow.value.ITuplePairComparator;
 import edu.uci.ics.hyracks.api.dataflow.value.ITuplePairComparatorFactory;
 import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
@@ -56,8 +57,8 @@
 import edu.uci.ics.hyracks.dataflow.std.join.NestedLoopJoinOperatorDescriptor;
 
 /**
- * Left input is broadcast and preserves its local properties.
- * Right input can be partitioned in any way.
+ * Left input is broadcast and preserves its local properties. Right input can
+ * be partitioned in any way.
  */
 public class NLJoinPOperator extends AbstractJoinPOperator {
 
@@ -97,7 +98,7 @@
                 pp = pv1.getPartitioningProperty();
             }
         } else {
-        	pp = IPartitioningProperty.UNPARTITIONED;
+            pp = IPartitioningProperty.UNPARTITIONED;
         }
 
         List<ILocalStructuralProperty> localProps = new LinkedList<ILocalStructuralProperty>();
@@ -122,7 +123,8 @@
             IOperatorSchema propagatedSchema, IOperatorSchema[] inputSchemas, IOperatorSchema outerPlanSchema)
             throws AlgebricksException {
         AbstractBinaryJoinOperator join = (AbstractBinaryJoinOperator) op;
-        RecordDescriptor recDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op), propagatedSchema, context);
+        RecordDescriptor recDescriptor = JobGenHelper.mkRecordDescriptor(context.getTypeEnvironment(op),
+                propagatedSchema, context);
         IOperatorSchema[] conditionInputSchemas = new IOperatorSchema[1];
         conditionInputSchemas[0] = propagatedSchema;
         IExpressionRuntimeProvider expressionRuntimeProvider = context.getExpressionRuntimeProvider();
@@ -135,10 +137,19 @@
 
         switch (kind) {
             case INNER: {
-                opDesc = new NestedLoopJoinOperatorDescriptor(spec, comparatorFactory, recDescriptor, memSize);
+                opDesc = new NestedLoopJoinOperatorDescriptor(spec, comparatorFactory, recDescriptor, memSize, false,
+                        null);
                 break;
             }
-            case LEFT_OUTER:
+            case LEFT_OUTER: {
+                INullWriterFactory[] nullWriterFactories = new INullWriterFactory[inputSchemas[1].getSize()];
+                for (int j = 0; j < nullWriterFactories.length; j++) {
+                    nullWriterFactories[j] = context.getNullWriterFactory();
+                }
+                opDesc = new NestedLoopJoinOperatorDescriptor(spec, comparatorFactory, recDescriptor, memSize, true,
+                        nullWriterFactories);
+                break;
+            }
             default: {
                 throw new NotImplementedException();
             }
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/RangePartitionPOperator.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/RangePartitionPOperator.java
index 7e3c935..8875f6c 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/RangePartitionPOperator.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/algebra/operators/physical/RangePartitionPOperator.java
@@ -16,6 +16,7 @@
 
 import java.util.ArrayList;
 import java.util.LinkedList;
+import java.util.List;
 
 import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
 import edu.uci.ics.hyracks.algebricks.common.exceptions.NotImplementedException;
@@ -69,5 +70,9 @@
             ILogicalOperator op, IOperatorSchema opSchema, JobGenContext context) throws AlgebricksException {
         throw new NotImplementedException();
     }
+    
+    public List<OrderColumn> getPartitioningFields() {
+        return partitioningFields;
+    }
 
 }
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobGenContext.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobGenContext.java
index 22a1a81..365d1a5 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobGenContext.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobGenContext.java
@@ -34,6 +34,7 @@
 import edu.uci.ics.hyracks.algebricks.data.IBinaryBooleanInspectorFactory;
 import edu.uci.ics.hyracks.algebricks.data.IBinaryComparatorFactoryProvider;
 import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFamilyProvider;
 import edu.uci.ics.hyracks.algebricks.data.IBinaryIntegerInspectorFactory;
 import edu.uci.ics.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider;
 import edu.uci.ics.hyracks.algebricks.data.IPrinterFactoryProvider;
@@ -42,151 +43,167 @@
 import edu.uci.ics.hyracks.api.dataflow.value.INullWriterFactory;
 
 public class JobGenContext {
-    private final IOperatorSchema outerFlowSchema;
-    private final Map<ILogicalOperator, IOperatorSchema> schemaMap = new HashMap<ILogicalOperator, IOperatorSchema>();
-    private final ISerializerDeserializerProvider serializerDeserializerProvider;
-    private final IBinaryHashFunctionFactoryProvider hashFunctionFactoryProvider;
-    private final IBinaryComparatorFactoryProvider comparatorFactoryProvider;
-    private final IPrinterFactoryProvider printerFactoryProvider;
-    private final ITypeTraitProvider typeTraitProvider;
-    private final IMetadataProvider<?, ?> metadataProvider;
-    private final INullWriterFactory nullWriterFactory;
-    private final INormalizedKeyComputerFactoryProvider normalizedKeyComputerFactoryProvider;
-    private final Object appContext;
-    private final IBinaryBooleanInspectorFactory booleanInspectorFactory;
-    private final IBinaryIntegerInspectorFactory integerInspectorFactory;
-    private final IExpressionRuntimeProvider expressionRuntimeProvider;
-    private final IExpressionTypeComputer expressionTypeComputer;
-    private final IExpressionEvalSizeComputer expressionEvalSizeComputer;
-    private final IPartialAggregationTypeComputer partialAggregationTypeComputer;
-    private final int frameSize;
-    private AlgebricksPartitionConstraint clusterLocations;
-    private int varCounter;
-    private final ITypingContext typingContext;
+	private final IOperatorSchema outerFlowSchema;
+	private final Map<ILogicalOperator, IOperatorSchema> schemaMap = new HashMap<ILogicalOperator, IOperatorSchema>();
+	private final ISerializerDeserializerProvider serializerDeserializerProvider;
+	private final IBinaryHashFunctionFactoryProvider hashFunctionFactoryProvider;
+	private final IBinaryHashFunctionFamilyProvider hashFunctionFamilyProvider;
+	private final IBinaryComparatorFactoryProvider comparatorFactoryProvider;
+	private final IPrinterFactoryProvider printerFactoryProvider;
+	private final ITypeTraitProvider typeTraitProvider;
+	private final IMetadataProvider<?, ?> metadataProvider;
+	private final INullWriterFactory nullWriterFactory;
+	private final INormalizedKeyComputerFactoryProvider normalizedKeyComputerFactoryProvider;
+	private final Object appContext;
+	private final IBinaryBooleanInspectorFactory booleanInspectorFactory;
+	private final IBinaryIntegerInspectorFactory integerInspectorFactory;
+	private final IExpressionRuntimeProvider expressionRuntimeProvider;
+	private final IExpressionTypeComputer expressionTypeComputer;
+	private final IExpressionEvalSizeComputer expressionEvalSizeComputer;
+	private final IPartialAggregationTypeComputer partialAggregationTypeComputer;
+	private final int frameSize;
+	private AlgebricksPartitionConstraint clusterLocations;
+	private int varCounter;
+	private final ITypingContext typingContext;
 
-    public JobGenContext(IOperatorSchema outerFlowSchema, IMetadataProvider<?, ?> metadataProvider, Object appContext,
-            ISerializerDeserializerProvider serializerDeserializerProvider,
-            IBinaryHashFunctionFactoryProvider hashFunctionFactoryProvider,
-            IBinaryComparatorFactoryProvider comparatorFactoryProvider, ITypeTraitProvider typeTraitProvider,
-            IBinaryBooleanInspectorFactory booleanInspectorFactory,
-            IBinaryIntegerInspectorFactory integerInspectorFactory, IPrinterFactoryProvider printerFactoryProvider,
-            INullWriterFactory nullWriterFactory,
-            INormalizedKeyComputerFactoryProvider normalizedKeyComputerFactoryProvider,
-            IExpressionRuntimeProvider expressionRuntimeProvider, IExpressionTypeComputer expressionTypeComputer,
-            INullableTypeComputer nullableTypeComputer, ITypingContext typingContext,
-            IExpressionEvalSizeComputer expressionEvalSizeComputer,
-            IPartialAggregationTypeComputer partialAggregationTypeComputer, int frameSize,
-            AlgebricksPartitionConstraint clusterLocations) {
-        this.outerFlowSchema = outerFlowSchema;
-        this.metadataProvider = metadataProvider;
-        this.appContext = appContext;
-        this.serializerDeserializerProvider = serializerDeserializerProvider;
-        this.hashFunctionFactoryProvider = hashFunctionFactoryProvider;
-        this.comparatorFactoryProvider = comparatorFactoryProvider;
-        this.typeTraitProvider = typeTraitProvider;
-        this.booleanInspectorFactory = booleanInspectorFactory;
-        this.integerInspectorFactory = integerInspectorFactory;
-        this.printerFactoryProvider = printerFactoryProvider;
-        this.clusterLocations = clusterLocations;
-        this.normalizedKeyComputerFactoryProvider = normalizedKeyComputerFactoryProvider;
-        this.nullWriterFactory = nullWriterFactory;
-        this.expressionRuntimeProvider = expressionRuntimeProvider;
-        this.expressionTypeComputer = expressionTypeComputer;
-        this.typingContext = typingContext;
-        this.expressionEvalSizeComputer = expressionEvalSizeComputer;
-        this.partialAggregationTypeComputer = partialAggregationTypeComputer;
-        this.frameSize = frameSize;
-        this.varCounter = 0;
-    }
+	public JobGenContext(
+			IOperatorSchema outerFlowSchema,
+			IMetadataProvider<?, ?> metadataProvider,
+			Object appContext,
+			ISerializerDeserializerProvider serializerDeserializerProvider,
+			IBinaryHashFunctionFactoryProvider hashFunctionFactoryProvider,
+			IBinaryHashFunctionFamilyProvider hashFunctionFamilyProvider,
+			IBinaryComparatorFactoryProvider comparatorFactoryProvider,
+			ITypeTraitProvider typeTraitProvider,
+			IBinaryBooleanInspectorFactory booleanInspectorFactory,
+			IBinaryIntegerInspectorFactory integerInspectorFactory,
+			IPrinterFactoryProvider printerFactoryProvider,
+			INullWriterFactory nullWriterFactory,
+			INormalizedKeyComputerFactoryProvider normalizedKeyComputerFactoryProvider,
+			IExpressionRuntimeProvider expressionRuntimeProvider,
+			IExpressionTypeComputer expressionTypeComputer,
+			INullableTypeComputer nullableTypeComputer,
+			ITypingContext typingContext,
+			IExpressionEvalSizeComputer expressionEvalSizeComputer,
+			IPartialAggregationTypeComputer partialAggregationTypeComputer,
+			int frameSize, AlgebricksPartitionConstraint clusterLocations) {
+		this.outerFlowSchema = outerFlowSchema;
+		this.metadataProvider = metadataProvider;
+		this.appContext = appContext;
+		this.serializerDeserializerProvider = serializerDeserializerProvider;
+		this.hashFunctionFactoryProvider = hashFunctionFactoryProvider;
+		this.hashFunctionFamilyProvider = hashFunctionFamilyProvider;
+		this.comparatorFactoryProvider = comparatorFactoryProvider;
+		this.typeTraitProvider = typeTraitProvider;
+		this.booleanInspectorFactory = booleanInspectorFactory;
+		this.integerInspectorFactory = integerInspectorFactory;
+		this.printerFactoryProvider = printerFactoryProvider;
+		this.clusterLocations = clusterLocations;
+		this.normalizedKeyComputerFactoryProvider = normalizedKeyComputerFactoryProvider;
+		this.nullWriterFactory = nullWriterFactory;
+		this.expressionRuntimeProvider = expressionRuntimeProvider;
+		this.expressionTypeComputer = expressionTypeComputer;
+		this.typingContext = typingContext;
+		this.expressionEvalSizeComputer = expressionEvalSizeComputer;
+		this.partialAggregationTypeComputer = partialAggregationTypeComputer;
+		this.frameSize = frameSize;
+		this.varCounter = 0;
+	}
 
-    public IOperatorSchema getOuterFlowSchema() {
-        return outerFlowSchema;
-    }
+	public IOperatorSchema getOuterFlowSchema() {
+		return outerFlowSchema;
+	}
 
-    public AlgebricksPartitionConstraint getClusterLocations() {
-        return clusterLocations;
-    }
+	public AlgebricksPartitionConstraint getClusterLocations() {
+		return clusterLocations;
+	}
 
-    public IMetadataProvider<?, ?> getMetadataProvider() {
-        return metadataProvider;
-    }
+	public IMetadataProvider<?, ?> getMetadataProvider() {
+		return metadataProvider;
+	}
 
-    public Object getAppContext() {
-        return appContext;
-    }
+	public Object getAppContext() {
+		return appContext;
+	}
 
-    public ISerializerDeserializerProvider getSerializerDeserializerProvider() {
-        return serializerDeserializerProvider;
-    }
+	public ISerializerDeserializerProvider getSerializerDeserializerProvider() {
+		return serializerDeserializerProvider;
+	}
 
-    public IBinaryHashFunctionFactoryProvider getBinaryHashFunctionFactoryProvider() {
-        return hashFunctionFactoryProvider;
-    }
+	public IBinaryHashFunctionFactoryProvider getBinaryHashFunctionFactoryProvider() {
+		return hashFunctionFactoryProvider;
+	}
 
-    public IBinaryComparatorFactoryProvider getBinaryComparatorFactoryProvider() {
-        return comparatorFactoryProvider;
-    }
+	public IBinaryHashFunctionFamilyProvider getBinaryHashFunctionFamilyProvider() {
+		return hashFunctionFamilyProvider;
+	}
 
-    public ITypeTraitProvider getTypeTraitProvider() {
-        return typeTraitProvider;
-    }
+	public IBinaryComparatorFactoryProvider getBinaryComparatorFactoryProvider() {
+		return comparatorFactoryProvider;
+	}
 
-    public IBinaryBooleanInspectorFactory getBinaryBooleanInspectorFactory() {
-        return booleanInspectorFactory;
-    }
+	public ITypeTraitProvider getTypeTraitProvider() {
+		return typeTraitProvider;
+	}
 
-    public IBinaryIntegerInspectorFactory getBinaryIntegerInspectorFactory() {
-        return integerInspectorFactory;
-    }
+	public IBinaryBooleanInspectorFactory getBinaryBooleanInspectorFactory() {
+		return booleanInspectorFactory;
+	}
 
-    public IPrinterFactoryProvider getPrinterFactoryProvider() {
-        return printerFactoryProvider;
-    }
+	public IBinaryIntegerInspectorFactory getBinaryIntegerInspectorFactory() {
+		return integerInspectorFactory;
+	}
 
-    public IExpressionRuntimeProvider getExpressionRuntimeProvider() {
-        return expressionRuntimeProvider;
-    }
+	public IPrinterFactoryProvider getPrinterFactoryProvider() {
+		return printerFactoryProvider;
+	}
 
-    public IOperatorSchema getSchema(ILogicalOperator op) {
-        return schemaMap.get(op);
-    }
+	public IExpressionRuntimeProvider getExpressionRuntimeProvider() {
+		return expressionRuntimeProvider;
+	}
 
-    public void putSchema(ILogicalOperator op, IOperatorSchema schema) {
-        schemaMap.put(op, schema);
-    }
+	public IOperatorSchema getSchema(ILogicalOperator op) {
+		return schemaMap.get(op);
+	}
 
-    public LogicalVariable createNewVar() {
-        varCounter++;
-        LogicalVariable var = new LogicalVariable(-varCounter);
-        return var;
-    }
+	public void putSchema(ILogicalOperator op, IOperatorSchema schema) {
+		schemaMap.put(op, schema);
+	}
 
-    public Object getType(ILogicalExpression expr, IVariableTypeEnvironment env) throws AlgebricksException {
-        return expressionTypeComputer.getType(expr, typingContext.getMetadataProvider(), env);
-    }
+	public LogicalVariable createNewVar() {
+		varCounter++;
+		LogicalVariable var = new LogicalVariable(-varCounter);
+		return var;
+	}
 
-    public INullWriterFactory getNullWriterFactory() {
-        return nullWriterFactory;
-    }
+	public Object getType(ILogicalExpression expr, IVariableTypeEnvironment env)
+			throws AlgebricksException {
+		return expressionTypeComputer.getType(expr,
+				typingContext.getMetadataProvider(), env);
+	}
 
-    public INormalizedKeyComputerFactoryProvider getNormalizedKeyComputerFactoryProvider() {
-        return normalizedKeyComputerFactoryProvider;
-    }
+	public INullWriterFactory getNullWriterFactory() {
+		return nullWriterFactory;
+	}
 
-    public IExpressionEvalSizeComputer getExpressionEvalSizeComputer() {
-        return expressionEvalSizeComputer;
-    }
+	public INormalizedKeyComputerFactoryProvider getNormalizedKeyComputerFactoryProvider() {
+		return normalizedKeyComputerFactoryProvider;
+	}
 
-    public int getFrameSize() {
-        return frameSize;
-    }
+	public IExpressionEvalSizeComputer getExpressionEvalSizeComputer() {
+		return expressionEvalSizeComputer;
+	}
 
-    public IPartialAggregationTypeComputer getPartialAggregationTypeComputer() {
-        return partialAggregationTypeComputer;
-    }
+	public int getFrameSize() {
+		return frameSize;
+	}
 
-    public IVariableTypeEnvironment getTypeEnvironment(ILogicalOperator op) {
-        return typingContext.getOutputTypeEnvironment(op);
-    }
+	public IPartialAggregationTypeComputer getPartialAggregationTypeComputer() {
+		return partialAggregationTypeComputer;
+	}
+
+	public IVariableTypeEnvironment getTypeEnvironment(ILogicalOperator op) {
+		return typingContext.getOutputTypeEnvironment(op);
+	}
 
 }
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobGenHelper.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobGenHelper.java
index 790fb93..530d19c 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobGenHelper.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/jobgen/impl/JobGenHelper.java
@@ -24,6 +24,7 @@
 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.IOperatorSchema;
 import edu.uci.ics.hyracks.algebricks.data.IBinaryComparatorFactoryProvider;
 import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFactoryProvider;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFamilyProvider;
 import edu.uci.ics.hyracks.algebricks.data.INormalizedKeyComputerFactoryProvider;
 import edu.uci.ics.hyracks.algebricks.data.IPrinterFactory;
 import edu.uci.ics.hyracks.algebricks.data.IPrinterFactoryProvider;
@@ -31,6 +32,7 @@
 import edu.uci.ics.hyracks.algebricks.data.ITypeTraitProvider;
 import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
 import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
 import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
 import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
 import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
@@ -42,8 +44,8 @@
 
     @SuppressWarnings("rawtypes")
     public static RecordDescriptor mkRecordDescriptor(IVariableTypeEnvironment env, IOperatorSchema opSchema,
-            JobGenContext context) throws AlgebricksException {        
-		ISerializerDeserializer[] fields = new ISerializerDeserializer[opSchema.getSize()];
+            JobGenContext context) throws AlgebricksException {
+        ISerializerDeserializer[] fields = new ISerializerDeserializer[opSchema.getSize()];
         ITypeTraits[] typeTraits = new ITypeTraits[opSchema.getSize()];
         ISerializerDeserializerProvider sdp = context.getSerializerDeserializerProvider();
         ITypeTraitProvider ttp = context.getTypeTraitProvider();
@@ -59,7 +61,7 @@
         }
         return new RecordDescriptor(fields, typeTraits);
     }
-    
+
     public static IPrinterFactory[] mkPrinterFactories(IOperatorSchema opSchema, IVariableTypeEnvironment env,
             JobGenContext context, int[] printColumns) throws AlgebricksException {
         IPrinterFactory[] pf = new IPrinterFactory[printColumns.length];
@@ -94,7 +96,20 @@
         }
         return funFactories;
     }
-    
+
+    public static IBinaryHashFunctionFamily[] variablesToBinaryHashFunctionFamilies(
+            Collection<LogicalVariable> varLogical, IVariableTypeEnvironment env, JobGenContext context)
+            throws AlgebricksException {
+        IBinaryHashFunctionFamily[] funFamilies = new IBinaryHashFunctionFamily[varLogical.size()];
+        int i = 0;
+        IBinaryHashFunctionFamilyProvider bhffProvider = context.getBinaryHashFunctionFamilyProvider();
+        for (LogicalVariable var : varLogical) {
+            Object type = env.getVarType(var);
+            funFamilies[i++] = bhffProvider.getBinaryHashFunctionFamily(type);
+        }
+        return funFamilies;
+    }
+
     public static IBinaryComparatorFactory[] variablesToAscBinaryComparatorFactories(
             Collection<LogicalVariable> varLogical, IVariableTypeEnvironment env, JobGenContext context)
             throws AlgebricksException {
@@ -107,15 +122,14 @@
         }
         return compFactories;
     }
-    
-    public static IBinaryComparatorFactory[] variablesToAscBinaryComparatorFactories(
-            List<LogicalVariable> varLogical, int start, int size, IVariableTypeEnvironment env, JobGenContext context)
-            throws AlgebricksException {
+
+    public static IBinaryComparatorFactory[] variablesToAscBinaryComparatorFactories(List<LogicalVariable> varLogical,
+            int start, int size, IVariableTypeEnvironment env, JobGenContext context) throws AlgebricksException {
         IBinaryComparatorFactory[] compFactories = new IBinaryComparatorFactory[size];
         IBinaryComparatorFactoryProvider bcfProvider = context.getBinaryComparatorFactoryProvider();
         for (int i = 0; i < size; i++) {
-                Object type = env.getVarType(varLogical.get(start + i));
-                compFactories[i] = bcfProvider.getBinaryComparatorFactory(type, true);
+            Object type = env.getVarType(varLogical.get(start + i));
+            compFactories[i] = bcfProvider.getBinaryComparatorFactory(type, true);
         }
         return compFactories;
     }
@@ -144,15 +158,14 @@
         }
         return typeTraits;
     }
-    
-    public static ITypeTraits[] variablesToTypeTraits(
-            List<LogicalVariable> varLogical, int start, int size, IVariableTypeEnvironment env, JobGenContext context)
-            throws AlgebricksException {
+
+    public static ITypeTraits[] variablesToTypeTraits(List<LogicalVariable> varLogical, int start, int size,
+            IVariableTypeEnvironment env, JobGenContext context) throws AlgebricksException {
         ITypeTraits[] typeTraits = new ITypeTraits[size];
         ITypeTraitProvider typeTraitProvider = context.getTypeTraitProvider();
         for (int i = 0; i < size; i++) {
-                Object type = env.getVarType(varLogical.get(start + i));
-                typeTraits[i] = typeTraitProvider.getTypeTrait(type);
+            Object type = env.getVarType(varLogical.get(start + i));
+            typeTraits[i] = typeTraitProvider.getTypeTrait(type);
         }
         return typeTraits;
     }
diff --git a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/AlgebricksOptimizationContext.java b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/AlgebricksOptimizationContext.java
index 7c63e01..738fc7f 100644
--- a/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/AlgebricksOptimizationContext.java
+++ b/algebricks/algebricks-core/src/main/java/edu/uci/ics/hyracks/algebricks/core/rewriter/base/AlgebricksOptimizationContext.java
@@ -135,6 +135,7 @@
     /*
      * returns true if op1 and op2 have already been compared
      */
+    @Override
     public boolean checkAndAddToAlreadyCompared(ILogicalOperator op1, ILogicalOperator op2) {
         HashSet<ILogicalOperator> ops = alreadyCompared.get(op1);
         if (ops == null) {
@@ -151,6 +152,11 @@
             }
         }
     }
+    
+    @Override
+    public void removeFromAlreadyCompared(ILogicalOperator op1) {
+        alreadyCompared.remove(op1);
+    }
 
     public void addNotToBeInlinedVar(LogicalVariable var) {
         notToBeInlinedVars.add(var);
diff --git a/algebricks/algebricks-data/src/main/java/edu/uci/ics/hyracks/algebricks/data/IBinaryHashFunctionFamilyProvider.java b/algebricks/algebricks-data/src/main/java/edu/uci/ics/hyracks/algebricks/data/IBinaryHashFunctionFamilyProvider.java
new file mode 100644
index 0000000..8a992b3
--- /dev/null
+++ b/algebricks/algebricks-data/src/main/java/edu/uci/ics/hyracks/algebricks/data/IBinaryHashFunctionFamilyProvider.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.algebricks.data;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
+
+public interface IBinaryHashFunctionFamilyProvider {
+
+	public IBinaryHashFunctionFamily getBinaryHashFunctionFamily(Object type)
+			throws AlgebricksException;
+}
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ComplexJoinInferenceRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ComplexJoinInferenceRule.java
index 4f6699a..3daf85d 100644
--- a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ComplexJoinInferenceRule.java
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ComplexJoinInferenceRule.java
@@ -69,7 +69,7 @@
         VariableUtilities.getUsedVariables(op, varsUsedInUnnest);
 
         HashSet<LogicalVariable> producedInSubplan = new HashSet<LogicalVariable>();
-        VariableUtilities.getLiveVariables(subplan, producedInSubplan);
+        VariableUtilities.getProducedVariables(subplan, producedInSubplan);
 
         if (!producedInSubplan.containsAll(varsUsedInUnnest)) {
             return false;
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ComplexUnnestToProductRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ComplexUnnestToProductRule.java
index 4521d1a..fa5000e 100644
--- a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ComplexUnnestToProductRule.java
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ComplexUnnestToProductRule.java
@@ -149,9 +149,15 @@
             // Trivially joinable.
             return true;
         }
-        if (!belowSecondUnnest && op.getOperatorTag() == LogicalOperatorTag.SUBPLAN) {
-            // Bail on subplan.
-            return false;
+        if (!belowSecondUnnest) {
+            // Bail on the following operators.
+            switch (op.getOperatorTag()) {
+                case AGGREGATE:
+                case SUBPLAN:
+                case GROUP:
+                case UNNEST_MAP:
+                    return false;
+            }
         }
         switch (op.getOperatorTag()) {
             case UNNEST:
@@ -211,7 +217,8 @@
                 for (LogicalVariable producedVar : producedVars) {
                     if (outerUsedVars.contains(producedVar)) {
                         outerMatches++;
-                    } else if (innerUsedVars.contains(producedVar)) {
+                    }
+                    if (innerUsedVars.contains(producedVar)) {
                         innerMatches++;
                     }
                 }
@@ -221,24 +228,30 @@
                     // All produced vars used by outer partition.
                     outerOps.add(op);
                     targetUsedVars = outerUsedVars;
-                } else if (innerMatches == producedVars.size() && !producedVars.isEmpty()) {
+                }
+                if (innerMatches == producedVars.size() && !producedVars.isEmpty()) {
                     // All produced vars used by inner partition.
                     innerOps.add(op);
                     targetUsedVars = innerUsedVars;
-                } else if (innerMatches == 0 && outerMatches == 0) {
+                }
+                if (innerMatches == 0 && outerMatches == 0) {
                     // Op produces variables that are not used in the part of the plan we've seen (or it doesn't produce any vars).
                     // Try to figure out where it belongs by analyzing the used variables.
                     List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
                     VariableUtilities.getUsedVariables(op, usedVars);
                     for (LogicalVariable usedVar : usedVars) {
+                        boolean canBreak = false;
                         if (outerUsedVars.contains(usedVar)) {
                             outerOps.add(op);
                             targetUsedVars = outerUsedVars;
-                            break;
+                            canBreak = true;
                         }
                         if (innerUsedVars.contains(usedVar)) {
                             innerOps.add(op);
                             targetUsedVars = innerUsedVars;
+                            canBreak = true;
+                        }
+                        if (canBreak) {
                             break;
                         }
                     }
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java
new file mode 100644
index 0000000..cbe2b4a
--- /dev/null
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/ExtractCommonExpressionsRule.java
@@ -0,0 +1,421 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.algebricks.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractLogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.algebra.visitors.ILogicalExpressionReferenceTransform;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * Factors out common sub-expressions by assigning them to a variables, and replacing the common sub-expressions with references to those variables.
+ *
+ * Preconditions/Assumptions:
+ * Assumes no projects are in the plan. This rule ignores variable reference expressions and constants (other rules deal with those separately).
+ * 
+ * Postconditions/Examples:
+ * Plan with extracted sub-expressions. Generates one assign operator per extracted expression.
+ * 
+ * Example 1 - Simple Arithmetic Example (simplified)
+ * 
+ * Before plan:
+ * assign [$$1] <- [5 + 6 - 10]
+ *   assign [$$0] <- [5 + 6 + 30]
+ * 
+ * After plan:
+ * assign [$$1] <- [$$5 - 10]
+ *   assign [$$0] <- [$$5 + 30]
+ *     assign [$$5] <- [5 + 6]
+ * 
+ * Example 2 - Cleaning up 'Distinct By' (simplified)
+ * 
+ * Before plan: (notice how $$0 is not live after the distinct)
+ * assign [$$3] <- [field-access($$0, 1)]
+ *   distinct ([%0->$$5])
+ *     assign [$$5] <- [field-access($$0, 1)]
+ *       unnest $$0 <- [scan-dataset]
+ * 
+ * After plan: (notice how the issue of $$0 is fixed)
+ * assign [$$3] <- [$$5]
+ *   distinct ([$$5])
+ *     assign [$$5] <- [field-access($$0, 1)]
+ *       unnest $$0 <- [scan-dataset]
+ * 
+ * Example 3 - Pulling Common Expressions Above Joins (simplified)
+ * 
+ * Before plan:
+ * assign [$$9] <- funcZ(funcY($$8))
+ *   join (funcX(funcY($$8)))
+ * 
+ * After plan:
+ * assign [$$9] <- funcZ($$10))
+ *   select (funcX($$10))
+ *     assign [$$10] <- [funcY($$8)]
+ *       join (TRUE)
+ */
+public class ExtractCommonExpressionsRule implements IAlgebraicRewriteRule {
+
+    private final CommonExpressionSubstitutionVisitor substVisitor = new CommonExpressionSubstitutionVisitor();
+    private final Map<ILogicalExpression, ExprEquivalenceClass> exprEqClassMap = new HashMap<ILogicalExpression, ExprEquivalenceClass>();
+    
+    // Set of operators for which common subexpression elimination should not be performed.
+    private static final Set<LogicalOperatorTag> ignoreOps = new HashSet<LogicalOperatorTag>();
+    static {
+        ignoreOps.add(LogicalOperatorTag.UNNEST);
+        ignoreOps.add(LogicalOperatorTag.UNNEST_MAP);
+        ignoreOps.add(LogicalOperatorTag.ORDER);
+        ignoreOps.add(LogicalOperatorTag.PROJECT);
+        ignoreOps.add(LogicalOperatorTag.AGGREGATE);
+        ignoreOps.add(LogicalOperatorTag.RUNNINGAGGREGATE);
+    }
+    
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        return false;
+    }
+
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        exprEqClassMap.clear();
+        substVisitor.setContext(context);
+        boolean modified = removeCommonExpressions(opRef, context);
+        if (modified) {
+            context.computeAndSetTypeEnvironmentForOperator(opRef.getValue());
+        }
+        return modified;
+    }
+
+    private void updateEquivalenceClassMap(LogicalVariable lhs, Mutable<ILogicalExpression> rhsExprRef, ILogicalOperator op) {
+        ExprEquivalenceClass exprEqClass = exprEqClassMap.get(rhsExprRef.getValue());
+        if (exprEqClass == null) {
+            exprEqClass = new ExprEquivalenceClass(op, rhsExprRef);
+            exprEqClassMap.put(rhsExprRef.getValue(), exprEqClass);
+        }
+        exprEqClass.setVariable(lhs);
+    }
+
+    private boolean removeCommonExpressions(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+        if (context.checkIfInDontApplySet(this, opRef.getValue())) {
+            return false;
+        }
+        
+        boolean modified = false;
+        // Recurse into children.
+        for (Mutable<ILogicalOperator> inputOpRef : op.getInputs()) {
+            if (removeCommonExpressions(inputOpRef, context)) {
+                modified = true;
+            }
+        }
+        
+        // TODO: Deal with replicate properly. Currently, we just clear the expr equivalence map, since we want to avoid incorrect expression replacement
+        // (the resulting new variables should be assigned live below a replicate).
+        if (op.getOperatorTag() == LogicalOperatorTag.REPLICATE) {
+            exprEqClassMap.clear();
+            return modified;
+        }
+        // Exclude these operators.
+        if (ignoreOps.contains(op.getOperatorTag())) {
+            return modified;
+        }
+        
+        // Perform common subexpression elimination.
+        substVisitor.setOperator(op);
+        if (op.acceptExpressionTransform(substVisitor)) {
+            modified = true;
+        }
+        
+        // Update equivalence class map.
+        if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
+            AssignOperator assignOp = (AssignOperator) op;
+            int numVars = assignOp.getVariables().size();
+            for (int i = 0; i < numVars; i++) {
+                Mutable<ILogicalExpression> exprRef = assignOp.getExpressions().get(i);
+                ILogicalExpression expr = exprRef.getValue();
+                if (expr.getExpressionTag() == LogicalExpressionTag.VARIABLE
+                        || expr.getExpressionTag() == LogicalExpressionTag.CONSTANT) {
+                    continue;
+                }
+                // Update equivalence class map.
+                LogicalVariable lhs = assignOp.getVariables().get(i);
+                updateEquivalenceClassMap(lhs, exprRef, op);
+            }
+        }
+
+        // TODO: For now do not perform replacement in nested plans
+        // due to the complication of figuring out whether the firstOp in an equivalence class is within a subplan, 
+        // and the resulting variable will not be visible to the outside.
+        // Since subplans should be eliminated in most cases, this behavior is acceptable for now.
+        /*
+        if (op.hasNestedPlans()) {
+            AbstractOperatorWithNestedPlans opWithNestedPlan = (AbstractOperatorWithNestedPlans) op;
+            for (ILogicalPlan nestedPlan : opWithNestedPlan.getNestedPlans()) {
+                for (Mutable<ILogicalOperator> rootRef : nestedPlan.getRoots()) {
+                    if (removeCommonExpressions(rootRef, context)) {
+                        modified = true;
+                    }
+                }
+            }
+        }
+        */
+
+        if (modified) {
+            context.computeAndSetTypeEnvironmentForOperator(op);
+            context.addToDontApplySet(this, op);
+        }
+        return modified;
+    }
+
+    private class CommonExpressionSubstitutionVisitor implements ILogicalExpressionReferenceTransform {
+                
+        private final Set<LogicalVariable> liveVars = new HashSet<LogicalVariable>();
+        private final List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
+        private IOptimizationContext context;
+        private ILogicalOperator op;        
+        
+        public void setContext(IOptimizationContext context) {
+            this.context = context;
+        }
+        
+        public void setOperator(ILogicalOperator op) throws AlgebricksException {
+            this.op = op;
+            liveVars.clear();
+            usedVars.clear();
+        }
+        
+        @Override
+        public boolean transform(Mutable<ILogicalExpression> exprRef) throws AlgebricksException {
+            if (liveVars.isEmpty() && usedVars.isEmpty()) {
+                VariableUtilities.getLiveVariables(op, liveVars);
+                VariableUtilities.getUsedVariables(op, usedVars);
+            }
+            
+            AbstractLogicalExpression expr = (AbstractLogicalExpression) exprRef.getValue();
+            boolean modified = false;
+            ExprEquivalenceClass exprEqClass = exprEqClassMap.get(expr);
+            if (exprEqClass != null) {
+                // Replace common subexpression with existing variable. 
+                if (exprEqClass.variableIsSet()) {
+                    // Check if the replacing variable is live at this op.
+                    // However, if the op is already using variables that are not live, then a replacement may enable fixing the plan.
+                    // This behavior is necessary to, e.g., properly deal with distinct by.
+                    // Also just replace the expr if we are replacing common exprs from within the same operator.
+                    if (liveVars.contains(exprEqClass.getVariable()) || !liveVars.containsAll(usedVars)
+                            || op == exprEqClass.getFirstOperator()) {
+                        exprRef.setValue(new VariableReferenceExpression(exprEqClass.getVariable()));
+                        // Do not descend into children since this expr has been completely replaced.
+                        return true;
+                    }
+                } else {
+                    if (assignCommonExpression(exprEqClass, expr)) {
+                        exprRef.setValue(new VariableReferenceExpression(exprEqClass.getVariable()));
+                        // Do not descend into children since this expr has been completely replaced.
+                        return true;
+                    }
+                }
+            } else {
+                if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE
+                        && expr.getExpressionTag() != LogicalExpressionTag.CONSTANT) {
+                    exprEqClass = new ExprEquivalenceClass(op, exprRef);
+                    exprEqClassMap.put(expr, exprEqClass);
+                }
+            }
+            
+            // Descend into function arguments.
+            if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+                AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+                for (Mutable<ILogicalExpression> arg : funcExpr.getArguments()) {
+                    if (transform(arg)) {
+                        modified = true;
+                    }
+                }
+            }
+            return modified;
+        }
+        
+        private boolean assignCommonExpression(ExprEquivalenceClass exprEqClass, ILogicalExpression expr) throws AlgebricksException {
+            AbstractLogicalOperator firstOp = (AbstractLogicalOperator) exprEqClass.getFirstOperator();
+            Mutable<ILogicalExpression> firstExprRef = exprEqClass.getFirstExpression();
+            if (firstOp.getOperatorTag() == LogicalOperatorTag.INNERJOIN || firstOp.getOperatorTag() == LogicalOperatorTag.LEFTOUTERJOIN) {
+                // Do not extract common expressions from within the same join operator.
+                if (firstOp == op) {
+                    return false;
+                }
+                AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) firstOp;
+                Mutable<ILogicalExpression> joinCond = joinOp.getCondition();                
+                ILogicalExpression enclosingExpr = getEnclosingExpression(joinCond, firstExprRef.getValue());
+                if (enclosingExpr == null) {
+                    // No viable enclosing expression that we can pull out from the join.
+                    return false;
+                }
+                // Place a Select operator beneath op that contains the enclosing expression.
+                SelectOperator selectOp = new SelectOperator(new MutableObject<ILogicalExpression>(enclosingExpr));
+                selectOp.getInputs().add(new MutableObject<ILogicalOperator>(op.getInputs().get(0).getValue()));
+                op.getInputs().get(0).setValue(selectOp);
+                // Set firstOp to be the select below op, since we want to assign the common subexpr there.
+                firstOp = (AbstractLogicalOperator) selectOp;
+            } else if (firstOp.getInputs().size() > 1) { 
+                // Bail for any non-join operator with multiple inputs.
+                return false;
+            }                        
+            LogicalVariable newVar = context.newVar();
+            AssignOperator newAssign = new AssignOperator(newVar, new MutableObject<ILogicalExpression>(firstExprRef.getValue().cloneExpression()));            
+            // Place assign below firstOp.
+            newAssign.getInputs().add(new MutableObject<ILogicalOperator>(firstOp.getInputs().get(0).getValue()));
+            newAssign.setExecutionMode(firstOp.getExecutionMode());
+            firstOp.getInputs().get(0).setValue(newAssign);
+            // Replace original expr with variable reference, and set var in expression equivalence class.
+            firstExprRef.setValue(new VariableReferenceExpression(newVar));
+            exprEqClass.setVariable(newVar);
+            context.computeAndSetTypeEnvironmentForOperator(newAssign);
+            context.computeAndSetTypeEnvironmentForOperator(firstOp);
+            return true;
+        }
+
+        private ILogicalExpression getEnclosingExpression(Mutable<ILogicalExpression> conditionExprRef, ILogicalExpression commonSubExpr) {
+            ILogicalExpression conditionExpr = conditionExprRef.getValue();
+            if (conditionExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+                return null;
+            }
+            if (isEqJoinCondition(commonSubExpr)) {
+                // Do not eliminate the common expression if we could use it for an equi-join.
+                return null;
+            }
+            AbstractFunctionCallExpression conditionFuncExpr = (AbstractFunctionCallExpression) conditionExpr;
+            // Boolean expression that encloses the common subexpression.
+            ILogicalExpression enclosingBoolExpr = null;
+            // We are not dealing with arbitrarily nested and/or expressions here.
+            FunctionIdentifier funcIdent = conditionFuncExpr.getFunctionIdentifier();
+            if (funcIdent.equals(AlgebricksBuiltinFunctions.AND) || funcIdent.equals(AlgebricksBuiltinFunctions.OR)) {
+                Iterator<Mutable<ILogicalExpression>> argIter = conditionFuncExpr.getArguments().iterator();
+                while (argIter.hasNext()) {
+                    Mutable<ILogicalExpression> argRef = argIter.next();
+                    if (containsExpr(argRef.getValue(), commonSubExpr)) {
+                        enclosingBoolExpr = argRef.getValue();
+                        // Remove the enclosing expression from the argument list.
+                        // We are going to pull it out into a new select operator.
+                        argIter.remove();
+                        break;
+                    }
+                }
+                // If and/or only has a single argument left, pull it out and remove the and/or function.
+                if (conditionFuncExpr.getArguments().size() == 1) {
+                    conditionExprRef.setValue(conditionFuncExpr.getArguments().get(0).getValue());
+                }
+            } else {
+                if (!containsExpr(conditionExprRef.getValue(), commonSubExpr)) {
+                    return null;
+                }
+                enclosingBoolExpr = conditionFuncExpr;
+                // Replace the enclosing expression with TRUE.
+                conditionExprRef.setValue(ConstantExpression.TRUE);
+            }
+            return enclosingBoolExpr;
+        }
+    }
+    
+    private boolean containsExpr(ILogicalExpression expr, ILogicalExpression searchExpr) {
+        if (expr == searchExpr) {
+            return true;
+        }
+        if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+            return false;
+        }
+        AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+        for (Mutable<ILogicalExpression> argRef : funcExpr.getArguments()) {
+            if (containsExpr(argRef.getValue(), searchExpr)) {
+                return true;
+            }
+        }
+        return false;
+    }
+    
+    private boolean isEqJoinCondition(ILogicalExpression expr) {
+        AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+        if (funcExpr.getFunctionIdentifier().equals(AlgebricksBuiltinFunctions.EQ)) {
+            ILogicalExpression arg1 = funcExpr.getArguments().get(0).getValue();
+            ILogicalExpression arg2 = funcExpr.getArguments().get(1).getValue();
+            if (arg1.getExpressionTag() == LogicalExpressionTag.VARIABLE
+                    && arg2.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
+                return true;
+            }
+        }
+        return false;
+    }
+    
+    private final class ExprEquivalenceClass {
+        // First operator in which expression is used.
+        private final ILogicalOperator firstOp;
+        
+        // Reference to expression in first op.
+        private final Mutable<ILogicalExpression> firstExprRef;
+        
+        // Variable that this expression has been assigned to.
+        private LogicalVariable var;
+        
+        public ExprEquivalenceClass(ILogicalOperator firstOp, Mutable<ILogicalExpression> firstExprRef) {
+            this.firstOp = firstOp;
+            this.firstExprRef = firstExprRef;
+        }
+        
+        public ILogicalOperator getFirstOperator() {
+            return firstOp;
+        }
+        
+        public Mutable<ILogicalExpression> getFirstExpression() {
+            return firstExprRef;
+        }
+        
+        public void setVariable(LogicalVariable var) {
+            this.var = var;
+        }
+        
+        public LogicalVariable getVariable() {
+            return var;
+        }
+        
+        public boolean variableIsSet() {
+            return var != null;
+        }
+    }
+}
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/InlineSingleReferenceVariablesRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/InlineSingleReferenceVariablesRule.java
new file mode 100644
index 0000000..df8ddda
--- /dev/null
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/InlineSingleReferenceVariablesRule.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.algebricks.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+
+/**
+ * Inlines variables that are referenced exactly once.
+ * 
+ * Preconditions/Assumptions:
+ * Assumes no projects are in the plan.
+ * 
+ * Example assuming variable $$3 is referenced exactly once.
+ * 
+ * Before plan:
+ * select (funcA($$3))
+ *   ...
+ *     assign [$$3] <- [field-access($$0, 1)]
+ * 
+ * After plan:
+ * select (funcA(field-access($$0, 1))
+ *   ...
+ *     assign [] <- []
+ */
+public class InlineSingleReferenceVariablesRule extends InlineVariablesRule {
+
+    // Maps from variable to a list of operators using that variable.
+    protected Map<LogicalVariable, List<ILogicalOperator>> usedVarsMap = new HashMap<LogicalVariable, List<ILogicalOperator>>();
+    protected List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
+    
+    @Override
+    protected void prepare(IOptimizationContext context) {
+        super.prepare(context);
+        usedVarsMap.clear();
+        usedVars.clear();
+    }
+    
+    @Override
+    protected boolean performFinalAction() throws AlgebricksException {
+        boolean modified = false;
+        for (Map.Entry<LogicalVariable, List<ILogicalOperator>> entry : usedVarsMap.entrySet()) {
+            // Perform replacement only if variable is referenced a single time.
+            if (entry.getValue().size() == 1) {
+                ILogicalOperator op = entry.getValue().get(0);
+                if (!op.requiresVariableReferenceExpressions()) {
+                    inlineVisitor.setOperator(op);
+                    inlineVisitor.setTargetVariable(entry.getKey());
+                    if (op.acceptExpressionTransform(inlineVisitor)) {
+                        modified = true;
+                    }
+                    inlineVisitor.setTargetVariable(null);
+                }         
+            }
+        }
+        return modified;
+    }
+
+    @Override
+    protected boolean performBottomUpAction(AbstractLogicalOperator op) throws AlgebricksException {
+        usedVars.clear();
+        VariableUtilities.getUsedVariables(op, usedVars);
+        for (LogicalVariable var : usedVars) {
+            List<ILogicalOperator> opsUsingVar = usedVarsMap.get(var);
+            if (opsUsingVar == null) {
+                opsUsingVar = new ArrayList<ILogicalOperator>();
+                usedVarsMap.put(var, opsUsingVar);
+            }
+            opsUsingVar.add(op);
+        }
+        return false;
+    }
+}
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java
index 8a79d81..7fed577 100644
--- a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java
@@ -15,330 +15,231 @@
 package edu.uci.ics.hyracks.algebricks.rewriter.rules;
 
 import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.LinkedList;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
 import org.apache.commons.lang3.mutable.Mutable;
 
 import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.EquivalenceClass;
 import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
 import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
 import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
 import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
 import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
 import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
 import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractLogicalExpression;
-import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
 import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractOperatorWithNestedPlans;
 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
-import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
 import edu.uci.ics.hyracks.algebricks.core.algebra.visitors.ILogicalExpressionReferenceTransform;
-import edu.uci.ics.hyracks.algebricks.core.config.AlgebricksConfig;
 import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
 
+/**
+ * Replaces variable reference expressions with their assigned function-call expression where applicable
+ * (some variables are generated by datasources).
+ * Inlining variables may enable other optimizations by allowing selects and assigns to be moved
+ * (e.g., a select may be pushed into a join to enable an efficient physical join operator).
+ * 
+ * Preconditions/Assumptions:
+ * Assumes no projects are in the plan. Only inlines variables whose assigned expression is a function call 
+ * (i.e., this rule ignores right-hand side constants and other variable references expressions  
+ * 
+ * Postconditions/Examples:
+ * All qualifying variables have been inlined.
+ * 
+ * Example (simplified):
+ * 
+ * Before plan:
+ * select <- [$$1 < $$2 + $$0]
+ *   assign [$$2] <- [funcZ() + $$0]
+ *     assign [$$0, $$1] <- [funcX(), funcY()]
+ * 
+ * After plan:
+ * select <- [funcY() < funcZ() + funcX() + funcX()]
+ *   assign [$$2] <- [funcZ() + funcX()]
+ *     assign [$$0, $$1] <- [funcX(), funcY()]
+ */
 public class InlineVariablesRule implements IAlgebraicRewriteRule {
 
+    // Map of variables that could be replaced by their producing expression.
+    // Populated during the top-down sweep of the plan.
+    protected Map<LogicalVariable, ILogicalExpression> varAssignRhs = new HashMap<LogicalVariable, ILogicalExpression>();
+
+    // Visitor for replacing variable reference expressions with their originating expression.
+    protected InlineVariablesVisitor inlineVisitor = new InlineVariablesVisitor(varAssignRhs);
+    
+    // Set of FunctionIdentifiers that we should not inline.
+    protected Set<FunctionIdentifier> doNotInlineFuncs = new HashSet<FunctionIdentifier>();
+    
+    protected boolean hasRun = false;
+    
     @Override
     public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
         return false;
     }
 
     @Override
-    /**
-     * 
-     * Does one big DFS sweep over the plan.
-     * 
-     */
     public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        if (hasRun) {
+            return false;
+        }
         if (context.checkIfInDontApplySet(this, opRef.getValue())) {
             return false;
         }
-        VariableSubstitutionVisitor substVisitor = new VariableSubstitutionVisitor(false);
-        VariableSubstitutionVisitor substVisitorForWrites = new VariableSubstitutionVisitor(true);
-        substVisitor.setContext(context);
-        substVisitorForWrites.setContext(context);
-        Pair<Boolean, Boolean> bb = collectEqClassesAndRemoveRedundantOps(opRef, context, true,
-                new LinkedList<EquivalenceClass>(), substVisitor, substVisitorForWrites);
-        return bb.first;
+        prepare(context);
+        boolean modified = inlineVariables(opRef, context);
+        if (performFinalAction()) {
+            modified = true;
+        }
+        hasRun = true;
+        return modified;
+    }
+    
+    protected void prepare(IOptimizationContext context) {
+        varAssignRhs.clear();
+        inlineVisitor.setContext(context);
+    }
+    
+    protected boolean performBottomUpAction(AbstractLogicalOperator op) throws AlgebricksException {
+        // Only inline variables in operators that can deal with arbitrary expressions.
+        if (!op.requiresVariableReferenceExpressions()) {
+            inlineVisitor.setOperator(op);
+            return op.acceptExpressionTransform(inlineVisitor);
+        }
+        return false;
     }
 
-    private Pair<Boolean, Boolean> collectEqClassesAndRemoveRedundantOps(Mutable<ILogicalOperator> opRef,
-            IOptimizationContext context, boolean first, List<EquivalenceClass> equivClasses,
-            VariableSubstitutionVisitor substVisitor, VariableSubstitutionVisitor substVisitorForWrites)
+    protected boolean performFinalAction() throws AlgebricksException {
+        return false;
+    }
+    
+    protected boolean inlineVariables(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
             throws AlgebricksException {
         AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
-        // if (context.checkIfInDontApplySet(this, opRef.getValue())) {
-        // return false;
-        // }
-        if (op.getOperatorTag() == LogicalOperatorTag.UNNEST_MAP) {
-            return new Pair<Boolean, Boolean>(false, false);
-        }
-        boolean modified = false;
-        boolean ecChange = false;
-        int cnt = 0;
-        for (Mutable<ILogicalOperator> i : op.getInputs()) {
-            boolean isOuterInputBranch = op.getOperatorTag() == LogicalOperatorTag.LEFTOUTERJOIN && cnt == 1;
-            List<EquivalenceClass> eqc = isOuterInputBranch ? new LinkedList<EquivalenceClass>() : equivClasses;
-
-            Pair<Boolean, Boolean> bb = (collectEqClassesAndRemoveRedundantOps(i, context, false, eqc, substVisitor,
-                    substVisitorForWrites));
-
-            if (bb.first) {
-                modified = true;
-            }
-            if (bb.second) {
-                ecChange = true;
-            }
-
-            if (isOuterInputBranch) {
-                if (AlgebricksConfig.DEBUG) {
-                    AlgebricksConfig.ALGEBRICKS_LOGGER.finest("--- Equivalence classes for inner branch of outer op.: "
-                            + eqc + "\n");
-                }
-                for (EquivalenceClass ec : eqc) {
-                    if (!ec.representativeIsConst()) {
-                        equivClasses.add(ec);
-                    }
-                }
-            }
-
-            ++cnt;
-        }
-        if (op.hasNestedPlans()) {
-            AbstractOperatorWithNestedPlans n = (AbstractOperatorWithNestedPlans) op;
-            List<EquivalenceClass> eqc = equivClasses;
-            if (n.getOperatorTag() == LogicalOperatorTag.SUBPLAN) {
-                eqc = new LinkedList<EquivalenceClass>();
-            } else {
-                eqc = equivClasses;
-            }
-            for (ILogicalPlan p : n.getNestedPlans()) {
-                for (Mutable<ILogicalOperator> r : p.getRoots()) {
-                    Pair<Boolean, Boolean> bb = collectEqClassesAndRemoveRedundantOps(r, context, false, eqc,
-                            substVisitor, substVisitorForWrites);
-                    if (bb.first) {
-                        modified = true;
-                    }
-                    if (bb.second) {
-                        ecChange = true;
-                    }
-                }
-            }
-        }
-        // we assume a variable is assigned a value only once
+        
+        // Update mapping from variables to expressions during top-down traversal.
         if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
-            AssignOperator a = (AssignOperator) op;
-            ILogicalExpression rhs = a.getExpressions().get(0).getValue();
-            if (rhs.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
-                LogicalVariable varLeft = a.getVariables().get(0);
-                VariableReferenceExpression varRef = (VariableReferenceExpression) rhs;
-                LogicalVariable varRight = varRef.getVariableReference();
-
-                EquivalenceClass ecRight = findEquivClass(varRight, equivClasses);
-                if (ecRight != null) {
-                    ecRight.addMember(varLeft);
-                } else {
-                    List<LogicalVariable> m = new LinkedList<LogicalVariable>();
-                    m.add(varRight);
-                    m.add(varLeft);
-                    EquivalenceClass ec = new EquivalenceClass(m, varRight);
-                    equivClasses.add(ec);
-                    if (AlgebricksConfig.DEBUG) {
-                        AlgebricksConfig.ALGEBRICKS_LOGGER.finest("--- New equivalence class: " + ec + "\n");
+            AssignOperator assignOp = (AssignOperator) op;
+            List<LogicalVariable> vars = assignOp.getVariables();
+            List<Mutable<ILogicalExpression>> exprs = assignOp.getExpressions();            
+            for (int i = 0; i < vars.size(); i++) {
+                ILogicalExpression expr = exprs.get(i).getValue();
+                // Ignore functions that are in the doNotInline set.                
+                if (expr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
+                    AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+                    if (doNotInlineFuncs.contains(funcExpr.getFunctionIdentifier())) {
+                        continue;
                     }
                 }
-                ecChange = true;
-            } else if (((AbstractLogicalExpression) rhs).getExpressionTag() == LogicalExpressionTag.CONSTANT) {
-                LogicalVariable varLeft = a.getVariables().get(0);
-                List<LogicalVariable> m = new LinkedList<LogicalVariable>();
-                m.add(varLeft);
-                EquivalenceClass ec = new EquivalenceClass(m, (ConstantExpression) rhs);
-                // equivClassesForParent.add(ec);
-                equivClasses.add(ec);
-                ecChange = true;
-            }
-        } else if (op.getOperatorTag() == LogicalOperatorTag.GROUP && !(context.checkIfInDontApplySet(this, op))) {
-            GroupByOperator group = (GroupByOperator) op;
-            Pair<Boolean, Boolean> r1 = processVarExprPairs(group.getGroupByList(), equivClasses);
-            Pair<Boolean, Boolean> r2 = processVarExprPairs(group.getDecorList(), equivClasses);
-            modified = modified || r1.first || r2.first;
-            ecChange = r1.second || r2.second;
-        }
-        if (op.getOperatorTag() == LogicalOperatorTag.PROJECT) {
-            assignVarsNeededByProject((ProjectOperator) op, equivClasses, context);
-        } else {
-            if (op.getOperatorTag() == LogicalOperatorTag.WRITE) {
-                substVisitorForWrites.setEquivalenceClasses(equivClasses);
-                if (op.acceptExpressionTransform(substVisitorForWrites)) {
-                    modified = true;
-                }
-            } else {
-                substVisitor.setEquivalenceClasses(equivClasses);
-                if (op.acceptExpressionTransform(substVisitor)) {
-                    modified = true;
-                    if (op.getOperatorTag() == LogicalOperatorTag.GROUP) {
-                        GroupByOperator group = (GroupByOperator) op;
-                        for (Pair<LogicalVariable, Mutable<ILogicalExpression>> gp : group.getGroupByList()) {
-                            if (gp.first != null
-                                    && gp.second.getValue().getExpressionTag() == LogicalExpressionTag.VARIABLE) {
-                                LogicalVariable gv = ((VariableReferenceExpression) gp.second.getValue())
-                                        .getVariableReference();
-                                Iterator<Pair<LogicalVariable, Mutable<ILogicalExpression>>> iter = group
-                                        .getDecorList().iterator();
-                                while (iter.hasNext()) {
-                                    Pair<LogicalVariable, Mutable<ILogicalExpression>> dp = iter.next();
-                                    if (dp.first == null
-                                            && dp.second.getValue().getExpressionTag() == LogicalExpressionTag.VARIABLE) {
-                                        LogicalVariable dv = ((VariableReferenceExpression) dp.second.getValue())
-                                                .getVariableReference();
-                                        if (dv == gv) {
-                                            // The decor variable is redundant,
-                                            // since it is
-                                            // propagated as a grouping
-                                            // variable.
-                                            EquivalenceClass ec1 = findEquivClass(gv, equivClasses);
-                                            if (ec1 != null) {
-                                                ec1.addMember(gp.first);
-                                                ec1.setVariableRepresentative(gp.first);
-                                            } else {
-                                                List<LogicalVariable> varList = new ArrayList<LogicalVariable>();
-                                                varList.add(gp.first);
-                                                varList.add(gv);
-                                                ec1 = new EquivalenceClass(varList, gp.first);
-                                            }
-                                            iter.remove();
-                                            break;
-                                        }
-                                    }
-                                }
-                            }
-                        }
-                    }
-                }
+                varAssignRhs.put(vars.get(i), exprs.get(i).getValue());
             }
         }
-        return new Pair<Boolean, Boolean>(modified, ecChange);
-    }
 
-    private Pair<Boolean, Boolean> processVarExprPairs(
-            List<Pair<LogicalVariable, Mutable<ILogicalExpression>>> vePairs, List<EquivalenceClass> equivClasses) {
-        boolean ecFromGroup = false;
+        // Descend into children removing projects on the way.
         boolean modified = false;
-        for (Pair<LogicalVariable, Mutable<ILogicalExpression>> p : vePairs) {
-            ILogicalExpression expr = p.second.getValue();
-            if (p.first != null && expr.getExpressionTag() == LogicalExpressionTag.VARIABLE) {
-                VariableReferenceExpression varRef = (VariableReferenceExpression) expr;
-                LogicalVariable rhsVar = varRef.getVariableReference();
-                ecFromGroup = true;
-                EquivalenceClass ecRight = findEquivClass(rhsVar, equivClasses);
-                if (ecRight != null) {
-                    LogicalVariable replacingVar = ecRight.getVariableRepresentative();
-                    if (replacingVar != null && replacingVar != rhsVar) {
-                        varRef.setVariable(replacingVar);
-                        modified = true;
-                    }
-                }
-            }
+        for (Mutable<ILogicalOperator> inputOpRef : op.getInputs()) {
+            if (inlineVariables(inputOpRef, context)) {
+                modified = true;
+            }            
         }
-        return new Pair<Boolean, Boolean>(modified, ecFromGroup);
+
+        if (performBottomUpAction(op)) {
+            modified = true;
+        }
+        
+        if (modified) {
+            context.computeAndSetTypeEnvironmentForOperator(op);
+            context.addToDontApplySet(this, op);
+            // Re-enable rules that we may have already tried. They could be applicable now after inlining.
+            context.removeFromAlreadyCompared(opRef.getValue());
+        }
+
+        return modified;
     }
 
-    // Instead of doing this, we could make Projection to be more expressive and
-    // also take constants (or even expression), at the expense of a more
-    // complex project push down.
-    private void assignVarsNeededByProject(ProjectOperator op, List<EquivalenceClass> equivClasses,
-            IOptimizationContext context) throws AlgebricksException {
-        List<LogicalVariable> prVars = op.getVariables();
-        int sz = prVars.size();
-        for (int i = 0; i < sz; i++) {
-            EquivalenceClass ec = findEquivClass(prVars.get(i), equivClasses);
-            if (ec != null) {
-                if (!ec.representativeIsConst()) {
-                    prVars.set(i, ec.getVariableRepresentative());
-                }
-            }
-        }
-    }
-
-    private final static EquivalenceClass findEquivClass(LogicalVariable var, List<EquivalenceClass> equivClasses) {
-        for (EquivalenceClass ec : equivClasses) {
-            if (ec.contains(var)) {
-                return ec;
-            }
-        }
-        return null;
-    }
-
-    private class VariableSubstitutionVisitor implements ILogicalExpressionReferenceTransform {
-        private List<EquivalenceClass> equivClasses;
+    protected class InlineVariablesVisitor implements ILogicalExpressionReferenceTransform {
+        
+        private final Map<LogicalVariable, ILogicalExpression> varAssignRhs;
+        private final Set<LogicalVariable> liveVars = new HashSet<LogicalVariable>();
+        private final List<LogicalVariable> rhsUsedVars = new ArrayList<LogicalVariable>();        
+        private ILogicalOperator op;
         private IOptimizationContext context;
-        private final boolean doNotSubstWithConst;
-
-        public VariableSubstitutionVisitor(boolean doNotSubstWithConst) {
-            this.doNotSubstWithConst = doNotSubstWithConst;
+        // If set, only replace this variable reference.
+        private LogicalVariable targetVar;
+        
+        public InlineVariablesVisitor(Map<LogicalVariable, ILogicalExpression> varAssignRhs) {
+            this.varAssignRhs = varAssignRhs;
         }
-
+        
+        public void setTargetVariable(LogicalVariable targetVar) {
+            this.targetVar = targetVar;
+        }
+        
         public void setContext(IOptimizationContext context) {
             this.context = context;
         }
 
-        public void setEquivalenceClasses(List<EquivalenceClass> equivClasses) {
-            this.equivClasses = equivClasses;
+        public void setOperator(ILogicalOperator op) throws AlgebricksException {
+            this.op = op;
+            liveVars.clear();
         }
-
+        
         @Override
-        public boolean transform(Mutable<ILogicalExpression> exprRef) {
+        public boolean transform(Mutable<ILogicalExpression> exprRef) throws AlgebricksException {            
             ILogicalExpression e = exprRef.getValue();
             switch (((AbstractLogicalExpression) e).getExpressionTag()) {
                 case VARIABLE: {
-                    // look for a required substitution
                     LogicalVariable var = ((VariableReferenceExpression) e).getVariableReference();
+                    // Restrict replacement to targetVar if it has been set.
+                    if (targetVar != null && var != targetVar) {
+                        return false;
+                    }
+                    // Make sure has not been excluded from inlining.
                     if (context.shouldNotBeInlined(var)) {
                         return false;
                     }
-                    EquivalenceClass ec = findEquivClass(var, equivClasses);
-                    if (ec == null) {
+                    ILogicalExpression rhs = varAssignRhs.get(var);
+                    if (rhs == null) {
+                        // Variable was not produced by an assign.
                         return false;
                     }
-                    if (ec.representativeIsConst()) {
-                        if (doNotSubstWithConst) {
-                            return false;
-                        }
-                        exprRef.setValue(ec.getConstRepresentative());
-                        return true;
-                    } else {
-                        LogicalVariable r = ec.getVariableRepresentative();
-                        if (!r.equals(var)) {
-                            exprRef.setValue(new VariableReferenceExpression(r));
-                            return true;
-                        } else {
+                    
+                    // Make sure used variables from rhs are live.
+                    if (liveVars.isEmpty()) {
+                        VariableUtilities.getLiveVariables(op, liveVars);
+                    }
+                    rhsUsedVars.clear();
+                    rhs.getUsedVariables(rhsUsedVars);
+                    for (LogicalVariable rhsUsedVar : rhsUsedVars) {
+                        if (!liveVars.contains(rhsUsedVar)) {
                             return false;
                         }
                     }
+                    
+                    // Replace variable reference with a clone of the rhs expr.
+                    exprRef.setValue(rhs.cloneExpression());
+                    return true;
                 }
                 case FUNCTION_CALL: {
                     AbstractFunctionCallExpression fce = (AbstractFunctionCallExpression) e;
-                    boolean m = false;
+                    boolean modified = false;
                     for (Mutable<ILogicalExpression> arg : fce.getArguments()) {
                         if (transform(arg)) {
-                            m = true;
+                            modified = true;
                         }
                     }
-                    return m;
+                    return modified;
                 }
                 default: {
                     return false;
                 }
             }
         }
-
     }
 }
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/InsertProjectBeforeUnionRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/InsertProjectBeforeUnionRule.java
index d54833e..431fca1 100644
--- a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/InsertProjectBeforeUnionRule.java
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/InsertProjectBeforeUnionRule.java
@@ -87,6 +87,7 @@
         projectOp.getInputs().add(new MutableObject<ILogicalOperator>(parentOp));
         opUnion.getInputs().get(branch).setValue(projectOp);
         projectOp.setPhysicalOperator(new StreamProjectPOperator());
+        context.computeAndSetTypeEnvironmentForOperator(projectOp);
         context.computeAndSetTypeEnvironmentForOperator(parentOp);
     }
 
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
new file mode 100644
index 0000000..a057f4f
--- /dev/null
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/IntroduceProjectsRule.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.algebricks.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * Projects away unused variables at the earliest possible point.
+ * Does a full DFS sweep of the plan adding ProjectOperators in the bottom-up pass.
+ * Also, removes projects that have become useless.
+ * TODO: This rule 'recklessly' adds as many projects as possible, but there is no guarantee
+ * that the overall cost of the plan is reduced since project operators also add a cost.
+ */
+public class IntroduceProjectsRule implements IAlgebraicRewriteRule {
+
+    private final Set<LogicalVariable> usedVars = new HashSet<LogicalVariable>();
+    private final Set<LogicalVariable> liveVars = new HashSet<LogicalVariable>();
+    private final Set<LogicalVariable> producedVars = new HashSet<LogicalVariable>();
+    private final List<LogicalVariable> projectVars = new ArrayList<LogicalVariable>();
+    protected boolean hasRun = false;
+
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
+        return false;
+    }
+
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        if (hasRun) {
+            return false;
+        }
+        hasRun = true;
+        return introduceProjects(null, -1, opRef, Collections.<LogicalVariable> emptySet(), context);
+    }
+
+    protected boolean introduceProjects(AbstractLogicalOperator parentOp, int parentInputIndex,
+            Mutable<ILogicalOperator> opRef, Set<LogicalVariable> parentUsedVars, IOptimizationContext context)
+            throws AlgebricksException {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+        boolean modified = false;
+
+        usedVars.clear();
+        VariableUtilities.getUsedVariables(op, usedVars);
+
+        // In the top-down pass, maintain a set of variables that are used in op and all its parents.
+        HashSet<LogicalVariable> parentsUsedVars = new HashSet<LogicalVariable>();
+        parentsUsedVars.addAll(parentUsedVars);
+        parentsUsedVars.addAll(usedVars);
+
+        // Descend into children.        
+        for (int i = 0; i < op.getInputs().size(); i++) {
+            Mutable<ILogicalOperator> inputOpRef = op.getInputs().get(i);
+            if (introduceProjects(op, i, inputOpRef, parentsUsedVars, context)) {
+                modified = true;
+            }
+        }
+
+        if (modified) {
+            context.computeAndSetTypeEnvironmentForOperator(op);
+        }
+        // In the bottom-up pass, determine which live variables are not used by op's parents.
+        // Such variables are be projected away.
+        liveVars.clear();
+        VariableUtilities.getLiveVariables(op, liveVars);
+        producedVars.clear();
+        VariableUtilities.getProducedVariables(op, producedVars);
+        liveVars.removeAll(producedVars);
+
+        projectVars.clear();
+        for (LogicalVariable liveVar : liveVars) {
+            if (parentsUsedVars.contains(liveVar)) {
+                projectVars.add(liveVar);
+            }
+        }
+
+        // Some of the variables that are live at this op are not used above.
+        if (projectVars.size() != liveVars.size()) {
+            // Add a project operator under each of op's qualifying input branches.
+            for (int i = 0; i < op.getInputs().size(); i++) {
+                ILogicalOperator childOp = op.getInputs().get(i).getValue();
+                liveVars.clear();
+                VariableUtilities.getLiveVariables(childOp, liveVars);
+                List<LogicalVariable> vars = new ArrayList<LogicalVariable>();
+                vars.addAll(projectVars);
+                // Only retain those variables that are live in the i-th input branch.
+                vars.retainAll(liveVars);
+                if (vars.size() != liveVars.size()) {
+                    ProjectOperator projectOp = new ProjectOperator(vars);
+                    projectOp.getInputs().add(new MutableObject<ILogicalOperator>(childOp));
+                    op.getInputs().get(i).setValue(projectOp);
+                    context.computeAndSetTypeEnvironmentForOperator(projectOp);
+                    modified = true;
+                }
+            }
+        } else if (op.getOperatorTag() == LogicalOperatorTag.PROJECT) {
+            // Check if the existing project has become useless.
+            liveVars.clear();
+            VariableUtilities.getLiveVariables(op.getInputs().get(0).getValue(), liveVars);
+            ProjectOperator projectOp = (ProjectOperator) op;
+            List<LogicalVariable> projectVars = projectOp.getVariables();
+            if (liveVars.size() == projectVars.size() && liveVars.containsAll(projectVars)) {
+                boolean eliminateProject = true;
+                // For UnionAll the variables must also be in exactly the correct order.
+                if (parentOp.getOperatorTag() == LogicalOperatorTag.UNIONALL) {
+                    eliminateProject = canEliminateProjectBelowUnion((UnionAllOperator) parentOp, projectOp,
+                            parentInputIndex);
+                }
+                if (eliminateProject) {
+                    // The existing project has become useless. Remove it.
+                    parentOp.getInputs().get(parentInputIndex).setValue(op.getInputs().get(0).getValue());
+                }
+            }
+        }
+
+        if (modified) {
+            context.computeAndSetTypeEnvironmentForOperator(op);
+        }
+        return modified;
+    }
+    
+    private boolean canEliminateProjectBelowUnion(UnionAllOperator unionOp, ProjectOperator projectOp,
+            int unionInputIndex) throws AlgebricksException {
+        List<LogicalVariable> orderedLiveVars = new ArrayList<LogicalVariable>();
+        VariableUtilities.getLiveVariables(projectOp.getInputs().get(0).getValue(), orderedLiveVars);
+        int numVars = orderedLiveVars.size();
+        for (int i = 0; i < numVars; i++) {
+            Triple<LogicalVariable, LogicalVariable, LogicalVariable> varTriple = unionOp.getVariableMappings().get(i);
+            if (unionInputIndex == 0) {
+                if (varTriple.first != orderedLiveVars.get(i)) {
+                    return false;
+                }
+            } else {
+                if (varTriple.second != orderedLiveVars.get(i)) {
+                    return false;
+                }
+            }
+        }
+        return true;
+    }
+}
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PullSelectOutOfEqJoin.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PullSelectOutOfEqJoin.java
index 75862cf..8b4f0a1 100644
--- a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PullSelectOutOfEqJoin.java
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PullSelectOutOfEqJoin.java
@@ -38,9 +38,6 @@
 
 public class PullSelectOutOfEqJoin implements IAlgebraicRewriteRule {
 
-    private List<Mutable<ILogicalExpression>> eqVarVarComps = new ArrayList<Mutable<ILogicalExpression>>();
-    private List<Mutable<ILogicalExpression>> otherPredicates = new ArrayList<Mutable<ILogicalExpression>>();
-
     @Override
     public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
         return false;
@@ -66,8 +63,8 @@
         if (!fi.equals(AlgebricksBuiltinFunctions.AND)) {
             return false;
         }
-        eqVarVarComps.clear();
-        otherPredicates.clear();
+        List<Mutable<ILogicalExpression>> eqVarVarComps = new ArrayList<Mutable<ILogicalExpression>>();
+        List<Mutable<ILogicalExpression>> otherPredicates = new ArrayList<Mutable<ILogicalExpression>>();
         for (Mutable<ILogicalExpression> arg : fexp.getArguments()) {
             if (isEqVarVar(arg.getValue())) {
                 eqVarVarComps.add(arg);
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PushAssignBelowUnionAllRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PushAssignBelowUnionAllRule.java
new file mode 100644
index 0000000..1bcf95a
--- /dev/null
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PushAssignBelowUnionAllRule.java
@@ -0,0 +1,149 @@
+package edu.uci.ics.hyracks.algebricks.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * Pushes an AssignOperator below a UnionAll operator by creating an new AssignOperator below each of 
+ * the UnionAllOperator's branches with appropriate variable replacements.
+ * This rule can help to enable other rules that are difficult to fire across a UnionAllOperator, 
+ * for example, eliminating common sub-expressions.
+ * 
+ * Example:
+ * 
+ * Before plan:
+ * ...
+ * assign [$$20, $$21] <- [funcA($$3), funcB($$6)]
+ *   union ($$1, $$2, $$3) ($$4, $$5, $$6)
+ *     union_branch_0
+ *       ...
+ *     union_branch_1
+ *       ...
+ *     
+ * After plan:
+ * ...
+ * union ($$1, $$2, $$3) ($$4, $$5, $$6) ($$22, $$24, $$20) ($$23, $$25, $$21)
+ *   assign [$$22, $$23] <- [funcA($$1), funcB($$4)]
+ *     union_branch_0
+ *       ...
+ *   assign [$$24, $$25] <- [funcA($$2), funcB($$5)]
+ *     union_branch_1
+ *       ...
+ */
+public class PushAssignBelowUnionAllRule implements IAlgebraicRewriteRule {
+
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        return false;
+    }
+
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+        if (!op.hasInputs()) {
+            return false;
+        }
+
+        boolean modified = false;
+        for (int i = 0; i < op.getInputs().size(); i++) {
+            AbstractLogicalOperator childOp = (AbstractLogicalOperator) op.getInputs().get(i).getValue();
+            if (childOp.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
+                continue;
+            }
+            AssignOperator assignOp = (AssignOperator) childOp;
+
+            AbstractLogicalOperator childOfChildOp = (AbstractLogicalOperator) assignOp.getInputs().get(0).getValue();
+            if (childOfChildOp.getOperatorTag() != LogicalOperatorTag.UNIONALL) {
+                continue;
+            }
+            UnionAllOperator unionOp = (UnionAllOperator) childOfChildOp;
+
+            Set<LogicalVariable> assignUsedVars = new HashSet<LogicalVariable>();
+            VariableUtilities.getUsedVariables(assignOp, assignUsedVars);
+
+            List<LogicalVariable> assignVars = assignOp.getVariables();
+
+            AssignOperator[] newAssignOps = new AssignOperator[2];
+            for (int j = 0; j < unionOp.getInputs().size(); j++) {
+                newAssignOps[j] = createAssignBelowUnionAllBranch(unionOp, j, assignOp, assignUsedVars, context);
+            }
+            // Add original assign variables to the union variable mappings.
+            for (int j = 0; j < assignVars.size(); j++) {
+                LogicalVariable first = newAssignOps[0].getVariables().get(j);
+                LogicalVariable second = newAssignOps[1].getVariables().get(j);
+                Triple<LogicalVariable, LogicalVariable, LogicalVariable> varMapping = new Triple<LogicalVariable, LogicalVariable, LogicalVariable>(
+                        first, second, assignVars.get(j));
+                unionOp.getVariableMappings().add(varMapping);
+            }
+            context.computeAndSetTypeEnvironmentForOperator(unionOp);
+
+            // Remove original assign operator.
+            op.getInputs().set(i, assignOp.getInputs().get(0));
+            context.computeAndSetTypeEnvironmentForOperator(op);
+            modified = true;
+        }
+
+        return modified;
+    }
+
+    private AssignOperator createAssignBelowUnionAllBranch(UnionAllOperator unionOp, int inputIndex,
+            AssignOperator originalAssignOp, Set<LogicalVariable> assignUsedVars, IOptimizationContext context)
+            throws AlgebricksException {
+        AssignOperator newAssignOp = cloneAssignOperator(originalAssignOp, context);
+        newAssignOp.getInputs()
+                .add(new MutableObject<ILogicalOperator>(unionOp.getInputs().get(inputIndex).getValue()));
+        context.computeAndSetTypeEnvironmentForOperator(newAssignOp);
+        unionOp.getInputs().get(inputIndex).setValue(newAssignOp);
+        int numVarMappings = unionOp.getVariableMappings().size();
+        for (int i = 0; i < numVarMappings; i++) {
+            Triple<LogicalVariable, LogicalVariable, LogicalVariable> varMapping = unionOp.getVariableMappings().get(i);
+            if (assignUsedVars.contains(varMapping.third)) {
+                LogicalVariable replacementVar;
+                if (inputIndex == 0) {
+                    replacementVar = varMapping.first;
+                } else {
+                    replacementVar = varMapping.second;
+                }
+                VariableUtilities.substituteVariables(newAssignOp, varMapping.third, replacementVar, context);
+            }
+        }
+        return newAssignOp;
+    }
+
+    /**
+     * Clones the given assign operator changing the returned variables to be new ones.
+     * Also, leaves the inputs of the clone clear.
+     */
+    private AssignOperator cloneAssignOperator(AssignOperator assignOp, IOptimizationContext context) {
+        List<LogicalVariable> vars = new ArrayList<LogicalVariable>();
+        List<Mutable<ILogicalExpression>> exprs = new ArrayList<Mutable<ILogicalExpression>>();
+        int numVars = assignOp.getVariables().size();
+        for (int i = 0; i < numVars; i++) {
+            vars.add(context.newVar());
+            exprs.add(new MutableObject<ILogicalExpression>(assignOp.getExpressions().get(i).getValue()
+                    .cloneExpression()));
+        }
+        AssignOperator assignCloneOp = new AssignOperator(vars, exprs);
+        assignCloneOp.setExecutionMode(assignOp.getExecutionMode());
+        return assignCloneOp;
+    }
+}
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PushFunctionsBelowJoin.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PushFunctionsBelowJoin.java
new file mode 100644
index 0000000..16b010e
--- /dev/null
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PushFunctionsBelowJoin.java
@@ -0,0 +1,208 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.algebricks.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+import org.apache.commons.lang3.mutable.MutableObject;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractLogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * Pushes function-call expressions below a join if possible.
+ * Assigns the result of such function-calls expressions to new variables, and replaces the original
+ * expression with a corresponding variable reference expression.
+ * This rule can help reduce the cost of computing expensive functions by pushing them below
+ * a join (which may blow up the cardinality).
+ * Also, this rule may help to enable other rules such as common subexpression elimination, again to reduce
+ * the number of calls to expensive functions.
+ * 
+ * Example: (we are pushing pushMeFunc)
+ * 
+ * Before plan:
+ * assign [$$10] <- [funcA(funcB(pushMeFunc($$3, $$4)))]
+ *   join (some condition) 
+ *     join_branch_0 where $$3 and $$4 are not live
+ *       ...
+ *     join_branch_1 where $$3 and $$4 are live
+ *       ...
+ * 
+ * After plan:
+ * assign [$$10] <- [funcA(funcB($$11))]
+ *   join (some condition) 
+ *     join_branch_0 where $$3 and $$4 are not live
+ *       ...
+ *     join_branch_1 where $$3 and $$4 are live
+ *       assign[$$11] <- [pushMeFunc($$3, $$4)]
+ *         ...
+ */
+public class PushFunctionsBelowJoin implements IAlgebraicRewriteRule {
+
+    private final Set<FunctionIdentifier> toPushFuncIdents;
+    private final List<Mutable<ILogicalExpression>> funcExprs = new ArrayList<Mutable<ILogicalExpression>>();
+    private final List<LogicalVariable> usedVars = new ArrayList<LogicalVariable>();
+    private final List<LogicalVariable> liveVars = new ArrayList<LogicalVariable>();
+
+    public PushFunctionsBelowJoin(Set<FunctionIdentifier> toPushFuncIdents) {
+        this.toPushFuncIdents = toPushFuncIdents;
+    }
+
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        return false;
+    }
+
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+        if (op.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
+            return false;
+        }
+        AssignOperator assignOp = (AssignOperator) op;
+
+        // Find a join operator below this assign.
+        Mutable<ILogicalOperator> joinOpRef = findJoinOp(assignOp.getInputs().get(0));
+        if (joinOpRef == null) {
+            return false;
+        }
+        AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) joinOpRef.getValue();
+
+        // Check if the assign uses a function that we wish to push below the join if possible.
+        funcExprs.clear();
+        gatherFunctionCalls(assignOp, funcExprs);
+        if (funcExprs.isEmpty()) {
+            return false;
+        }
+
+        // Try to push the functions down the input branches of the join.
+        boolean modified = false;
+        if (pushDownFunctions(joinOp, 0, funcExprs, context)) {
+            modified = true;
+        }
+        if (pushDownFunctions(joinOp, 1, funcExprs, context)) {
+            modified = true;
+        }
+        if (modified) {
+            context.computeAndSetTypeEnvironmentForOperator(joinOp);
+        }
+        return modified;
+    }
+
+    private Mutable<ILogicalOperator> findJoinOp(Mutable<ILogicalOperator> opRef) {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+        switch (op.getOperatorTag()) {
+            case INNERJOIN:
+            case LEFTOUTERJOIN: {
+                return opRef;
+            }
+            // Bail on these operators.
+            case GROUP:
+            case AGGREGATE:
+            case DISTINCT:
+            case UNNEST_MAP: {
+                return null;
+            }
+            // Traverse children.
+            default: {
+                for (Mutable<ILogicalOperator> childOpRef : op.getInputs()) {
+                    return findJoinOp(childOpRef);
+                }
+            }
+        }
+        return null;
+    }
+
+    private void gatherFunctionCalls(AssignOperator assignOp, List<Mutable<ILogicalExpression>> funcExprs) {
+        for (Mutable<ILogicalExpression> exprRef : assignOp.getExpressions()) {
+            gatherFunctionCalls(exprRef, funcExprs);
+        }
+    }
+
+    private void gatherFunctionCalls(Mutable<ILogicalExpression> exprRef, List<Mutable<ILogicalExpression>> funcExprs) {
+        AbstractLogicalExpression expr = (AbstractLogicalExpression) exprRef.getValue();
+        if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+            return;
+        }
+        // Check whether the function is a function we want to push.
+        AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr;
+        if (toPushFuncIdents.contains(funcExpr.getFunctionIdentifier())) {
+            funcExprs.add(exprRef);
+        }
+        // Traverse arguments.
+        for (Mutable<ILogicalExpression> funcArg : funcExpr.getArguments()) {
+            gatherFunctionCalls(funcArg, funcExprs);
+        }
+    }
+
+    private boolean pushDownFunctions(AbstractBinaryJoinOperator joinOp, int inputIndex,
+            List<Mutable<ILogicalExpression>> funcExprs, IOptimizationContext context) throws AlgebricksException {
+        ILogicalOperator joinInputOp = joinOp.getInputs().get(inputIndex).getValue();
+        liveVars.clear();
+        VariableUtilities.getLiveVariables(joinInputOp, liveVars);
+        Iterator<Mutable<ILogicalExpression>> funcIter = funcExprs.iterator();
+        List<LogicalVariable> assignVars = null;
+        List<Mutable<ILogicalExpression>> assignExprs = null;
+        while (funcIter.hasNext()) {
+            Mutable<ILogicalExpression> funcExprRef = funcIter.next();
+            ILogicalExpression funcExpr = funcExprRef.getValue();
+            usedVars.clear();
+            funcExpr.getUsedVariables(usedVars);
+            // Check if we can push the function down this branch.
+            if (liveVars.containsAll(usedVars)) {
+                if (assignVars == null) {
+                    assignVars = new ArrayList<LogicalVariable>();
+                    assignExprs = new ArrayList<Mutable<ILogicalExpression>>();
+                }
+                // Replace the original expression with a variable reference expression.
+                LogicalVariable replacementVar = context.newVar();
+                assignVars.add(replacementVar);
+                assignExprs.add(new MutableObject<ILogicalExpression>(funcExpr));
+                funcExprRef.setValue(new VariableReferenceExpression(replacementVar));
+                funcIter.remove();
+            }
+        }
+        // Create new assign operator below the join if any functions can be pushed.
+        if (assignVars != null) {
+            AssignOperator newAssign = new AssignOperator(assignVars, assignExprs);
+            newAssign.getInputs().add(new MutableObject<ILogicalOperator>(joinInputOp));
+            newAssign.setExecutionMode(joinOp.getExecutionMode());
+            joinOp.getInputs().get(inputIndex).setValue(newAssign);
+            context.computeAndSetTypeEnvironmentForOperator(newAssign);
+            return true;
+        }
+        return false;
+    }
+}
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PushSelectIntoJoinRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PushSelectIntoJoinRule.java
index 8c679c5..99a6b8c 100644
--- a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PushSelectIntoJoinRule.java
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/PushSelectIntoJoinRule.java
@@ -143,11 +143,11 @@
         if (!intersectsBranch[0] && !intersectsBranch[1]) {
             return false;
         }
+        if (needToPushOps) {
+            pushOps(pushedOnLeft, joinBranchLeftRef, context);
+            pushOps(pushedOnRight, joinBranchRightRef, context);
+        }
         if (intersectsAllBranches) {
-            if (needToPushOps) {
-                pushOps(pushedOnLeft, joinBranchLeftRef, context);
-                pushOps(pushedOnRight, joinBranchRightRef, context);
-            }
             addCondToJoin(select, join, context);
         } else { // push down
             Iterator<Mutable<ILogicalOperator>> branchIter = join.getInputs().iterator();
@@ -156,13 +156,6 @@
                 Mutable<ILogicalOperator> branch = branchIter.next();
                 boolean inter = intersectsBranch[j];
                 if (inter) {
-                    if (needToPushOps) {
-                        if (j == 0) {
-                            pushOps(pushedOnLeft, joinBranchLeftRef, context);
-                        } else {
-                            pushOps(pushedOnRight, joinBranchRightRef, context);
-                        }
-                    }
                     copySelectToBranch(select, branch, context);
                 }
 
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/RemoveRedundantGroupByDecorVars.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/RemoveRedundantGroupByDecorVars.java
new file mode 100644
index 0000000..1106898
--- /dev/null
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/RemoveRedundantGroupByDecorVars.java
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.algebricks.rewriter.rules;
+
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import org.apache.commons.lang3.mutable.Mutable;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * Removes duplicate variables from a group-by operator's decor list.
+ */
+public class RemoveRedundantGroupByDecorVars implements IAlgebraicRewriteRule {
+
+    private final Set<LogicalVariable> vars = new HashSet<LogicalVariable>();
+    
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+        if (op.getOperatorTag() != LogicalOperatorTag.GROUP) {
+            return false;
+        }
+        if (context.checkIfInDontApplySet(this, op)) {
+            return false;
+        }
+        vars.clear();
+        
+        boolean modified = false;
+        GroupByOperator groupOp = (GroupByOperator) op;
+        Iterator<Pair<LogicalVariable, Mutable<ILogicalExpression>>> iter = groupOp.getDecorList().iterator();
+        while (iter.hasNext()) {
+            Pair<LogicalVariable, Mutable<ILogicalExpression>> decor = iter.next();
+            if (decor.first != null || decor.second.getValue().getExpressionTag() != LogicalExpressionTag.VARIABLE) {
+                continue;
+            }
+            VariableReferenceExpression varRefExpr = (VariableReferenceExpression) decor.second.getValue();
+            LogicalVariable var = varRefExpr.getVariableReference();
+            if (vars.contains(var)) {
+                iter.remove();
+                modified = true;
+            } else {
+                vars.add(var);
+            }
+        }
+        if (modified) {
+            context.addToDontApplySet(this, op);
+        }
+        return modified;
+    }
+
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        return false;
+    }
+}
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/RemoveRedundantVariablesRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/RemoveRedundantVariablesRule.java
new file mode 100644
index 0000000..ec57be5
--- /dev/null
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/RemoveRedundantVariablesRule.java
@@ -0,0 +1,291 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.algebricks.rewriter.rules;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.lang3.mutable.Mutable;
+
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.IOptimizationContext;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.AbstractLogicalExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractOperatorWithNestedPlans;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.GroupByOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.visitors.ILogicalExpressionReferenceTransform;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+
+/**
+ * Replaces redundant variable references with their bottom-most equivalent representative.
+ * Does a DFS sweep over the plan keeping track of variable equivalence classes.
+ * For example, this rule would perform the following rewrite.
+ * 
+ * Before Plan:
+ * select (function-call: func, Args:[%0->$$11])
+ *   project [$11]
+ *     assign [$$11] <- [$$10]
+ *       assign [$$10] <- [$$9]
+ *         assign [$$9] <- ...
+ *           ...
+ *           
+ * After Plan:
+ * select (function-call: func, Args:[%0->$$9])
+ *   project [$9]
+ *     assign [$$11] <- [$$9]
+ *       assign [$$10] <- [$$9]
+ *         assign [$$9] <- ...
+ *           ...
+ */
+public class RemoveRedundantVariablesRule implements IAlgebraicRewriteRule {
+
+    private final VariableSubstitutionVisitor substVisitor = new VariableSubstitutionVisitor();
+    private final Map<LogicalVariable, List<LogicalVariable>> equivalentVarsMap = new HashMap<LogicalVariable, List<LogicalVariable>>();
+
+    protected boolean hasRun = false;
+    
+    @Override
+    public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
+        return false;
+    }
+
+    @Override
+    public boolean rewritePre(Mutable<ILogicalOperator> opRef, IOptimizationContext context) throws AlgebricksException {
+        if (context.checkIfInDontApplySet(this, opRef.getValue())) {
+            return false;
+        }
+        equivalentVarsMap.clear();
+        boolean modified = removeRedundantVariables(opRef, context);
+        if (modified) {
+            context.computeAndSetTypeEnvironmentForOperator(opRef.getValue());
+        }
+        return modified;
+    }
+
+    private void updateEquivalenceClassMap(LogicalVariable lhs, LogicalVariable rhs) {
+        List<LogicalVariable> equivalentVars = equivalentVarsMap.get(rhs);
+        if (equivalentVars == null) {
+            equivalentVars = new ArrayList<LogicalVariable>();
+            // The first element in the list is the bottom-most representative which will replace all equivalent vars.
+            equivalentVars.add(rhs);
+            equivalentVars.add(lhs);
+            equivalentVarsMap.put(rhs, equivalentVars);
+        }
+        equivalentVarsMap.put(lhs, equivalentVars);
+        equivalentVars.get(0);
+    }
+
+    private boolean removeRedundantVariables(Mutable<ILogicalOperator> opRef, IOptimizationContext context)
+            throws AlgebricksException {
+        AbstractLogicalOperator op = (AbstractLogicalOperator) opRef.getValue();
+        boolean modified = false;
+
+        // Recurse into children.
+        for (Mutable<ILogicalOperator> inputOpRef : op.getInputs()) {
+            if (removeRedundantVariables(inputOpRef, context)) {
+                modified = true;
+            }
+        }
+
+        // Update equivalence class map.
+        if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
+            AssignOperator assignOp = (AssignOperator) op;
+            int numVars = assignOp.getVariables().size();
+            for (int i = 0; i < numVars; i++) {
+                ILogicalExpression expr = assignOp.getExpressions().get(i).getValue();
+                if (expr.getExpressionTag() != LogicalExpressionTag.VARIABLE) {
+                    continue;
+                }
+                VariableReferenceExpression rhsVarRefExpr = (VariableReferenceExpression) expr;
+                // Update equivalence class map.
+                LogicalVariable lhs = assignOp.getVariables().get(i);
+                LogicalVariable rhs = rhsVarRefExpr.getVariableReference();
+                updateEquivalenceClassMap(lhs, rhs);
+            }
+        }
+
+        // Replace variable references with their first representative. 
+        if (op.getOperatorTag() == LogicalOperatorTag.PROJECT) {
+            // The project operator does not use expressions, so we need to replace it's variables manually.
+            if (replaceProjectVars((ProjectOperator) op)) {
+                modified = true;
+            }
+        } else if(op.getOperatorTag() == LogicalOperatorTag.UNIONALL) {
+            // Replace redundant variables manually in the UnionAll operator.
+            if (replaceUnionAllVars((UnionAllOperator) op)) {
+                modified = true;
+            }
+        } else {
+            if (op.acceptExpressionTransform(substVisitor)) {
+                modified = true;
+            }
+        }
+
+        // Perform variable replacement in nested plans. 
+        if (op.hasNestedPlans()) {
+            AbstractOperatorWithNestedPlans opWithNestedPlan = (AbstractOperatorWithNestedPlans) op;
+            for (ILogicalPlan nestedPlan : opWithNestedPlan.getNestedPlans()) {
+                for (Mutable<ILogicalOperator> rootRef : nestedPlan.getRoots()) {
+                    if (removeRedundantVariables(rootRef, context)) {
+                        modified = true;
+                    }
+                }
+            }
+        }
+
+        // Deal with re-mapping of variables in group by.
+        if (op.getOperatorTag() == LogicalOperatorTag.GROUP) {
+            if (handleGroupByVarRemapping((GroupByOperator) op)) {
+                modified = true;
+            }
+        }
+
+        if (modified) {
+            context.computeAndSetTypeEnvironmentForOperator(op);
+            context.addToDontApplySet(this, op);
+        }
+        return modified;
+    }
+
+    private boolean handleGroupByVarRemapping(GroupByOperator groupOp) {
+        boolean modified = false;
+        for (Pair<LogicalVariable, Mutable<ILogicalExpression>> gp : groupOp.getGroupByList()) {
+            if (gp.first == null || gp.second.getValue().getExpressionTag() != LogicalExpressionTag.VARIABLE) {
+                continue;
+            }
+            LogicalVariable groupByVar = ((VariableReferenceExpression) gp.second.getValue()).getVariableReference();
+            Iterator<Pair<LogicalVariable, Mutable<ILogicalExpression>>> iter = groupOp.getDecorList().iterator();
+            while (iter.hasNext()) {
+                Pair<LogicalVariable, Mutable<ILogicalExpression>> dp = iter.next();
+                if (dp.first != null || dp.second.getValue().getExpressionTag() != LogicalExpressionTag.VARIABLE) {
+                    continue;
+                }
+                LogicalVariable dv = ((VariableReferenceExpression) dp.second.getValue()).getVariableReference();
+                if (dv == groupByVar) {
+                    // The decor variable is redundant, since it is propagated as a grouping variable.
+                    List<LogicalVariable> equivalentVars = equivalentVarsMap.get(groupByVar);
+                    if (equivalentVars != null) {
+                        // Change representative of this equivalence class.
+                        equivalentVars.set(0, gp.first);
+                        equivalentVarsMap.put(gp.first, equivalentVars);
+                    } else {
+                        updateEquivalenceClassMap(gp.first, groupByVar);
+                    }
+                    iter.remove();
+                    modified = true;
+                    break;
+                }
+            }
+        }
+        return modified;
+    }
+
+    /**
+     * Replace the projects's variables with their corresponding representative
+     * from the equivalence class map (if any).
+     * We cannot use the VariableSubstitutionVisitor here because the project ops
+     * maintain their variables as a list and not as expressions.
+     */
+    private boolean replaceProjectVars(ProjectOperator op) throws AlgebricksException {
+        List<LogicalVariable> vars = op.getVariables();
+        int size = vars.size();
+        boolean modified = false;
+        for (int i = 0; i < size; i++) {
+            LogicalVariable var = vars.get(i);
+            List<LogicalVariable> equivalentVars = equivalentVarsMap.get(var);
+            if (equivalentVars == null) {
+                continue;
+            }
+            // Replace with equivalence class representative.
+            LogicalVariable representative = equivalentVars.get(0);
+            if (representative != var) {
+                vars.set(i, equivalentVars.get(0));
+                modified = true;
+            }
+        }
+        return modified;
+    }
+
+    private boolean replaceUnionAllVars(UnionAllOperator op) throws AlgebricksException {
+        boolean modified = false;
+        for (Triple<LogicalVariable, LogicalVariable, LogicalVariable> varMapping : op.getVariableMappings()) {
+            List<LogicalVariable> firstEquivalentVars = equivalentVarsMap.get(varMapping.first);
+            List<LogicalVariable> secondEquivalentVars = equivalentVarsMap.get(varMapping.second);
+            // Replace variables with their representative.
+            if (firstEquivalentVars != null) {
+                varMapping.first = firstEquivalentVars.get(0);
+                modified = true;
+            }
+            if (secondEquivalentVars != null) {
+                varMapping.second = secondEquivalentVars.get(0);
+                modified = true;
+            }
+        }
+        return modified;
+    }
+    
+    private class VariableSubstitutionVisitor implements ILogicalExpressionReferenceTransform {
+        @Override
+        public boolean transform(Mutable<ILogicalExpression> exprRef) {
+            ILogicalExpression e = exprRef.getValue();
+            switch (((AbstractLogicalExpression) e).getExpressionTag()) {
+                case VARIABLE: {
+                    // Replace variable references with their equivalent representative in the equivalence class map.
+                    VariableReferenceExpression varRefExpr = (VariableReferenceExpression) e;
+                    LogicalVariable var = varRefExpr.getVariableReference();
+                    List<LogicalVariable> equivalentVars = equivalentVarsMap.get(var);
+                    if (equivalentVars == null) {
+                        return false;
+                    }
+                    LogicalVariable representative = equivalentVars.get(0);
+                    if (representative != var) {
+                        varRefExpr.setVariable(representative);
+                        return true;
+                    }
+                    return false;
+                }
+                case FUNCTION_CALL: {
+                    AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) e;
+                    boolean modified = false;
+                    for (Mutable<ILogicalExpression> arg : funcExpr.getArguments()) {
+                        if (transform(arg)) {
+                            modified = true;
+                        }
+                    }
+                    return modified;
+                }
+                default: {
+                    return false;
+                }
+            }
+        }
+    }
+}
diff --git a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java
index c53ea0a..e0c2741 100644
--- a/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java
+++ b/algebricks/algebricks-rewriter/src/main/java/edu/uci/ics/hyracks/algebricks/rewriter/rules/RemoveUnusedAssignAndAggregateRule.java
@@ -23,6 +23,7 @@
 import org.apache.commons.lang3.mutable.Mutable;
 
 import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Triple;
 import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalOperator;
 import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
@@ -33,10 +34,14 @@
 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AbstractOperatorWithNestedPlans;
 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
+import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnionAllOperator;
 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.UnnestOperator;
 import edu.uci.ics.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
 import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
 
+/**
+ * Removes unused variables from Assign, Unnest, Aggregate, and UnionAll operators.
+ */
 public class RemoveUnusedAssignAndAggregateRule implements IAlgebraicRewriteRule {
 
     @Override
@@ -55,7 +60,7 @@
         if (smthToRemove) {
             removeUnusedAssigns(opRef, toRemove, context);
         }
-        return smthToRemove;
+        return !toRemove.isEmpty();
     }
 
     private void removeUnusedAssigns(Mutable<ILogicalOperator> opRef, Set<LogicalVariable> toRemove,
@@ -87,28 +92,59 @@
 
     private int removeFromAssigns(AbstractLogicalOperator op, Set<LogicalVariable> toRemove,
             IOptimizationContext context) throws AlgebricksException {
-        if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
-            AssignOperator assign = (AssignOperator) op;
-            if (removeUnusedVarsAndExprs(toRemove, assign.getVariables(), assign.getExpressions())) {
-                context.computeAndSetTypeEnvironmentForOperator(assign);
+        switch (op.getOperatorTag()) {
+            case ASSIGN: {
+                AssignOperator assign = (AssignOperator) op;
+                if (removeUnusedVarsAndExprs(toRemove, assign.getVariables(), assign.getExpressions())) {
+                    context.computeAndSetTypeEnvironmentForOperator(assign);
+                }
+                return assign.getVariables().size();
             }
-            return assign.getVariables().size();
-        } else if (op.getOperatorTag() == LogicalOperatorTag.AGGREGATE) {
-            AggregateOperator agg = (AggregateOperator) op;
-            if (removeUnusedVarsAndExprs(toRemove, agg.getVariables(), agg.getExpressions())) {
-                context.computeAndSetTypeEnvironmentForOperator(agg);
+            case AGGREGATE: {
+                AggregateOperator agg = (AggregateOperator) op;
+                if (removeUnusedVarsAndExprs(toRemove, agg.getVariables(), agg.getExpressions())) {
+                    context.computeAndSetTypeEnvironmentForOperator(agg);
+                }
+                return agg.getVariables().size();
             }
-            return agg.getVariables().size();
-        } else if (op.getOperatorTag() == LogicalOperatorTag.UNNEST) {
-            UnnestOperator uOp = (UnnestOperator) op;
-            LogicalVariable pVar = uOp.getPositionalVariable();
-            if (pVar != null && toRemove.contains(pVar)) {
-                uOp.setPositionalVariable(null);
+            case UNNEST: {
+                UnnestOperator uOp = (UnnestOperator) op;
+                LogicalVariable pVar = uOp.getPositionalVariable();
+                if (pVar != null && toRemove.contains(pVar)) {
+                    uOp.setPositionalVariable(null);
+                }
+                break;
+            }
+            case UNIONALL: {
+                UnionAllOperator unionOp = (UnionAllOperator) op;
+                if (removeUnusedVarsFromUnionAll(unionOp, toRemove)) {
+                    context.computeAndSetTypeEnvironmentForOperator(unionOp);
+                }
+                return unionOp.getVariableMappings().size();
             }
         }
         return -1;
     }
 
+    private boolean removeUnusedVarsFromUnionAll(UnionAllOperator unionOp, Set<LogicalVariable> toRemove) {
+        Iterator<Triple<LogicalVariable, LogicalVariable, LogicalVariable>> iter = unionOp.getVariableMappings()
+                .iterator();
+        boolean modified = false;
+        Set<LogicalVariable> removeFromRemoveSet = new HashSet<LogicalVariable>();
+        while (iter.hasNext()) {
+            Triple<LogicalVariable, LogicalVariable, LogicalVariable> varMapping = iter.next();
+            if (toRemove.contains(varMapping.third)) {
+                iter.remove();
+                modified = true;
+            }
+            // In any case, make sure we do not removing these variables.
+            removeFromRemoveSet.add(varMapping.first);
+            removeFromRemoveSet.add(varMapping.second);
+        }
+        toRemove.removeAll(removeFromRemoveSet);
+        return modified;
+    }
+
     private boolean removeUnusedVarsAndExprs(Set<LogicalVariable> toRemove, List<LogicalVariable> varList,
             List<Mutable<ILogicalExpression>> exprList) {
         boolean changed = false;
@@ -142,22 +178,41 @@
                 }
             }
         }
-        if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
-            AssignOperator assign = (AssignOperator) op;
-            toRemove.addAll(assign.getVariables());
-        } else if (op.getOperatorTag() == LogicalOperatorTag.AGGREGATE) {
-            AggregateOperator agg = (AggregateOperator) op;
-            toRemove.addAll(agg.getVariables());
-        } else if (op.getOperatorTag() == LogicalOperatorTag.UNNEST) {
-            UnnestOperator uOp = (UnnestOperator) op;
-            LogicalVariable pVar = uOp.getPositionalVariable();
-            if (pVar != null) {
-                toRemove.add(pVar);
+        boolean removeUsedVars = true;
+        switch (op.getOperatorTag()) {
+            case ASSIGN: {
+                AssignOperator assign = (AssignOperator) op;
+                toRemove.addAll(assign.getVariables());
+                break;
+            }
+            case AGGREGATE: {
+                AggregateOperator agg = (AggregateOperator) op;
+                toRemove.addAll(agg.getVariables());
+                break;
+            }
+            case UNNEST: {
+                UnnestOperator uOp = (UnnestOperator) op;
+                LogicalVariable pVar = uOp.getPositionalVariable();
+                if (pVar != null) {
+                    toRemove.add(pVar);
+                }
+                break;
+            }
+            case UNIONALL: {
+                UnionAllOperator unionOp = (UnionAllOperator) op;
+                for (Triple<LogicalVariable, LogicalVariable, LogicalVariable> varMapping : unionOp
+                        .getVariableMappings()) {
+                    toRemove.add(varMapping.third);
+                }
+                removeUsedVars = false;
+                break;
             }
         }
-        List<LogicalVariable> used = new LinkedList<LogicalVariable>();
-        VariableUtilities.getUsedVariables(op, used);
-        toRemove.removeAll(used);
+        if (removeUsedVars) {
+            List<LogicalVariable> used = new LinkedList<LogicalVariable>();
+            VariableUtilities.getUsedVariables(op, used);
+            toRemove.removeAll(used);
+        }
     }
 
 }
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
index 0485380..1fb973e 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
+++ b/hivesterix/src/main/java/edu/uci/ics/hivesterix/logical/plan/HiveAlgebricksTranslator.java
@@ -80,770 +80,729 @@
 @SuppressWarnings("rawtypes")

 public class HiveAlgebricksTranslator implements Translator {

 

-	private int currentVariable = 0;

+    private int currentVariable = 0;

 

-	private List<Mutable<ILogicalOperator>> logicalOp = new ArrayList<Mutable<ILogicalOperator>>();

+    private List<Mutable<ILogicalOperator>> logicalOp = new ArrayList<Mutable<ILogicalOperator>>();

 

-	private boolean continueTraverse = true;

+    private boolean continueTraverse = true;

 

-	private IMetadataProvider<PartitionDesc, Object> metaData;

+    private IMetadataProvider<PartitionDesc, Object> metaData;

 

-	/**

-	 * map variable name to the logical variable

-	 */

-	private HashMap<String, LogicalVariable> nameToLogicalVariableMap = new HashMap<String, LogicalVariable>();

+    /**

+     * map variable name to the logical variable

+     */

+    private HashMap<String, LogicalVariable> nameToLogicalVariableMap = new HashMap<String, LogicalVariable>();

 

-	/**

-	 * map field name to LogicalVariable

-	 */

-	private HashMap<String, LogicalVariable> fieldToLogicalVariableMap = new HashMap<String, LogicalVariable>();

+    /**

+     * map field name to LogicalVariable

+     */

+    private HashMap<String, LogicalVariable> fieldToLogicalVariableMap = new HashMap<String, LogicalVariable>();

 

-	/**

-	 * map logical variable to name

-	 */

-	private HashMap<LogicalVariable, String> logicalVariableToFieldMap = new HashMap<LogicalVariable, String>();

+    /**

+     * map logical variable to name

+     */

+    private HashMap<LogicalVariable, String> logicalVariableToFieldMap = new HashMap<LogicalVariable, String>();

 

-	/**

-	 * asterix root operators

-	 */

-	private List<Mutable<ILogicalOperator>> rootOperators = new ArrayList<Mutable<ILogicalOperator>>();

+    /**

+     * asterix root operators

+     */

+    private List<Mutable<ILogicalOperator>> rootOperators = new ArrayList<Mutable<ILogicalOperator>>();

 

-	/**

-	 * a list of visitors

-	 */

-	private List<Visitor> visitors = new ArrayList<Visitor>();

+    /**

+     * a list of visitors

+     */

+    private List<Visitor> visitors = new ArrayList<Visitor>();

 

-	/**

-	 * output writer to print things out

-	 */

-	private static PrintWriter outputWriter = new PrintWriter(

-			new OutputStreamWriter(System.out));

+    /**

+     * output writer to print things out

+     */

+    private static PrintWriter outputWriter = new PrintWriter(new OutputStreamWriter(System.out));

 

-	/**

-	 * map a logical variable to type info

-	 */

-	private HashMap<LogicalVariable, TypeInfo> variableToType = new HashMap<LogicalVariable, TypeInfo>();

+    /**

+     * map a logical variable to type info

+     */

+    private HashMap<LogicalVariable, TypeInfo> variableToType = new HashMap<LogicalVariable, TypeInfo>();

 

-	@Override

-	public LogicalVariable getVariable(String fieldName, TypeInfo type) {

-		LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);

-		if (var == null) {

-			currentVariable++;

-			var = new LogicalVariable(currentVariable);

-			fieldToLogicalVariableMap.put(fieldName, var);

-			nameToLogicalVariableMap.put(var.toString(), var);

-			variableToType.put(var, type);

-			logicalVariableToFieldMap.put(var, fieldName);

-		}

-		return var;

-	}

+    @Override

+    public LogicalVariable getVariable(String fieldName, TypeInfo type) {

+        LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);

+        if (var == null) {

+            currentVariable++;

+            var = new LogicalVariable(currentVariable);

+            fieldToLogicalVariableMap.put(fieldName, var);

+            nameToLogicalVariableMap.put(var.toString(), var);

+            variableToType.put(var, type);

+            logicalVariableToFieldMap.put(var, fieldName);

+        }

+        return var;

+    }

 

-	@Override

-	public LogicalVariable getNewVariable(String fieldName, TypeInfo type) {

-		currentVariable++;

-		LogicalVariable var = new LogicalVariable(currentVariable);

-		fieldToLogicalVariableMap.put(fieldName, var);

-		nameToLogicalVariableMap.put(var.toString(), var);

-		variableToType.put(var, type);

-		logicalVariableToFieldMap.put(var, fieldName);

-		return var;

-	}

+    @Override

+    public LogicalVariable getNewVariable(String fieldName, TypeInfo type) {

+        currentVariable++;

+        LogicalVariable var = new LogicalVariable(currentVariable);

+        fieldToLogicalVariableMap.put(fieldName, var);

+        nameToLogicalVariableMap.put(var.toString(), var);

+        variableToType.put(var, type);

+        logicalVariableToFieldMap.put(var, fieldName);

+        return var;

+    }

 

-	@Override

-	public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar) {

-		String name = this.logicalVariableToFieldMap.get(oldVar);

-		if (name != null) {

-			fieldToLogicalVariableMap.put(name, newVar);

-			nameToLogicalVariableMap.put(newVar.toString(), newVar);

-			nameToLogicalVariableMap.put(oldVar.toString(), newVar);

-			logicalVariableToFieldMap.put(newVar, name);

-		}

-	}

+    @Override

+    public void replaceVariable(LogicalVariable oldVar, LogicalVariable newVar) {

+        String name = this.logicalVariableToFieldMap.get(oldVar);

+        if (name != null) {

+            fieldToLogicalVariableMap.put(name, newVar);

+            nameToLogicalVariableMap.put(newVar.toString(), newVar);

+            nameToLogicalVariableMap.put(oldVar.toString(), newVar);

+            logicalVariableToFieldMap.put(newVar, name);

+        }

+    }

 

-	@Override

-	public IMetadataProvider<PartitionDesc, Object> getMetadataProvider() {

-		return metaData;

-	}

+    @Override

+    public IMetadataProvider<PartitionDesc, Object> getMetadataProvider() {

+        return metaData;

+    }

 

-	/**

-	 * only get an variable, without rewriting it

-	 * 

-	 * @param fieldName

-	 * @return

-	 */

-	private LogicalVariable getVariableOnly(String fieldName) {

-		return fieldToLogicalVariableMap.get(fieldName);

-	}

+    /**

+     * only get an variable, without rewriting it

+     * 

+     * @param fieldName

+     * @return

+     */

+    private LogicalVariable getVariableOnly(String fieldName) {

+        return fieldToLogicalVariableMap.get(fieldName);

+    }

 

-	private void updateVariable(String fieldName, LogicalVariable variable) {

-		LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);

-		if (var == null) {

-			fieldToLogicalVariableMap.put(fieldName, variable);

-			nameToLogicalVariableMap.put(fieldName, variable);

-		} else if (!var.equals(variable)) {

-			// System.out.println("!!!replace variables!!!");

-			fieldToLogicalVariableMap.put(fieldName, variable);

-			nameToLogicalVariableMap.put(fieldName, variable);

-		}

-	}

+    private void updateVariable(String fieldName, LogicalVariable variable) {

+        LogicalVariable var = fieldToLogicalVariableMap.get(fieldName);

+        if (var == null) {

+            fieldToLogicalVariableMap.put(fieldName, variable);

+            nameToLogicalVariableMap.put(fieldName, variable);

+        } else if (!var.equals(variable)) {

+            // System.out.println("!!!replace variables!!!");

+            fieldToLogicalVariableMap.put(fieldName, variable);

+            nameToLogicalVariableMap.put(fieldName, variable);

+        }

+    }

 

-	/**

-	 * get a list of logical variables from the schema

-	 * 

-	 * @param schema

-	 * @return

-	 */

-	@Override

-	public List<LogicalVariable> getVariablesFromSchema(Schema schema) {

-		List<LogicalVariable> variables = new ArrayList<LogicalVariable>();

-		List<String> names = schema.getNames();

+    /**

+     * get a list of logical variables from the schema

+     * 

+     * @param schema

+     * @return

+     */

+    @Override

+    public List<LogicalVariable> getVariablesFromSchema(Schema schema) {

+        List<LogicalVariable> variables = new ArrayList<LogicalVariable>();

+        List<String> names = schema.getNames();

 

-		for (String name : names)

-			variables.add(nameToLogicalVariableMap.get(name));

-		return variables;

-	}

+        for (String name : names)

+            variables.add(nameToLogicalVariableMap.get(name));

+        return variables;

+    }

 

-	/**

-	 * get variable to typeinfo map

-	 * 

-	 * @return

-	 */

-	public HashMap<LogicalVariable, TypeInfo> getVariableContext() {

-		return this.variableToType;

-	}

+    /**

+     * get variable to typeinfo map

+     * 

+     * @return

+     */

+    public HashMap<LogicalVariable, TypeInfo> getVariableContext() {

+        return this.variableToType;

+    }

 

-	/**

-	 * get the number of variables

-	 * s

-	 * @return

-	 */

-	public int getVariableCounter() {

-		return currentVariable + 1;

-	}

+    /**

+     * get the number of variables

+     * s

+     * 

+     * @return

+     */

+    public int getVariableCounter() {

+        return currentVariable + 1;

+    }

 

-	/**

-	 * translate from hive operator tree to asterix operator tree

-	 * 

-	 * @param hive

-	 *            roots

-	 * @return Algebricks roots

-	 */

-	public void translate(List<Operator> hiveRoot,

-			ILogicalOperator parentOperator,

-			HashMap<String, PartitionDesc> aliasToPathMap)

-			throws AlgebricksException {

-		/**

-		 * register visitors

-		 */

-		visitors.add(new FilterVisitor());

-		visitors.add(new GroupByVisitor());

-		visitors.add(new JoinVisitor());

-		visitors.add(new LateralViewJoinVisitor());

-		visitors.add(new UnionVisitor());

-		visitors.add(new LimitVisitor());

-		visitors.add(new MapJoinVisitor());

-		visitors.add(new ProjectVisitor());

-		visitors.add(new SortVisitor());

-		visitors.add(new ExtractVisitor());

-		visitors.add(new TableScanWriteVisitor(aliasToPathMap));

+    /**

+     * translate from hive operator tree to asterix operator tree

+     * 

+     * @param hive

+     *            roots

+     * @return Algebricks roots

+     */

+    public void translate(List<Operator> hiveRoot, ILogicalOperator parentOperator,

+            HashMap<String, PartitionDesc> aliasToPathMap) throws AlgebricksException {

+        /**

+         * register visitors

+         */

+        visitors.add(new FilterVisitor());

+        visitors.add(new GroupByVisitor());

+        visitors.add(new JoinVisitor());

+        visitors.add(new LateralViewJoinVisitor());

+        visitors.add(new UnionVisitor());

+        visitors.add(new LimitVisitor());

+        visitors.add(new MapJoinVisitor());

+        visitors.add(new ProjectVisitor());

+        visitors.add(new SortVisitor());

+        visitors.add(new ExtractVisitor());

+        visitors.add(new TableScanWriteVisitor(aliasToPathMap));

 

-		List<Mutable<ILogicalOperator>> refList = translate(hiveRoot,

-				new MutableObject<ILogicalOperator>(parentOperator));

-		insertReplicateOperator(refList);

-		if (refList != null)

-			rootOperators.addAll(refList);

-	}

+        List<Mutable<ILogicalOperator>> refList = translate(hiveRoot, new MutableObject<ILogicalOperator>(

+                parentOperator));

+        insertReplicateOperator(refList);

+        if (refList != null)

+            rootOperators.addAll(refList);

+    }

 

-	/**

-	 * translate operator DAG

-	 * 

-	 * @param hiveRoot

-	 * @param AlgebricksParentOperator

-	 * @return

-	 */

-	private List<Mutable<ILogicalOperator>> translate(List<Operator> hiveRoot,

-			Mutable<ILogicalOperator> AlgebricksParentOperator)

-			throws AlgebricksException {

+    /**

+     * translate operator DAG

+     * 

+     * @param hiveRoot

+     * @param AlgebricksParentOperator

+     * @return

+     */

+    private List<Mutable<ILogicalOperator>> translate(List<Operator> hiveRoot,

+            Mutable<ILogicalOperator> AlgebricksParentOperator) throws AlgebricksException {

 

-		for (Operator hiveOperator : hiveRoot) {

-			continueTraverse = true;

-			Mutable<ILogicalOperator> currentOperatorRef = null;

-			if (hiveOperator.getType() == OperatorType.FILTER) {

-				FilterOperator fop = (FilterOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(fop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null)

-						break;

-				}

-			} else if (hiveOperator.getType() == OperatorType.REDUCESINK) {

-				ReduceSinkOperator fop = (ReduceSinkOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(fop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null)

-						break;

-				}

-			} else if (hiveOperator.getType() == OperatorType.JOIN) {

-				JoinOperator fop = (JoinOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(fop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null) {

-						continueTraverse = true;

-						break;

-					} else

-						continueTraverse = false;

-				}

-				if (currentOperatorRef == null)

-					return null;

-			} else if (hiveOperator.getType() == OperatorType.LATERALVIEWJOIN) {

-				LateralViewJoinOperator fop = (LateralViewJoinOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(fop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null)

-						break;

-				}

-				if (currentOperatorRef == null)

-					return null;

-			} else if (hiveOperator.getType() == OperatorType.MAPJOIN) {

-				MapJoinOperator fop = (MapJoinOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(fop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null) {

-						continueTraverse = true;

-						break;

-					} else

-						continueTraverse = false;

-				}

-				if (currentOperatorRef == null)

-					return null;

-			} else if (hiveOperator.getType() == OperatorType.SELECT) {

-				SelectOperator fop = (SelectOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(fop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null)

-						break;

-				}

-			} else if (hiveOperator.getType() == OperatorType.EXTRACT) {

-				ExtractOperator fop = (ExtractOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(fop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null)

-						break;

-				}

-			} else if (hiveOperator.getType() == OperatorType.GROUPBY) {

-				GroupByOperator fop = (GroupByOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(fop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null)

-						break;

-				}

-			} else if (hiveOperator.getType() == OperatorType.TABLESCAN) {

-				TableScanOperator fop = (TableScanOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(fop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null)

-						break;

-				}

-			} else if (hiveOperator.getType() == OperatorType.FILESINK) {

-				FileSinkOperator fop = (FileSinkOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(fop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null)

-						break;

-				}

-			} else if (hiveOperator.getType() == OperatorType.LIMIT) {

-				LimitOperator lop = (LimitOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(lop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null)

-						break;

-				}

-			} else if (hiveOperator.getType() == OperatorType.UDTF) {

-				UDTFOperator lop = (UDTFOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(lop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null)

-						break;

-				}

-			} else if (hiveOperator.getType() == OperatorType.UNION) {

-				UnionOperator lop = (UnionOperator) hiveOperator;

-				for (Visitor visitor : visitors) {

-					currentOperatorRef = visitor.visit(lop,

-							AlgebricksParentOperator, this);

-					if (currentOperatorRef != null) {

-						continueTraverse = true;

-						break;

-					} else

-						continueTraverse = false;

-				}

-			} else

-				;

-			if (hiveOperator.getChildOperators() != null

-					&& hiveOperator.getChildOperators().size() > 0

-					&& continueTraverse) {

-				@SuppressWarnings("unchecked")

-				List<Operator> children = hiveOperator.getChildOperators();

-				if (currentOperatorRef == null)

-					currentOperatorRef = AlgebricksParentOperator;

-				translate(children, currentOperatorRef);

-			}

-			if (hiveOperator.getChildOperators() == null

-					|| hiveOperator.getChildOperators().size() == 0)

-				logicalOp.add(currentOperatorRef);

-		}

-		return logicalOp;

-	}

+        for (Operator hiveOperator : hiveRoot) {

+            continueTraverse = true;

+            Mutable<ILogicalOperator> currentOperatorRef = null;

+            if (hiveOperator.getType() == OperatorType.FILTER) {

+                FilterOperator fop = (FilterOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null)

+                        break;

+                }

+            } else if (hiveOperator.getType() == OperatorType.REDUCESINK) {

+                ReduceSinkOperator fop = (ReduceSinkOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null)

+                        break;

+                }

+            } else if (hiveOperator.getType() == OperatorType.JOIN) {

+                JoinOperator fop = (JoinOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null) {

+                        continueTraverse = true;

+                        break;

+                    } else

+                        continueTraverse = false;

+                }

+                if (currentOperatorRef == null)

+                    return null;

+            } else if (hiveOperator.getType() == OperatorType.LATERALVIEWJOIN) {

+                LateralViewJoinOperator fop = (LateralViewJoinOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null)

+                        break;

+                }

+                if (currentOperatorRef == null)

+                    return null;

+            } else if (hiveOperator.getType() == OperatorType.MAPJOIN) {

+                MapJoinOperator fop = (MapJoinOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null) {

+                        continueTraverse = true;

+                        break;

+                    } else

+                        continueTraverse = false;

+                }

+                if (currentOperatorRef == null)

+                    return null;

+            } else if (hiveOperator.getType() == OperatorType.SELECT) {

+                SelectOperator fop = (SelectOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null)

+                        break;

+                }

+            } else if (hiveOperator.getType() == OperatorType.EXTRACT) {

+                ExtractOperator fop = (ExtractOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null)

+                        break;

+                }

+            } else if (hiveOperator.getType() == OperatorType.GROUPBY) {

+                GroupByOperator fop = (GroupByOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null)

+                        break;

+                }

+            } else if (hiveOperator.getType() == OperatorType.TABLESCAN) {

+                TableScanOperator fop = (TableScanOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null)

+                        break;

+                }

+            } else if (hiveOperator.getType() == OperatorType.FILESINK) {

+                FileSinkOperator fop = (FileSinkOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(fop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null)

+                        break;

+                }

+            } else if (hiveOperator.getType() == OperatorType.LIMIT) {

+                LimitOperator lop = (LimitOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null)

+                        break;

+                }

+            } else if (hiveOperator.getType() == OperatorType.UDTF) {

+                UDTFOperator lop = (UDTFOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null)

+                        break;

+                }

+            } else if (hiveOperator.getType() == OperatorType.UNION) {

+                UnionOperator lop = (UnionOperator) hiveOperator;

+                for (Visitor visitor : visitors) {

+                    currentOperatorRef = visitor.visit(lop, AlgebricksParentOperator, this);

+                    if (currentOperatorRef != null) {

+                        continueTraverse = true;

+                        break;

+                    } else

+                        continueTraverse = false;

+                }

+            } else

+                ;

+            if (hiveOperator.getChildOperators() != null && hiveOperator.getChildOperators().size() > 0

+                    && continueTraverse) {

+                @SuppressWarnings("unchecked")

+                List<Operator> children = hiveOperator.getChildOperators();

+                if (currentOperatorRef == null)

+                    currentOperatorRef = AlgebricksParentOperator;

+                translate(children, currentOperatorRef);

+            }

+            if (hiveOperator.getChildOperators() == null || hiveOperator.getChildOperators().size() == 0)

+                logicalOp.add(currentOperatorRef);

+        }

+        return logicalOp;

+    }

 

-	/**

-	 * used in select, group by to get no-column-expression columns

-	 * 

-	 * @param cols

-	 * @return

-	 */

-	public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent,

-			List<ExprNodeDesc> cols, ArrayList<LogicalVariable> variables) {

+    /**

+     * used in select, group by to get no-column-expression columns

+     * 

+     * @param cols

+     * @return

+     */

+    public ILogicalOperator getAssignOperator(Mutable<ILogicalOperator> parent, List<ExprNodeDesc> cols,

+            ArrayList<LogicalVariable> variables) {

 

-		ArrayList<Mutable<ILogicalExpression>> expressions = new ArrayList<Mutable<ILogicalExpression>>();

+        ArrayList<Mutable<ILogicalExpression>> expressions = new ArrayList<Mutable<ILogicalExpression>>();

 

-		/**

-		 * variables to be appended in the assign operator

-		 */

-		ArrayList<LogicalVariable> appendedVariables = new ArrayList<LogicalVariable>();

+        /**

+         * variables to be appended in the assign operator

+         */

+        ArrayList<LogicalVariable> appendedVariables = new ArrayList<LogicalVariable>();

 

-		// one variable can only be assigned once

-		for (ExprNodeDesc hiveExpr : cols) {

-			rewriteExpression(hiveExpr);

+        // one variable can only be assigned once

+        for (ExprNodeDesc hiveExpr : cols) {

+            rewriteExpression(hiveExpr);

 

-			if (hiveExpr instanceof ExprNodeColumnDesc) {

-				ExprNodeColumnDesc desc2 = (ExprNodeColumnDesc) hiveExpr;

-				String fieldName = desc2.getTabAlias() + "."

-						+ desc2.getColumn();

+            if (hiveExpr instanceof ExprNodeColumnDesc) {

+                ExprNodeColumnDesc desc2 = (ExprNodeColumnDesc) hiveExpr;

+                String fieldName = desc2.getTabAlias() + "." + desc2.getColumn();

 

-				// System.out.println("project expr: " + fieldName);

+                // System.out.println("project expr: " + fieldName);

 

-				if (fieldName.indexOf("$$") < 0) {

-					LogicalVariable var = getVariable(fieldName,

-							hiveExpr.getTypeInfo());

-					desc2.setColumn(var.toString());

-					desc2.setTabAlias("");

-					variables.add(var);

-				} else {

-					LogicalVariable var = nameToLogicalVariableMap.get(desc2

-							.getColumn());

-					String name = this.logicalVariableToFieldMap.get(var);

-					var = this.getVariableOnly(name);

-					variables.add(var);

-				}

-			} else {

-				Mutable<ILogicalExpression> asterixExpr = translateScalarFucntion(hiveExpr);

-				expressions.add(asterixExpr);

-				LogicalVariable var = getVariable(hiveExpr.getExprString()

-						+ asterixExpr.hashCode(), hiveExpr.getTypeInfo());

-				variables.add(var);

-				appendedVariables.add(var);

-			}

-		}

+                if (fieldName.indexOf("$$") < 0) {

+                    LogicalVariable var = getVariable(fieldName, hiveExpr.getTypeInfo());

+                    desc2.setColumn(var.toString());

+                    desc2.setTabAlias("");

+                    variables.add(var);

+                } else {

+                    LogicalVariable var = nameToLogicalVariableMap.get(desc2.getColumn());

+                    String name = this.logicalVariableToFieldMap.get(var);

+                    var = this.getVariableOnly(name);

+                    variables.add(var);

+                }

+            } else {

+                Mutable<ILogicalExpression> asterixExpr = translateScalarFucntion(hiveExpr);

+                expressions.add(asterixExpr);

+                LogicalVariable var = getVariable(hiveExpr.getExprString() + asterixExpr.hashCode(),

+                        hiveExpr.getTypeInfo());

+                variables.add(var);

+                appendedVariables.add(var);

+            }

+        }

 

-		/**

-		 * create an assign operator to deal with appending

-		 */

-		ILogicalOperator assignOp = null;

-		if (appendedVariables.size() > 0) {

-			assignOp = new AssignOperator(appendedVariables, expressions);

-			assignOp.getInputs().add(parent);

-		}

-		return assignOp;

-	}

+        /**

+         * create an assign operator to deal with appending

+         */

+        ILogicalOperator assignOp = null;

+        if (appendedVariables.size() > 0) {

+            assignOp = new AssignOperator(appendedVariables, expressions);

+            assignOp.getInputs().add(parent);

+        }

+        return assignOp;

+    }

 

-	private ILogicalPlan plan;

+    private ILogicalPlan plan;

 

-	public ILogicalPlan genLogicalPlan() {

-		plan = new ALogicalPlanImpl(rootOperators);

-		return plan;

-	}

+    public ILogicalPlan genLogicalPlan() {

+        plan = new ALogicalPlanImpl(rootOperators);

+        return plan;

+    }

 

-	public void printOperators() throws AlgebricksException {

-		LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();

-		StringBuilder buffer = new StringBuilder();

-		PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);

-		outputWriter.println(buffer);

-		outputWriter.println("rewritten variables: ");

-		outputWriter.flush();

-		printVariables();

+    public void printOperators() throws AlgebricksException {

+        LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();

+        StringBuilder buffer = new StringBuilder();

+        PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);

+        outputWriter.println(buffer);

+        outputWriter.println("rewritten variables: ");

+        outputWriter.flush();

+        printVariables();

 

-	}

+    }

 

-	public static void setOutputPrinter(PrintWriter writer) {

-		outputWriter = writer;

-	}

+    public static void setOutputPrinter(PrintWriter writer) {

+        outputWriter = writer;

+    }

 

-	private void printVariables() {

-		Set<Entry<String, LogicalVariable>> entries = fieldToLogicalVariableMap

-				.entrySet();

+    private void printVariables() {

+        Set<Entry<String, LogicalVariable>> entries = fieldToLogicalVariableMap.entrySet();

 

-		for (Entry<String, LogicalVariable> entry : entries) {

-			outputWriter.println(entry.getKey() + " -> " + entry.getValue());

-		}

-		outputWriter.flush();

-	}

+        for (Entry<String, LogicalVariable> entry : entries) {

+            outputWriter.println(entry.getKey() + " -> " + entry.getValue());

+        }

+        outputWriter.flush();

+    }

 

-	/**

-	 * generate the object inspector for the output of an operator

-	 * 

-	 * @param operator

-	 *            The Hive operator

-	 * @return an ObjectInspector object

-	 */

-	public Schema generateInputSchema(Operator operator) {

-		List<String> variableNames = new ArrayList<String>();

-		List<TypeInfo> typeList = new ArrayList<TypeInfo>();

-		List<ColumnInfo> columns = operator.getSchema().getSignature();

+    /**

+     * generate the object inspector for the output of an operator

+     * 

+     * @param operator

+     *            The Hive operator

+     * @return an ObjectInspector object

+     */

+    public Schema generateInputSchema(Operator operator) {

+        List<String> variableNames = new ArrayList<String>();

+        List<TypeInfo> typeList = new ArrayList<TypeInfo>();

+        List<ColumnInfo> columns = operator.getSchema().getSignature();

 

-		for (ColumnInfo col : columns) {

-			// typeList.add();

-			TypeInfo type = col.getType();

-			typeList.add(type);

+        for (ColumnInfo col : columns) {

+            // typeList.add();

+            TypeInfo type = col.getType();

+            typeList.add(type);

 

-			String fieldName = col.getInternalName();

-			variableNames.add(fieldName);

-		}

+            String fieldName = col.getInternalName();

+            variableNames.add(fieldName);

+        }

 

-		return new Schema(variableNames, typeList);

-	}

+        return new Schema(variableNames, typeList);

+    }

 

-	/**

-	 * rewrite the names of output columns for feature expression evaluators to

-	 * use

-	 * 

-	 * @param operator

-	 */

-	public void rewriteOperatorOutputSchema(Operator operator) {

-		List<ColumnInfo> columns = operator.getSchema().getSignature();

+    /**

+     * rewrite the names of output columns for feature expression evaluators to

+     * use

+     * 

+     * @param operator

+     */

+    public void rewriteOperatorOutputSchema(Operator operator) {

+        List<ColumnInfo> columns = operator.getSchema().getSignature();

 

-		for (ColumnInfo column : columns) {

-			String columnName = column.getTabAlias() + "."

-					+ column.getInternalName();

-			if (columnName.indexOf("$$") < 0) {

-				LogicalVariable var = getVariable(columnName, column.getType());

-				column.setInternalName(var.toString());

-			}

-		}

-	}

+        for (ColumnInfo column : columns) {

+            String columnName = column.getTabAlias() + "." + column.getInternalName();

+            if (columnName.indexOf("$$") < 0) {

+                LogicalVariable var = getVariable(columnName, column.getType());

+                column.setInternalName(var.toString());

+            }

+        }

+    }

 

-	@Override

-	public void rewriteOperatorOutputSchema(List<LogicalVariable> variables,

-			Operator operator) {

+    @Override

+    public void rewriteOperatorOutputSchema(List<LogicalVariable> variables, Operator operator) {

 

-		printOperatorSchema(operator);

-		List<ColumnInfo> columns = operator.getSchema().getSignature();

-		if (variables.size() != columns.size()) {

-			System.out.println("output cardinality error " + operator.getName()

-					+ " variable size: " + variables.size() + " expected "

-					+ columns.size());

-		}

+        printOperatorSchema(operator);

+        List<ColumnInfo> columns = operator.getSchema().getSignature();

+        if (variables.size() != columns.size()) {

+            throw new IllegalStateException("output cardinality error " + operator.getName() + " variable size: "

+                    + variables.size() + " expected " + columns.size());

+        }

 

-		for (int i = 0; i < variables.size(); i++) {

-			LogicalVariable var = variables.get(i);

-			ColumnInfo column = columns.get(i);

-			String fieldName = column.getTabAlias() + "."

-					+ column.getInternalName();

-			if (fieldName.indexOf("$$") < 0) {

-				updateVariable(fieldName, var);

-				column.setInternalName(var.toString());

-			}

-		}

-		printOperatorSchema(operator);

-	}

+        for (int i = 0; i < variables.size(); i++) {

+            LogicalVariable var = variables.get(i);

+            ColumnInfo column = columns.get(i);

+            String fieldName = column.getTabAlias() + "." + column.getInternalName();

+            if (fieldName.indexOf("$$") < 0) {

+                updateVariable(fieldName, var);

+                column.setInternalName(var.toString());

+            }

+        }

+        printOperatorSchema(operator);

+    }

 

-	/**

-	 * rewrite an expression and substitute variables

-	 * 

-	 * @param expr

-	 *            hive expression

-	 */

-	public void rewriteExpression(ExprNodeDesc expr) {

-		if (expr instanceof ExprNodeColumnDesc) {

-			ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;

-			String fieldName = desc.getTabAlias() + "." + desc.getColumn();

-			if (fieldName.indexOf("$$") < 0) {

-				LogicalVariable var = getVariableOnly(fieldName);

-				if (var == null) {

-					fieldName = "." + desc.getColumn();

-					var = getVariableOnly(fieldName);

-					if (var == null) {

-						fieldName = "null." + desc.getColumn();

-						var = getVariableOnly(fieldName);

-						if (var == null) {

-							System.out.println(fieldName + " is wrong!!! ");

-						}

-					}

-				}

-				String name = this.logicalVariableToFieldMap.get(var);

-				var = getVariableOnly(name);

-				desc.setColumn(var.toString());

-			}

-		} else {

-			if (expr.getChildren() != null && expr.getChildren().size() > 0) {

-				List<ExprNodeDesc> children = expr.getChildren();

-				for (ExprNodeDesc desc : children)

-					rewriteExpression(desc);

-			}

-		}

-	}

+    /**

+     * rewrite an expression and substitute variables

+     * 

+     * @param expr

+     *            hive expression

+     */

+    public void rewriteExpression(ExprNodeDesc expr) {

+        if (expr instanceof ExprNodeColumnDesc) {

+            ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;

+            String fieldName = desc.getTabAlias() + "." + desc.getColumn();

+            if (fieldName.indexOf("$$") < 0) {

+                LogicalVariable var = getVariableOnly(fieldName);

+                if (var == null) {

+                    fieldName = "." + desc.getColumn();

+                    var = getVariableOnly(fieldName);

+                    if (var == null) {

+                        fieldName = "null." + desc.getColumn();

+                        var = getVariableOnly(fieldName);

+                        if (var == null) {

+                            throw new IllegalStateException(fieldName + " is wrong!!! ");

+                        }

+                    }

+                }

+                String name = this.logicalVariableToFieldMap.get(var);

+                var = getVariableOnly(name);

+                desc.setColumn(var.toString());

+            }

+        } else {

+            if (expr.getChildren() != null && expr.getChildren().size() > 0) {

+                List<ExprNodeDesc> children = expr.getChildren();

+                for (ExprNodeDesc desc : children)

+                    rewriteExpression(desc);

+            }

+        }

+    }

 

-	/**

-	 * rewrite an expression and substitute variables

-	 * 

-	 * @param expr

-	 *            hive expression

-	 */

-	public void rewriteExpressionPartial(ExprNodeDesc expr) {

-		if (expr instanceof ExprNodeColumnDesc) {

-			ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;

-			String fieldName = desc.getTabAlias() + "." + desc.getColumn();

-			if (fieldName.indexOf("$$") < 0) {

-				LogicalVariable var = getVariableOnly(fieldName);

-				desc.setColumn(var.toString());

-			}

-		} else {

-			if (expr.getChildren() != null && expr.getChildren().size() > 0) {

-				List<ExprNodeDesc> children = expr.getChildren();

-				for (ExprNodeDesc desc : children)

-					rewriteExpressionPartial(desc);

-			}

-		}

-	}

+    /**

+     * rewrite an expression and substitute variables

+     * 

+     * @param expr

+     *            hive expression

+     */

+    public void rewriteExpressionPartial(ExprNodeDesc expr) {

+        if (expr instanceof ExprNodeColumnDesc) {

+            ExprNodeColumnDesc desc = (ExprNodeColumnDesc) expr;

+            String fieldName = desc.getTabAlias() + "." + desc.getColumn();

+            if (fieldName.indexOf("$$") < 0) {

+                LogicalVariable var = getVariableOnly(fieldName);

+                desc.setColumn(var.toString());

+            }

+        } else {

+            if (expr.getChildren() != null && expr.getChildren().size() > 0) {

+                List<ExprNodeDesc> children = expr.getChildren();

+                for (ExprNodeDesc desc : children)

+                    rewriteExpressionPartial(desc);

+            }

+        }

+    }

 

-	private void printOperatorSchema(Operator operator) {

-		System.out.println(operator.getName());

-		List<ColumnInfo> columns = operator.getSchema().getSignature();

-		for (ColumnInfo column : columns) {

-			System.out.print(column.getTabAlias() + "."

-					+ column.getInternalName() + "  ");

-		}

-		System.out.println();

-	}

+    private void printOperatorSchema(Operator operator) {

+        System.out.println(operator.getName());

+        List<ColumnInfo> columns = operator.getSchema().getSignature();

+        for (ColumnInfo column : columns) {

+            System.out.print(column.getTabAlias() + "." + column.getInternalName() + "  ");

+        }

+        System.out.println();

+    }

 

-	/**

-	 * translate scalar function expression

-	 * 

-	 * @param hiveExpr

-	 * @return

-	 */

-	public Mutable<ILogicalExpression> translateScalarFucntion(

-			ExprNodeDesc hiveExpr) {

-		ILogicalExpression AlgebricksExpr;

+    /**

+     * translate scalar function expression

+     * 

+     * @param hiveExpr

+     * @return

+     */

+    public Mutable<ILogicalExpression> translateScalarFucntion(ExprNodeDesc hiveExpr) {

+        ILogicalExpression AlgebricksExpr;

 

-		if (hiveExpr instanceof ExprNodeGenericFuncDesc) {

-			List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();

-			List<ExprNodeDesc> children = hiveExpr.getChildren();

+        if (hiveExpr instanceof ExprNodeGenericFuncDesc) {

+            List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();

+            List<ExprNodeDesc> children = hiveExpr.getChildren();

 

-			for (ExprNodeDesc child : children)

-				arguments.add(translateScalarFucntion(child));

+            for (ExprNodeDesc child : children)

+                arguments.add(translateScalarFucntion(child));

 

-			ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) hiveExpr;

-			GenericUDF genericUdf = funcExpr.getGenericUDF();

-			UDF udf = null;

-			if (genericUdf instanceof GenericUDFBridge) {

-				GenericUDFBridge bridge = (GenericUDFBridge) genericUdf;

-				try {

-					udf = bridge.getUdfClass().newInstance();

-				} catch (Exception e) {

-					e.printStackTrace();

-				}

-			}

+            ExprNodeGenericFuncDesc funcExpr = (ExprNodeGenericFuncDesc) hiveExpr;

+            GenericUDF genericUdf = funcExpr.getGenericUDF();

+            UDF udf = null;

+            if (genericUdf instanceof GenericUDFBridge) {

+                GenericUDFBridge bridge = (GenericUDFBridge) genericUdf;

+                try {

+                    udf = bridge.getUdfClass().newInstance();

+                } catch (Exception e) {

+                    e.printStackTrace();

+                }

+            }

 

-			/**

-			 * set up the hive function

-			 */

-			Object hiveFunction = genericUdf;

-			if (udf != null)

-				hiveFunction = udf;

+            /**

+             * set up the hive function

+             */

+            Object hiveFunction = genericUdf;

+            if (udf != null)

+                hiveFunction = udf;

 

-			FunctionIdentifier funcId = HiveAlgebricksBuiltInFunctionMap.INSTANCE

-					.getAlgebricksFunctionId(hiveFunction.getClass());

-			if (funcId == null) {

-				funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE,

-						hiveFunction.getClass().getName());

-			}

+            FunctionIdentifier funcId = HiveAlgebricksBuiltInFunctionMap.INSTANCE.getAlgebricksFunctionId(hiveFunction

+                    .getClass());

+            if (funcId == null) {

+                funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, hiveFunction.getClass().getName());

+            }

 

-			Object functionInfo = null;

-			if (genericUdf instanceof GenericUDFBridge) {

-				functionInfo = funcExpr;

-			}

+            Object functionInfo = null;

+            if (genericUdf instanceof GenericUDFBridge) {

+                functionInfo = funcExpr;

+            }

 

-			/**

-			 * generate the function call expression

-			 */

-			ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(

-					new HiveFunctionInfo(funcId, functionInfo), arguments);

-			AlgebricksExpr = AlgebricksFuncExpr;

+            /**

+             * generate the function call expression

+             */

+            ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(

+                    funcId, functionInfo), arguments);

+            AlgebricksExpr = AlgebricksFuncExpr;

 

-		} else if (hiveExpr instanceof ExprNodeColumnDesc) {

-			ExprNodeColumnDesc column = (ExprNodeColumnDesc) hiveExpr;

-			LogicalVariable var = this.getVariable(column.getColumn());

-			AlgebricksExpr = new VariableReferenceExpression(var);

+        } else if (hiveExpr instanceof ExprNodeColumnDesc) {

+            ExprNodeColumnDesc column = (ExprNodeColumnDesc) hiveExpr;

+            LogicalVariable var = this.getVariable(column.getColumn());

+            AlgebricksExpr = new VariableReferenceExpression(var);

 

-		} else if (hiveExpr instanceof ExprNodeFieldDesc) {

-			FunctionIdentifier funcId;

-			funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE,

-					ExpressionConstant.FIELDACCESS);

+        } else if (hiveExpr instanceof ExprNodeFieldDesc) {

+            FunctionIdentifier funcId;

+            funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.FIELDACCESS);

 

-			ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(

-					new HiveFunctionInfo(funcId, hiveExpr));

-			AlgebricksExpr = AlgebricksFuncExpr;

-		} else if (hiveExpr instanceof ExprNodeConstantDesc) {

-			ExprNodeConstantDesc hiveConst = (ExprNodeConstantDesc) hiveExpr;

-			Object value = hiveConst.getValue();

-			AlgebricksExpr = new ConstantExpression(

-					new HivesterixConstantValue(value));

-		} else if (hiveExpr instanceof ExprNodeNullDesc) {

-			FunctionIdentifier funcId;

-			funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE,

-					ExpressionConstant.NULL);

+            ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(

+                    funcId, hiveExpr));

+            AlgebricksExpr = AlgebricksFuncExpr;

+        } else if (hiveExpr instanceof ExprNodeConstantDesc) {

+            ExprNodeConstantDesc hiveConst = (ExprNodeConstantDesc) hiveExpr;

+            Object value = hiveConst.getValue();

+            AlgebricksExpr = new ConstantExpression(new HivesterixConstantValue(value));

+        } else if (hiveExpr instanceof ExprNodeNullDesc) {

+            FunctionIdentifier funcId;

+            funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, ExpressionConstant.NULL);

 

-			ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(

-					new HiveFunctionInfo(funcId, hiveExpr));

+            ScalarFunctionCallExpression AlgebricksFuncExpr = new ScalarFunctionCallExpression(new HiveFunctionInfo(

+                    funcId, hiveExpr));

 

-			AlgebricksExpr = AlgebricksFuncExpr;

-		} else {

-			throw new IllegalStateException("unknown hive expression");

-		}

-		return new MutableObject<ILogicalExpression>(AlgebricksExpr);

-	}

+            AlgebricksExpr = AlgebricksFuncExpr;

+        } else {

+            throw new IllegalStateException("unknown hive expression");

+        }

+        return new MutableObject<ILogicalExpression>(AlgebricksExpr);

+    }

 

-	/**

-	 * translate aggregation function expression

-	 * 

-	 * @param aggregateDesc

-	 * @return

-	 */

-	public Mutable<ILogicalExpression> translateAggregation(

-			AggregationDesc aggregateDesc) {

+    /**

+     * translate aggregation function expression

+     * 

+     * @param aggregateDesc

+     * @return

+     */

+    public Mutable<ILogicalExpression> translateAggregation(AggregationDesc aggregateDesc) {

 

-		String UDAFName = aggregateDesc.getGenericUDAFName();

+        String UDAFName = aggregateDesc.getGenericUDAFName();

 

-		List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();

-		List<ExprNodeDesc> children = aggregateDesc.getParameters();

+        List<Mutable<ILogicalExpression>> arguments = new ArrayList<Mutable<ILogicalExpression>>();

+        List<ExprNodeDesc> children = aggregateDesc.getParameters();

 

-		for (ExprNodeDesc child : children)

-			arguments.add(translateScalarFucntion(child));

+        for (ExprNodeDesc child : children)

+            arguments.add(translateScalarFucntion(child));

 

-		FunctionIdentifier funcId = new FunctionIdentifier(

-				ExpressionConstant.NAMESPACE, UDAFName + "("

-						+ aggregateDesc.getMode() + ")");

-		HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, aggregateDesc);

-		AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(

-				funcInfo, false, arguments);

-		return new MutableObject<ILogicalExpression>(aggregationExpression);

-	}

+        FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDAFName + "("

+                + aggregateDesc.getMode() + ")");

+        HiveFunctionInfo funcInfo = new HiveFunctionInfo(funcId, aggregateDesc);

+        AggregateFunctionCallExpression aggregationExpression = new AggregateFunctionCallExpression(funcInfo, false,

+                arguments);

+        return new MutableObject<ILogicalExpression>(aggregationExpression);

+    }

 

-	/**

-	 * translate aggregation function expression

-	 * 

-	 * @param aggregator

-	 * @return

-	 */

-	public Mutable<ILogicalExpression> translateUnnestFunction(

-			UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument) {

+    /**

+     * translate aggregation function expression

+     * 

+     * @param aggregator

+     * @return

+     */

+    public Mutable<ILogicalExpression> translateUnnestFunction(UDTFDesc udtfDesc, Mutable<ILogicalExpression> argument) {

 

-		String UDTFName = udtfDesc.getUDTFName();

+        String UDTFName = udtfDesc.getUDTFName();

 

-		FunctionIdentifier funcId = new FunctionIdentifier(

-				ExpressionConstant.NAMESPACE, UDTFName);

-		UnnestingFunctionCallExpression unnestingExpression = new UnnestingFunctionCallExpression(

-				new HiveFunctionInfo(funcId, udtfDesc));

-		unnestingExpression.getArguments().add(argument);

-		return new MutableObject<ILogicalExpression>(unnestingExpression);

-	}

+        FunctionIdentifier funcId = new FunctionIdentifier(ExpressionConstant.NAMESPACE, UDTFName);

+        UnnestingFunctionCallExpression unnestingExpression = new UnnestingFunctionCallExpression(new HiveFunctionInfo(

+                funcId, udtfDesc));

+        unnestingExpression.getArguments().add(argument);

+        return new MutableObject<ILogicalExpression>(unnestingExpression);

+    }

 

-	/**

-	 * get typeinfo

-	 */

-	@Override

-	public TypeInfo getType(LogicalVariable var) {

-		return variableToType.get(var);

-	}

+    /**

+     * get typeinfo

+     */

+    @Override

+    public TypeInfo getType(LogicalVariable var) {

+        return variableToType.get(var);

+    }

 

-	/**

-	 * get variable from variable name

-	 */

-	@Override

-	public LogicalVariable getVariable(String name) {

-		return nameToLogicalVariableMap.get(name);

-	}

+    /**

+     * get variable from variable name

+     */

+    @Override

+    public LogicalVariable getVariable(String name) {

+        return nameToLogicalVariableMap.get(name);

+    }

 

-	@Override

-	public LogicalVariable getVariableFromFieldName(String fieldName) {

-		return this.getVariableOnly(fieldName);

-	}

+    @Override

+    public LogicalVariable getVariableFromFieldName(String fieldName) {

+        return this.getVariableOnly(fieldName);

+    }

 

-	/**

-	 * set the metadata provider

-	 */

-	@Override

-	public void setMetadataProvider(

-			IMetadataProvider<PartitionDesc, Object> metadata) {

-		this.metaData = metadata;

-	}

+    /**

+     * set the metadata provider

+     */

+    @Override

+    public void setMetadataProvider(IMetadataProvider<PartitionDesc, Object> metadata) {

+        this.metaData = metadata;

+    }

 

-	/**

-	 * insert ReplicateOperator when necessary

-	 */

-	private void insertReplicateOperator(List<Mutable<ILogicalOperator>> roots) {

-		Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> childToParentsMap = new HashMap<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>>();

-		buildChildToParentsMapping(roots, childToParentsMap);

-		for (Entry<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> entry : childToParentsMap

-				.entrySet()) {

-			List<Mutable<ILogicalOperator>> pList = entry.getValue();

-			if (pList.size() > 1) {

-				ILogicalOperator rop = new ReplicateOperator(pList.size());

-				Mutable<ILogicalOperator> ropRef = new MutableObject<ILogicalOperator>(

-						rop);

-				Mutable<ILogicalOperator> childRef = entry.getKey();

-				rop.getInputs().add(childRef);

-				for (Mutable<ILogicalOperator> parentRef : pList) {

-					ILogicalOperator parentOp = parentRef.getValue();

-					int index = parentOp.getInputs().indexOf(childRef);

-					parentOp.getInputs().set(index, ropRef);

-				}

-			}

-		}

-	}

+    /**

+     * insert ReplicateOperator when necessary

+     */

+    private void insertReplicateOperator(List<Mutable<ILogicalOperator>> roots) {

+        Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> childToParentsMap = new HashMap<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>>();

+        buildChildToParentsMapping(roots, childToParentsMap);

+        for (Entry<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> entry : childToParentsMap.entrySet()) {

+            List<Mutable<ILogicalOperator>> pList = entry.getValue();

+            if (pList.size() > 1) {

+                ILogicalOperator rop = new ReplicateOperator(pList.size());

+                Mutable<ILogicalOperator> ropRef = new MutableObject<ILogicalOperator>(rop);

+                Mutable<ILogicalOperator> childRef = entry.getKey();

+                rop.getInputs().add(childRef);

+                for (Mutable<ILogicalOperator> parentRef : pList) {

+                    ILogicalOperator parentOp = parentRef.getValue();

+                    int index = parentOp.getInputs().indexOf(childRef);

+                    parentOp.getInputs().set(index, ropRef);

+                }

+            }

+        }

+    }

 

-	/**

-	 * build the mapping from child to Parents

-	 * 

-	 * @param roots

-	 * @param childToParentsMap

-	 */

-	private void buildChildToParentsMapping(

-			List<Mutable<ILogicalOperator>> roots,

-			Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> map) {

-		for (Mutable<ILogicalOperator> opRef : roots) {

-			List<Mutable<ILogicalOperator>> childRefs = opRef.getValue()

-					.getInputs();

-			for (Mutable<ILogicalOperator> childRef : childRefs) {

-				List<Mutable<ILogicalOperator>> parentList = map.get(childRef);

-				if (parentList == null) {

-					parentList = new ArrayList<Mutable<ILogicalOperator>>();

-					map.put(childRef, parentList);

-				}

-				if (!parentList.contains(opRef))

-					parentList.add(opRef);

-			}

-			buildChildToParentsMapping(childRefs, map);

-		}

-	}

+    /**

+     * build the mapping from child to Parents

+     * 

+     * @param roots

+     * @param childToParentsMap

+     */

+    private void buildChildToParentsMapping(List<Mutable<ILogicalOperator>> roots,

+            Map<Mutable<ILogicalOperator>, List<Mutable<ILogicalOperator>>> map) {

+        for (Mutable<ILogicalOperator> opRef : roots) {

+            List<Mutable<ILogicalOperator>> childRefs = opRef.getValue().getInputs();

+            for (Mutable<ILogicalOperator> childRef : childRefs) {

+                List<Mutable<ILogicalOperator>> parentList = map.get(childRef);

+                if (parentList == null) {

+                    parentList = new ArrayList<Mutable<ILogicalOperator>>();

+                    map.put(childRef, parentList);

+                }

+                if (!parentList.contains(opRef))

+                    parentList.add(opRef);

+            }

+            buildChildToParentsMapping(childRefs, map);

+        }

+    }

 }

diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
index 2d8829c..6c1ac72 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
+++ b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
@@ -43,6 +43,7 @@
 import edu.uci.ics.hivesterix.runtime.jobgen.HiveConnectorPolicyAssignmentPolicy.Policy;

 import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryComparatorFactoryProvider;

 import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryHashFunctionFactoryProvider;

+import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryHashFunctionFamilyProvider;

 import edu.uci.ics.hivesterix.runtime.provider.HiveNormalizedKeyComputerFactoryProvider;

 import edu.uci.ics.hivesterix.runtime.provider.HivePrinterFactoryProvider;

 import edu.uci.ics.hivesterix.runtime.provider.HiveSerializerDeserializerProvider;

@@ -71,527 +72,483 @@
 @SuppressWarnings({ "rawtypes", "unchecked" })

 public class HyracksExecutionEngine implements IExecutionEngine {

 

-	private static final Log LOG = LogFactory

-			.getLog(HyracksExecutionEngine.class.getName());

+    private static final Log LOG = LogFactory.getLog(HyracksExecutionEngine.class.getName());

 

-	// private static final String[] locConstraints = {}

+    // private static final String[] locConstraints = {}

 

-	private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_LOGICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();

-	private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_PHYSICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();

-	static {

-		SequentialFixpointRuleController seqCtrlNoDfs = new SequentialFixpointRuleController(

-				false);

-		SequentialFixpointRuleController seqCtrlFullDfs = new SequentialFixpointRuleController(

-				true);

-		SequentialOnceRuleController seqOnceCtrl = new SequentialOnceRuleController(

-				true);

-		DEFAULT_LOGICAL_REWRITES

-				.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(

-						seqCtrlFullDfs, HiveRuleCollections.NORMALIZATION));

-		DEFAULT_LOGICAL_REWRITES

-				.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(

-						seqCtrlNoDfs,

-						HiveRuleCollections.COND_PUSHDOWN_AND_JOIN_INFERENCE));

-		DEFAULT_LOGICAL_REWRITES

-				.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(

-						seqCtrlFullDfs, HiveRuleCollections.LOAD_FIELDS));

-		DEFAULT_LOGICAL_REWRITES

-				.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(

-						seqCtrlNoDfs, HiveRuleCollections.OP_PUSHDOWN));

-		DEFAULT_LOGICAL_REWRITES

-				.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(

-						seqOnceCtrl, HiveRuleCollections.DATA_EXCHANGE));

-		DEFAULT_LOGICAL_REWRITES

-				.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(

-						seqCtrlNoDfs, HiveRuleCollections.CONSOLIDATION));

+    private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_LOGICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();

+    private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_PHYSICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();

+    static {

+        SequentialFixpointRuleController seqCtrlNoDfs = new SequentialFixpointRuleController(false);

+        SequentialFixpointRuleController seqCtrlFullDfs = new SequentialFixpointRuleController(true);

+        SequentialOnceRuleController seqOnceCtrl = new SequentialOnceRuleController(true);

+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlFullDfs,

+                HiveRuleCollections.NORMALIZATION));

+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,

+                HiveRuleCollections.COND_PUSHDOWN_AND_JOIN_INFERENCE));

+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlFullDfs,

+                HiveRuleCollections.LOAD_FIELDS));

+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,

+                HiveRuleCollections.OP_PUSHDOWN));

+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,

+                HiveRuleCollections.DATA_EXCHANGE));

+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,

+                HiveRuleCollections.CONSOLIDATION));

 

-		DEFAULT_PHYSICAL_REWRITES

-				.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(

-						seqOnceCtrl, HiveRuleCollections.PHYSICAL_PLAN_REWRITES));

-		DEFAULT_PHYSICAL_REWRITES

-				.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(

-						seqOnceCtrl, HiveRuleCollections.prepareJobGenRules));

-	}

+        DEFAULT_PHYSICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,

+                HiveRuleCollections.PHYSICAL_PLAN_REWRITES));

+        DEFAULT_PHYSICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,

+                HiveRuleCollections.prepareJobGenRules));

+    }

 

-	/**

-	 * static configurations for compiler

-	 */

-	private HeuristicCompilerFactoryBuilder builder;

+    /**

+     * static configurations for compiler

+     */

+    private HeuristicCompilerFactoryBuilder builder;

 

-	/**

-	 * compiler

-	 */

-	private ICompiler compiler;

+    /**

+     * compiler

+     */

+    private ICompiler compiler;

 

-	/**

-	 * physical optimization config

-	 */

-	private PhysicalOptimizationConfig physicalOptimizationConfig;

+    /**

+     * physical optimization config

+     */

+    private PhysicalOptimizationConfig physicalOptimizationConfig;

 

-	/**

-	 * final ending operators

-	 */

-	private List<Operator> leaveOps = new ArrayList<Operator>();

+    /**

+     * final ending operators

+     */

+    private List<Operator> leaveOps = new ArrayList<Operator>();

 

-	/**

-	 * tasks that are already visited

-	 */

-	private Map<Task<? extends Serializable>, Boolean> tasksVisited = new HashMap<Task<? extends Serializable>, Boolean>();

+    /**

+     * tasks that are already visited

+     */

+    private Map<Task<? extends Serializable>, Boolean> tasksVisited = new HashMap<Task<? extends Serializable>, Boolean>();

 

-	/**

-	 * hyracks job spec

-	 */

-	private JobSpecification jobSpec;

+    /**

+     * hyracks job spec

+     */

+    private JobSpecification jobSpec;

 

-	/**

-	 * hive configuration

-	 */

-	private HiveConf conf;

+    /**

+     * hive configuration

+     */

+    private HiveConf conf;

 

-	/**

-	 * plan printer

-	 */

-	private PrintWriter planPrinter;

+    /**

+     * plan printer

+     */

+    private PrintWriter planPrinter;

 

-	public HyracksExecutionEngine(HiveConf conf) {

-		this.conf = conf;

-		init(conf);

-	}

+    public HyracksExecutionEngine(HiveConf conf) {

+        this.conf = conf;

+        init(conf);

+    }

 

-	public HyracksExecutionEngine(HiveConf conf, PrintWriter planPrinter) {

-		this.conf = conf;

-		this.planPrinter = planPrinter;

-		init(conf);

-	}

+    public HyracksExecutionEngine(HiveConf conf, PrintWriter planPrinter) {

+        this.conf = conf;

+        this.planPrinter = planPrinter;

+        init(conf);

+    }

 

-	private void init(HiveConf conf) {

-		builder = new HeuristicCompilerFactoryBuilder(

-				DefaultOptimizationContextFactory.INSTANCE);

-		builder.setLogicalRewrites(DEFAULT_LOGICAL_REWRITES);

-		builder.setPhysicalRewrites(DEFAULT_PHYSICAL_REWRITES);

-		builder.setIMergeAggregationExpressionFactory(HiveMergeAggregationExpressionFactory.INSTANCE);

-		builder.setExpressionTypeComputer(HiveExpressionTypeComputer.INSTANCE);

-		builder.setNullableTypeComputer(HiveNullableTypeComputer.INSTANCE);

+    private void init(HiveConf conf) {

+        builder = new HeuristicCompilerFactoryBuilder(DefaultOptimizationContextFactory.INSTANCE);

+        builder.setLogicalRewrites(DEFAULT_LOGICAL_REWRITES);

+        builder.setPhysicalRewrites(DEFAULT_PHYSICAL_REWRITES);

+        builder.setIMergeAggregationExpressionFactory(HiveMergeAggregationExpressionFactory.INSTANCE);

+        builder.setExpressionTypeComputer(HiveExpressionTypeComputer.INSTANCE);

+        builder.setNullableTypeComputer(HiveNullableTypeComputer.INSTANCE);

 

-		long memSizeExternalGby = conf.getLong(

-				"hive.algebricks.groupby.external.memory", 268435456);

-		long memSizeExternalSort = conf.getLong("hive.algebricks.sort.memory",

-				536870912);

-		int frameSize = conf.getInt("hive.algebricks.framesize", 32768);

+        long memSizeExternalGby = conf.getLong("hive.algebricks.groupby.external.memory", 268435456);

+        long memSizeExternalSort = conf.getLong("hive.algebricks.sort.memory", 536870912);

+        int frameSize = conf.getInt("hive.algebricks.framesize", 32768);

 

-		physicalOptimizationConfig = new PhysicalOptimizationConfig();

-		int frameLimitExtGby = (int) (memSizeExternalGby / frameSize);

-		physicalOptimizationConfig

-				.setMaxFramesExternalGroupBy(frameLimitExtGby);

-		int frameLimitExtSort = (int) (memSizeExternalSort / frameSize);

-		physicalOptimizationConfig.setMaxFramesExternalSort(frameLimitExtSort);

-		builder.setPhysicalOptimizationConfig(physicalOptimizationConfig);

-	}

+        physicalOptimizationConfig = new PhysicalOptimizationConfig();

+        int frameLimitExtGby = (int) (memSizeExternalGby / frameSize);

+        physicalOptimizationConfig.setMaxFramesExternalGroupBy(frameLimitExtGby);

+        int frameLimitExtSort = (int) (memSizeExternalSort / frameSize);

+        physicalOptimizationConfig.setMaxFramesExternalSort(frameLimitExtSort);

+        builder.setPhysicalOptimizationConfig(physicalOptimizationConfig);

+    }

 

-	@Override

-	public int compileJob(List<Task<? extends Serializable>> rootTasks) {

-		// clean up

-		leaveOps.clear();

-		tasksVisited.clear();

-		jobSpec = null;

+    @Override

+    public int compileJob(List<Task<? extends Serializable>> rootTasks) {

+        // clean up

+        leaveOps.clear();

+        tasksVisited.clear();

+        jobSpec = null;

 

-		HashMap<String, PartitionDesc> aliasToPath = new HashMap<String, PartitionDesc>();

-		List<Operator> rootOps = generateRootOperatorDAG(rootTasks, aliasToPath);

+        HashMap<String, PartitionDesc> aliasToPath = new HashMap<String, PartitionDesc>();

+        List<Operator> rootOps = generateRootOperatorDAG(rootTasks, aliasToPath);

 

-		// get all leave Ops

-		getLeaves(rootOps, leaveOps);

+        // get all leave Ops

+        getLeaves(rootOps, leaveOps);

 

-		HiveAlgebricksTranslator translator = new HiveAlgebricksTranslator();

-		try {

-			translator.translate(rootOps, null, aliasToPath);

+        HiveAlgebricksTranslator translator = new HiveAlgebricksTranslator();

+        try {

+            translator.translate(rootOps, null, aliasToPath);

 

-			ILogicalPlan plan = translator.genLogicalPlan();

+            ILogicalPlan plan = translator.genLogicalPlan();

 

-			if (plan.getRoots() != null && plan.getRoots().size() > 0

-					&& plan.getRoots().get(0).getValue() != null) {

-				translator.printOperators();

-				System.out.println("translate complete");

-				ILogicalPlanAndMetadata planAndMetadata = new HiveLogicalPlanAndMetaData(

-						plan, translator.getMetadataProvider());

+            if (plan.getRoots() != null && plan.getRoots().size() > 0 && plan.getRoots().get(0).getValue() != null) {

+                translator.printOperators();

+                ILogicalPlanAndMetadata planAndMetadata = new HiveLogicalPlanAndMetaData(plan,

+                        translator.getMetadataProvider());

 

-				ICompilerFactory compilerFactory = builder.create();

-				compiler = compilerFactory.createCompiler(

-						planAndMetadata.getPlan(),

-						planAndMetadata.getMetadataProvider(),

-						translator.getVariableCounter());

+                ICompilerFactory compilerFactory = builder.create();

+                compiler = compilerFactory.createCompiler(planAndMetadata.getPlan(),

+                        planAndMetadata.getMetadataProvider(), translator.getVariableCounter());

 

-				// run optimization and re-writing rules for Hive plan

-				compiler.optimize();

+                // run optimization and re-writing rules for Hive plan

+                compiler.optimize();

 

-				// print optimized plan

-				LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();

-				StringBuilder buffer = new StringBuilder();

-				PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);

-				String planStr = buffer.toString();

-				System.out.println(planStr);

+                // print optimized plan

+                LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();

+                StringBuilder buffer = new StringBuilder();

+                PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);

+                String planStr = buffer.toString();

+                System.out.println(planStr);

 

-				if (planPrinter != null)

-					planPrinter.print(planStr);

-			}

-		} catch (Exception e) {

-			e.printStackTrace();

-			return 1;

-		}

+                if (planPrinter != null)

+                    planPrinter.print(planStr);

+            }

+        } catch (Exception e) {

+            e.printStackTrace();

+            return 1;

+        }

 

-		return 0;

-	}

+        return 0;

+    }

 

-	private void codeGen() throws AlgebricksException {

-		// number of cpu cores in the cluster

-		builder.setClusterLocations(new AlgebricksAbsolutePartitionConstraint(

-				ConfUtil.getNCs()));

-		// builder.setClusterTopology(ConfUtil.getClusterTopology());

-		builder.setBinaryBooleanInspectorFactory(HiveBinaryBooleanInspectorFactory.INSTANCE);

-		builder.setBinaryIntegerInspectorFactory(HiveBinaryIntegerInspectorFactory.INSTANCE);

-		builder.setComparatorFactoryProvider(HiveBinaryComparatorFactoryProvider.INSTANCE);

-		builder.setExpressionRuntimeProvider(HiveExpressionRuntimeProvider.INSTANCE);

-		builder.setHashFunctionFactoryProvider(HiveBinaryHashFunctionFactoryProvider.INSTANCE);

-		builder.setPrinterProvider(HivePrinterFactoryProvider.INSTANCE);

-		builder.setSerializerDeserializerProvider(HiveSerializerDeserializerProvider.INSTANCE);

-		builder.setNullWriterFactory(HiveNullWriterFactory.INSTANCE);

-		builder.setNormalizedKeyComputerFactoryProvider(HiveNormalizedKeyComputerFactoryProvider.INSTANCE);

-		builder.setPartialAggregationTypeComputer(HivePartialAggregationTypeComputer.INSTANCE);

-		builder.setTypeTraitProvider(HiveTypeTraitProvider.INSTANCE);

+    private void codeGen() throws AlgebricksException {

+        // number of cpu cores in the cluster

+        builder.setClusterLocations(new AlgebricksAbsolutePartitionConstraint(ConfUtil.getNCs()));

+        // builder.setClusterTopology(ConfUtil.getClusterTopology());

+        builder.setBinaryBooleanInspectorFactory(HiveBinaryBooleanInspectorFactory.INSTANCE);

+        builder.setBinaryIntegerInspectorFactory(HiveBinaryIntegerInspectorFactory.INSTANCE);

+        builder.setComparatorFactoryProvider(HiveBinaryComparatorFactoryProvider.INSTANCE);

+        builder.setExpressionRuntimeProvider(HiveExpressionRuntimeProvider.INSTANCE);

+        builder.setHashFunctionFactoryProvider(HiveBinaryHashFunctionFactoryProvider.INSTANCE);

+        builder.setPrinterProvider(HivePrinterFactoryProvider.INSTANCE);

+        builder.setSerializerDeserializerProvider(HiveSerializerDeserializerProvider.INSTANCE);

+        builder.setNullWriterFactory(HiveNullWriterFactory.INSTANCE);

+        builder.setNormalizedKeyComputerFactoryProvider(HiveNormalizedKeyComputerFactoryProvider.INSTANCE);

+        builder.setPartialAggregationTypeComputer(HivePartialAggregationTypeComputer.INSTANCE);

+        builder.setTypeTraitProvider(HiveTypeTraitProvider.INSTANCE);

+        builder.setHashFunctionFamilyProvider(HiveBinaryHashFunctionFamilyProvider.INSTANCE);

 

-		jobSpec = compiler.createJob(null);

+        jobSpec = compiler.createJob(null);

 

-		// set the policy

-		String policyStr = conf.get("hive.hyracks.connectorpolicy");

-		if (policyStr == null)

-			policyStr = "PIPELINING";

-		Policy policyValue = Policy.valueOf(policyStr);

-		jobSpec.setConnectorPolicyAssignmentPolicy(new HiveConnectorPolicyAssignmentPolicy(

-				policyValue));

+        // set the policy

+        String policyStr = conf.get("hive.hyracks.connectorpolicy");

+        if (policyStr == null)

+            policyStr = "PIPELINING";

+        Policy policyValue = Policy.valueOf(policyStr);

+        jobSpec.setConnectorPolicyAssignmentPolicy(new HiveConnectorPolicyAssignmentPolicy(policyValue));

+        jobSpec.setUseConnectorPolicyForScheduling(false);

+    }

 

-		// execute the job

-		System.out.println(jobSpec.toString());

-	}

+    @Override

+    public int executeJob() {

+        try {

+            codeGen();

+            executeHyraxJob(jobSpec);

+        } catch (Exception e) {

+            e.printStackTrace();

+            return 1;

+        }

+        return 0;

+    }

 

-	@Override

-	public int executeJob() {

-		try {

-			codeGen();

-			executeHyraxJob(jobSpec);

-		} catch (Exception e) {

-			e.printStackTrace();

-			return 1;

-		}

-		return 0;

-	}

+    private List<Operator> generateRootOperatorDAG(List<Task<? extends Serializable>> rootTasks,

+            HashMap<String, PartitionDesc> aliasToPath) {

 

-	private List<Operator> generateRootOperatorDAG(

-			List<Task<? extends Serializable>> rootTasks,

-			HashMap<String, PartitionDesc> aliasToPath) {

+        List<Operator> rootOps = new ArrayList<Operator>();

+        List<Task<? extends Serializable>> toDelete = new ArrayList<Task<? extends Serializable>>();

+        tasksVisited.clear();

 

-		List<Operator> rootOps = new ArrayList<Operator>();

-		List<Task<? extends Serializable>> toDelete = new ArrayList<Task<? extends Serializable>>();

-		tasksVisited.clear();

+        for (int i = rootTasks.size() - 1; i >= 0; i--) {

+            /**

+             * list of map-reduce tasks

+             */

+            Task<? extends Serializable> task = rootTasks.get(i);

+            // System.out.println("!" + task.getName());

 

-		for (int i = rootTasks.size() - 1; i >= 0; i--) {

-			/**

-			 * list of map-reduce tasks

-			 */

-			Task<? extends Serializable> task = rootTasks.get(i);

-			// System.out.println("!" + task.getName());

+            if (task instanceof MapRedTask) {

+                List<Operator> mapRootOps = articulateMapReduceOperators(task, rootOps, aliasToPath, rootTasks);

+                if (i == 0)

+                    rootOps.addAll(mapRootOps);

+                else {

+                    List<Operator> leaves = new ArrayList<Operator>();

+                    getLeaves(rootOps, leaves);

 

-			if (task instanceof MapRedTask) {

-				List<Operator> mapRootOps = articulateMapReduceOperators(task,

-						rootOps, aliasToPath, rootTasks);

-				if (i == 0)

-					rootOps.addAll(mapRootOps);

-				else {

-					List<Operator> leaves = new ArrayList<Operator>();

-					getLeaves(rootOps, leaves);

+                    List<Operator> mapChildren = new ArrayList<Operator>();

+                    for (Operator childMap : mapRootOps) {

+                        if (childMap instanceof TableScanOperator) {

+                            TableScanDesc topDesc = (TableScanDesc) childMap.getConf();

+                            if (topDesc == null)

+                                mapChildren.add(childMap);

+                            else {

+                                rootOps.add(childMap);

+                            }

+                        } else

+                            mapChildren.add(childMap);

+                    }

 

-					List<Operator> mapChildren = new ArrayList<Operator>();

-					for (Operator childMap : mapRootOps) {

-						if (childMap instanceof TableScanOperator) {

-							TableScanDesc topDesc = (TableScanDesc) childMap

-									.getConf();

-							if (topDesc == null)

-								mapChildren.add(childMap);

-							else {

-								rootOps.add(childMap);

-							}

-						} else

-							mapChildren.add(childMap);

-					}

+                    if (mapChildren.size() > 0) {

+                        for (Operator leaf : leaves)

+                            leaf.setChildOperators(mapChildren);

+                        for (Operator child : mapChildren)

+                            child.setParentOperators(leaves);

+                    }

+                }

 

-					if (mapChildren.size() > 0) {

-						for (Operator leaf : leaves)

-							leaf.setChildOperators(mapChildren);

-						for (Operator child : mapChildren)

-							child.setParentOperators(leaves);

-					}

-				}

+                MapredWork mr = (MapredWork) task.getWork();

+                HashMap<String, PartitionDesc> map = mr.getAliasToPartnInfo();

 

-				MapredWork mr = (MapredWork) task.getWork();

-				HashMap<String, PartitionDesc> map = mr.getAliasToPartnInfo();

+                addAliasToPartition(aliasToPath, map);

+                toDelete.add(task);

+            }

+        }

 

-				addAliasToPartition(aliasToPath, map);

-				toDelete.add(task);

-			}

-		}

+        for (Task<? extends Serializable> task : toDelete)

+            rootTasks.remove(task);

 

-		for (Task<? extends Serializable> task : toDelete)

-			rootTasks.remove(task);

-

-		return rootOps;

-	}

+        return rootOps;

+    }

 

-	private void addAliasToPartition(

-			HashMap<String, PartitionDesc> aliasToPath,

-			HashMap<String, PartitionDesc> map) {

-		Iterator<String> keys = map.keySet().iterator();

-		while (keys.hasNext()) {

-			String key = keys.next();

-			PartitionDesc part = map.get(key);

-			String[] names = key.split(":");

-			for (String name : names) {

-				aliasToPath.put(name, part);

-			}

-		}

-	}

+    private void addAliasToPartition(HashMap<String, PartitionDesc> aliasToPath, HashMap<String, PartitionDesc> map) {

+        Iterator<String> keys = map.keySet().iterator();

+        while (keys.hasNext()) {

+            String key = keys.next();

+            PartitionDesc part = map.get(key);

+            String[] names = key.split(":");

+            for (String name : names) {

+                aliasToPath.put(name, part);

+            }

+        }

+    }

 

-	private List<Operator> articulateMapReduceOperators(Task task,

-			List<Operator> rootOps, HashMap<String, PartitionDesc> aliasToPath,

-			List<Task<? extends Serializable>> rootTasks) {

-		// System.out.println("!"+task.getName());

-		if (!(task instanceof MapRedTask)) {

-			if (!(task instanceof ConditionalTask)) {

-				rootTasks.add(task);

-				return null;

-			} else {

-				// remove map-reduce branches in condition task

-				ConditionalTask condition = (ConditionalTask) task;

-				List<Task<? extends Serializable>> branches = condition

-						.getListTasks();

-				for (int i = branches.size() - 1; i >= 0; i--) {

-					Task branch = branches.get(i);

-					if (branch instanceof MapRedTask) {

-						return articulateMapReduceOperators(branch, rootOps,

-								aliasToPath, rootTasks);

-					}

-				}

-				rootTasks.add(task);

-				return null;

-			}

-		}

+    private List<Operator> articulateMapReduceOperators(Task task, List<Operator> rootOps,

+            HashMap<String, PartitionDesc> aliasToPath, List<Task<? extends Serializable>> rootTasks) {

+        // System.out.println("!"+task.getName());

+        if (!(task instanceof MapRedTask)) {

+            if (!(task instanceof ConditionalTask)) {

+                rootTasks.add(task);

+                return null;

+            } else {

+                // remove map-reduce branches in condition task

+                ConditionalTask condition = (ConditionalTask) task;

+                List<Task<? extends Serializable>> branches = condition.getListTasks();

+                for (int i = branches.size() - 1; i >= 0; i--) {

+                    Task branch = branches.get(i);

+                    if (branch instanceof MapRedTask) {

+                        return articulateMapReduceOperators(branch, rootOps, aliasToPath, rootTasks);

+                    }

+                }

+                rootTasks.add(task);

+                return null;

+            }

+        }

 

-		MapredWork mr = (MapredWork) task.getWork();

-		HashMap<String, PartitionDesc> map = mr.getAliasToPartnInfo();

+        MapredWork mr = (MapredWork) task.getWork();

+        HashMap<String, PartitionDesc> map = mr.getAliasToPartnInfo();

 

-		// put all aliasToParitionDesc mapping into the map

-		addAliasToPartition(aliasToPath, map);

+        // put all aliasToParitionDesc mapping into the map

+        addAliasToPartition(aliasToPath, map);

 

-		MapRedTask mrtask = (MapRedTask) task;

-		MapredWork work = (MapredWork) mrtask.getWork();

-		HashMap<String, Operator<? extends Serializable>> operators = work

-				.getAliasToWork();

+        MapRedTask mrtask = (MapRedTask) task;

+        MapredWork work = (MapredWork) mrtask.getWork();

+        HashMap<String, Operator<? extends Serializable>> operators = work.getAliasToWork();

 

-		Set entries = operators.entrySet();

-		Iterator<Entry<String, Operator>> iterator = entries.iterator();

-		List<Operator> mapRootOps = new ArrayList<Operator>();

+        Set entries = operators.entrySet();

+        Iterator<Entry<String, Operator>> iterator = entries.iterator();

+        List<Operator> mapRootOps = new ArrayList<Operator>();

 

-		// get map root operators

-		while (iterator.hasNext()) {

-			Operator next = iterator.next().getValue();

-			if (!mapRootOps.contains(next)) {

-				// clear that only for the case of union

-				mapRootOps.add(next);

-			}

-		}

+        // get map root operators

+        while (iterator.hasNext()) {

+            Operator next = iterator.next().getValue();

+            if (!mapRootOps.contains(next)) {

+                // clear that only for the case of union

+                mapRootOps.add(next);

+            }

+        }

 

-		// get map local work

-		MapredLocalWork localWork = work.getMapLocalWork();

-		if (localWork != null) {

-			HashMap<String, Operator<? extends Serializable>> localOperators = localWork

-					.getAliasToWork();

+        // get map local work

+        MapredLocalWork localWork = work.getMapLocalWork();

+        if (localWork != null) {

+            HashMap<String, Operator<? extends Serializable>> localOperators = localWork.getAliasToWork();

 

-			Set localEntries = localOperators.entrySet();

-			Iterator<Entry<String, Operator>> localIterator = localEntries

-					.iterator();

-			while (localIterator.hasNext()) {

-				mapRootOps.add(localIterator.next().getValue());

-			}

+            Set localEntries = localOperators.entrySet();

+            Iterator<Entry<String, Operator>> localIterator = localEntries.iterator();

+            while (localIterator.hasNext()) {

+                mapRootOps.add(localIterator.next().getValue());

+            }

 

-			HashMap<String, FetchWork> localFetch = localWork

-					.getAliasToFetchWork();

-			Set localFetchEntries = localFetch.entrySet();

-			Iterator<Entry<String, FetchWork>> localFetchIterator = localFetchEntries

-					.iterator();

-			while (localFetchIterator.hasNext()) {

-				Entry<String, FetchWork> fetchMap = localFetchIterator.next();

-				FetchWork fetch = fetchMap.getValue();

-				String alias = fetchMap.getKey();

-				List<PartitionDesc> dirPart = fetch.getPartDesc();

+            HashMap<String, FetchWork> localFetch = localWork.getAliasToFetchWork();

+            Set localFetchEntries = localFetch.entrySet();

+            Iterator<Entry<String, FetchWork>> localFetchIterator = localFetchEntries.iterator();

+            while (localFetchIterator.hasNext()) {

+                Entry<String, FetchWork> fetchMap = localFetchIterator.next();

+                FetchWork fetch = fetchMap.getValue();

+                String alias = fetchMap.getKey();

+                List<PartitionDesc> dirPart = fetch.getPartDesc();

 

-				// temporary hack: put the first partitionDesc into the map

-				aliasToPath.put(alias, dirPart.get(0));

-			}

-		}

+                // temporary hack: put the first partitionDesc into the map

+                aliasToPath.put(alias, dirPart.get(0));

+            }

+        }

 

-		Boolean visited = tasksVisited.get(task);

-		if (visited != null && visited.booleanValue() == true) {

-			return mapRootOps;

-		}

+        Boolean visited = tasksVisited.get(task);

+        if (visited != null && visited.booleanValue() == true) {

+            return mapRootOps;

+        }

 

-		// do that only for union operator

-		for (Operator op : mapRootOps)

-			if (op.getParentOperators() != null)

-				op.getParentOperators().clear();

+        // do that only for union operator

+        for (Operator op : mapRootOps)

+            if (op.getParentOperators() != null)

+                op.getParentOperators().clear();

 

-		List<Operator> mapLeaves = new ArrayList<Operator>();

-		downToLeaves(mapRootOps, mapLeaves);

-		List<Operator> reduceOps = new ArrayList<Operator>();

+        List<Operator> mapLeaves = new ArrayList<Operator>();

+        downToLeaves(mapRootOps, mapLeaves);

+        List<Operator> reduceOps = new ArrayList<Operator>();

 

-		if (work.getReducer() != null)

-			reduceOps.add(work.getReducer());

+        if (work.getReducer() != null)

+            reduceOps.add(work.getReducer());

 

-		for (Operator mapLeaf : mapLeaves) {

-			mapLeaf.setChildOperators(reduceOps);

-		}

+        for (Operator mapLeaf : mapLeaves) {

+            mapLeaf.setChildOperators(reduceOps);

+        }

 

-		for (Operator reduceOp : reduceOps) {

-			if (reduceOp != null)

-				reduceOp.setParentOperators(mapLeaves);

-		}

+        for (Operator reduceOp : reduceOps) {

+            if (reduceOp != null)

+                reduceOp.setParentOperators(mapLeaves);

+        }

 

-		List<Operator> leafs = new ArrayList<Operator>();

-		if (reduceOps.size() > 0) {

-			downToLeaves(reduceOps, leafs);

-		} else {

-			leafs = mapLeaves;

-		}

+        List<Operator> leafs = new ArrayList<Operator>();

+        if (reduceOps.size() > 0) {

+            downToLeaves(reduceOps, leafs);

+        } else {

+            leafs = mapLeaves;

+        }

 

-		List<Operator> mapChildren = new ArrayList<Operator>();

-		if (task.getChildTasks() != null && task.getChildTasks().size() > 0) {

-			System.out.println("have child tasks!!");

-			for (Object child : task.getChildTasks()) {

-				System.out.println(child.getClass().getName());

-				List<Operator> childMapOps = articulateMapReduceOperators(

-						(Task) child, rootOps, aliasToPath, rootTasks);

-				if (childMapOps == null)

-					continue;

+        List<Operator> mapChildren = new ArrayList<Operator>();

+        if (task.getChildTasks() != null && task.getChildTasks().size() > 0) {

+            for (Object child : task.getChildTasks()) {

+                List<Operator> childMapOps = articulateMapReduceOperators((Task) child, rootOps, aliasToPath, rootTasks);

+                if (childMapOps == null)

+                    continue;

 

-				for (Operator childMap : childMapOps) {

-					if (childMap instanceof TableScanOperator) {

-						TableScanDesc topDesc = (TableScanDesc) childMap

-								.getConf();

-						if (topDesc == null)

-							mapChildren.add(childMap);

-						else {

-							rootOps.add(childMap);

-						}

-					} else {

-						// if not table scan, add the child

-						mapChildren.add(childMap);

-					}

-				}

-			}

+                for (Operator childMap : childMapOps) {

+                    if (childMap instanceof TableScanOperator) {

+                        TableScanDesc topDesc = (TableScanDesc) childMap.getConf();

+                        if (topDesc == null)

+                            mapChildren.add(childMap);

+                        else {

+                            rootOps.add(childMap);

+                        }

+                    } else {

+                        // if not table scan, add the child

+                        mapChildren.add(childMap);

+                    }

+                }

+            }

 

-			if (mapChildren.size() > 0) {

-				int i = 0;

-				for (Operator leaf : leafs) {

-					if (leaf.getChildOperators() == null

-							|| leaf.getChildOperators().size() == 0)

-						leaf.setChildOperators(new ArrayList<Operator>());

-					leaf.getChildOperators().add(mapChildren.get(i));

-					i++;

-				}

-				i = 0;

-				for (Operator child : mapChildren) {

-					if (child.getParentOperators() == null

-							|| child.getParentOperators().size() == 0)

-						child.setParentOperators(new ArrayList<Operator>());

-					child.getParentOperators().add(leafs.get(i));

-					i++;

-				}

-			}

-		}

+            if (mapChildren.size() > 0) {

+                int i = 0;

+                for (Operator leaf : leafs) {

+                    if (leaf.getChildOperators() == null || leaf.getChildOperators().size() == 0)

+                        leaf.setChildOperators(new ArrayList<Operator>());

+                    leaf.getChildOperators().add(mapChildren.get(i));

+                    i++;

+                }

+                i = 0;

+                for (Operator child : mapChildren) {

+                    if (child.getParentOperators() == null || child.getParentOperators().size() == 0)

+                        child.setParentOperators(new ArrayList<Operator>());

+                    child.getParentOperators().add(leafs.get(i));

+                    i++;

+                }

+            }

+        }

 

-		// mark this task as visited

-		this.tasksVisited.put(task, true);

-		return mapRootOps;

-	}

+        // mark this task as visited

+        this.tasksVisited.put(task, true);

+        return mapRootOps;

+    }

 

-	/**

-	 * down to leaf nodes

-	 * 

-	 * @param ops

-	 * @param leaves

-	 */

-	private void downToLeaves(List<Operator> ops, List<Operator> leaves) {

+    /**

+     * down to leaf nodes

+     * 

+     * @param ops

+     * @param leaves

+     */

+    private void downToLeaves(List<Operator> ops, List<Operator> leaves) {

 

-		// Operator currentOp;

-		for (Operator op : ops) {

-			if (op != null && op.getChildOperators() != null

-					&& op.getChildOperators().size() > 0) {

-				downToLeaves(op.getChildOperators(), leaves);

-			} else {

-				if (op != null && leaves.indexOf(op) < 0)

-					leaves.add(op);

-			}

-		}

-	}

+        // Operator currentOp;

+        for (Operator op : ops) {

+            if (op != null && op.getChildOperators() != null && op.getChildOperators().size() > 0) {

+                downToLeaves(op.getChildOperators(), leaves);

+            } else {

+                if (op != null && leaves.indexOf(op) < 0)

+                    leaves.add(op);

+            }

+        }

+    }

 

-	private void getLeaves(List<Operator> roots, List<Operator> currentLeaves) {

-		for (Operator op : roots) {

-			List<Operator> children = op.getChildOperators();

-			if (children == null || children.size() <= 0) {

-				currentLeaves.add(op);

-			} else {

-				getLeaves(children, currentLeaves);

-			}

-		}

-	}

+    private void getLeaves(List<Operator> roots, List<Operator> currentLeaves) {

+        for (Operator op : roots) {

+            List<Operator> children = op.getChildOperators();

+            if (children == null || children.size() <= 0) {

+                currentLeaves.add(op);

+            } else {

+                getLeaves(children, currentLeaves);

+            }

+        }

+    }

 

-	private void executeHyraxJob(JobSpecification job) throws Exception {

-		String ipAddress = conf.get("hive.hyracks.host");

-		int port = Integer.parseInt(conf.get("hive.hyracks.port"));

-		String applicationName = conf.get("hive.hyracks.app");

-		System.out.println("connect to " + ipAddress + " " + port);

+    private void executeHyraxJob(JobSpecification job) throws Exception {

+        String ipAddress = conf.get("hive.hyracks.host");

+        int port = Integer.parseInt(conf.get("hive.hyracks.port"));

+        String applicationName = conf.get("hive.hyracks.app");

+        //System.out.println("connect to " + ipAddress + " " + port);

 

-		IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);

+        IHyracksClientConnection hcc = new HyracksConnection(ipAddress, port);

 

-		System.out.println("get connected");

-		long start = System.currentTimeMillis();

-		JobId jobId = hcc.startJob(applicationName, job);

-		hcc.waitForCompletion(jobId);

+        //System.out.println("get connected");

+        long start = System.currentTimeMillis();

+        JobId jobId = hcc.startJob(applicationName, job);

+        hcc.waitForCompletion(jobId);

 

-		System.out.println("job finished: " + jobId.toString());

-		// call all leave nodes to end

-		for (Operator leaf : leaveOps) {

-			jobClose(leaf);

-		}

+        //System.out.println("job finished: " + jobId.toString());

+        // call all leave nodes to end

+        for (Operator leaf : leaveOps) {

+            jobClose(leaf);

+        }

 

-		long end = System.currentTimeMillis();

-		System.err.println(start + " " + end + " " + (end - start));

-	}

+        long end = System.currentTimeMillis();

+        System.err.println(start + " " + end + " " + (end - start));

+    }

 

-	/**

-	 * mv to final directory on hdfs (not real final)

-	 * 

-	 * @param leaf

-	 * @throws Exception

-	 */

-	private void jobClose(Operator leaf) throws Exception {

-		FileSinkOperator fsOp = (FileSinkOperator) leaf;

-		FileSinkDesc desc = fsOp.getConf();

-		boolean isNativeTable = !desc.getTableInfo().isNonNative();

-		if ((conf != null) && isNativeTable) {

-			String specPath = desc.getDirName();

-			DynamicPartitionCtx dpCtx = desc.getDynPartCtx();

-			// for 0.7.0

-			fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);

-			// for 0.8.0

-			// Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx,

-			// desc);

-		}

-	}

+    /**

+     * mv to final directory on hdfs (not real final)

+     * 

+     * @param leaf

+     * @throws Exception

+     */

+    private void jobClose(Operator leaf) throws Exception {

+        FileSinkOperator fsOp = (FileSinkOperator) leaf;

+        FileSinkDesc desc = fsOp.getConf();

+        boolean isNativeTable = !desc.getTableInfo().isNonNative();

+        if ((conf != null) && isNativeTable) {

+            String specPath = desc.getDirName();

+            DynamicPartitionCtx dpCtx = desc.getDynPartCtx();

+            // for 0.7.0

+            fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);

+            // for 0.8.0

+            // Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx,

+            // desc);

+        }

+    }

 }

diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/MurmurHash3BinaryHashFunctionFamily.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/MurmurHash3BinaryHashFunctionFamily.java
new file mode 100644
index 0000000..760a614
--- /dev/null
+++ b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/factory/hashfunction/MurmurHash3BinaryHashFunctionFamily.java
@@ -0,0 +1,63 @@
+package edu.uci.ics.hivesterix.runtime.factory.hashfunction;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunction;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
+
+public class MurmurHash3BinaryHashFunctionFamily implements IBinaryHashFunctionFamily {
+
+    public static final IBinaryHashFunctionFamily INSTANCE = new MurmurHash3BinaryHashFunctionFamily();
+
+    private static final long serialVersionUID = 1L;
+
+    private MurmurHash3BinaryHashFunctionFamily() {
+    }
+
+    private static final int C1 = 0xcc9e2d51;
+    private static final int C2 = 0x1b873593;
+    private static final int C3 = 5;
+    private static final int C4 = 0xe6546b64;
+    private static final int C5 = 0x85ebca6b;
+    private static final int C6 = 0xc2b2ae35;
+
+    @Override
+    public IBinaryHashFunction createBinaryHashFunction(final int seed) {
+        return new IBinaryHashFunction() {
+            @Override
+            public int hash(byte[] bytes, int offset, int length) {
+                int h = seed;
+                int p = offset;
+                int remain = length;
+                while (remain >= 4) {
+                    int k = (bytes[p] & 0xff) | ((bytes[p + 1] & 0xff) << 8) | ((bytes[p + 2] & 0xff) << 16)
+                            | ((bytes[p + 3] & 0xff) << 24);
+                    k *= C1;
+                    k = Integer.rotateLeft(k, 15);
+                    k *= C2;
+                    h ^= k;
+                    h = Integer.rotateLeft(h, 13);
+                    h = h * C3 + C4;
+                    p += 4;
+                    remain -= 4;
+                }
+                if (remain > 0) {
+                    int k = 0;
+                    for (int i = 0; remain > 0; i += 8) {
+                        k ^= (bytes[p++] & 0xff) << i;
+                        remain--;
+                    }
+                    k *= C1;
+                    k = Integer.rotateLeft(k, 15);
+                    k *= C2;
+                    h ^= k;
+                }
+                h ^= length;
+                h ^= (h >>> 16);
+                h *= C5;
+                h ^= (h >>> 13);
+                h *= C6;
+                h ^= (h >>> 16);
+                return h;
+            }
+        };
+    }
+}
\ No newline at end of file
diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParser.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParser.java
index 12222b4..7681bd1 100644
--- a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParser.java
+++ b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/operator/filescan/HiveTupleParser.java
@@ -175,9 +175,7 @@
 				if (outputColumnsOffset[i] >= 0)

 					outputFieldRefs[outputColumnsOffset[i]] = fieldRefs.get(i);

 

-			long serDeTime = 0;

 			while (reader.next(key, value)) {

-				long start = System.currentTimeMillis();

 				// reuse the tuple builder

 				tb.reset();

 				if (parser != null) {

@@ -205,8 +203,6 @@
 						i++;

 					}

 				}

-				long end = System.currentTimeMillis();

-				serDeTime += (end - start);

 

 				if (!appender.append(tb.getFieldEndOffsets(),

 						tb.getByteArray(), 0, tb.getSize())) {

@@ -221,7 +217,6 @@
 					}

 				}

 			}

-			System.out.println("serde time: " + serDeTime);

 			reader.close();

 			System.gc();

 

diff --git a/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFamilyProvider.java b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFamilyProvider.java
new file mode 100644
index 0000000..e7a2e79
--- /dev/null
+++ b/hivesterix/src/main/java/edu/uci/ics/hivesterix/runtime/provider/HiveBinaryHashFunctionFamilyProvider.java
@@ -0,0 +1,20 @@
+package edu.uci.ics.hivesterix.runtime.provider;
+
+import edu.uci.ics.hivesterix.runtime.factory.hashfunction.MurmurHash3BinaryHashFunctionFamily;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.data.IBinaryHashFunctionFamilyProvider;
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryHashFunctionFamily;
+
+public class HiveBinaryHashFunctionFamilyProvider implements IBinaryHashFunctionFamilyProvider {
+
+    public static HiveBinaryHashFunctionFamilyProvider INSTANCE = new HiveBinaryHashFunctionFamilyProvider();
+
+    private HiveBinaryHashFunctionFamilyProvider() {
+
+    }
+
+    @Override
+    public IBinaryHashFunctionFamily getBinaryHashFunctionFamily(Object type) throws AlgebricksException {
+        return MurmurHash3BinaryHashFunctionFamily.INSTANCE;
+    }
+}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/primitive/RawUTF8StringPointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/primitive/RawUTF8StringPointable.java
new file mode 100644
index 0000000..c90ce5a
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/primitive/RawUTF8StringPointable.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.hyracks.data.std.primitive;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.data.std.api.AbstractPointable;
+import edu.uci.ics.hyracks.data.std.api.IComparable;
+import edu.uci.ics.hyracks.data.std.api.IHashable;
+import edu.uci.ics.hyracks.data.std.api.IPointable;
+import edu.uci.ics.hyracks.data.std.api.IPointableFactory;
+
+/**
+ * This class provides the raw bytes-based comparison and hash function for UTF8 strings.
+ * Note that the comparison may not deliver the correct ordering for certain languages that include 2 or 3 bytes characters.
+ * But it works for single-byte character languages.
+ */
+public final class RawUTF8StringPointable extends AbstractPointable implements IHashable, IComparable {
+    public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        public boolean isFixedLength() {
+            return false;
+        }
+
+        @Override
+        public int getFixedLength() {
+            return 0;
+        }
+    };
+
+    public static final IPointableFactory FACTORY = new IPointableFactory() {
+        private static final long serialVersionUID = 1L;
+
+        @Override
+        public IPointable createPointable() {
+            return new RawUTF8StringPointable();
+        }
+
+        @Override
+        public ITypeTraits getTypeTraits() {
+            return TYPE_TRAITS;
+        }
+    };
+
+    @Override
+    public int compareTo(IPointable pointer) {
+        return compareTo(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
+    }
+
+    @Override
+    public int compareTo(byte[] bytes, int start, int length) {
+        int utflen1 = UTF8StringPointable.getUTFLength(this.bytes, this.start);
+        int utflen2 = UTF8StringPointable.getUTFLength(bytes, start);
+
+        int c1 = 0;
+        int c2 = 0;
+
+        int s1Start = this.start + 2;
+        int s2Start = start + 2;
+
+        while (c1 < utflen1 && c2 < utflen2) {
+            char ch1 = (char) this.bytes[s1Start + c1];
+            char ch2 = (char) bytes[s2Start + c2];
+
+            if (ch1 != ch2) {
+                return ch1 - ch2;
+            }
+            c1++;
+            c2++;
+        }
+        return utflen1 - utflen2;
+    }
+
+    @Override
+    public int hash() {
+        int h = 0;
+        int utflen = UTF8StringPointable.getUTFLength(bytes, start);
+        int sStart = start + 2;
+        int c = 0;
+
+        while (c < utflen) {
+            char ch = (char) bytes[sStart + c];
+            h = 31 * h + ch;
+            c++;
+        }
+        return h;
+    }
+
+    public void toString(StringBuilder buffer) {
+        UTF8StringPointable.toString(buffer, bytes, start);
+    }
+}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/primitive/UTF8StringPointable.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/primitive/UTF8StringPointable.java
index f6d6093..866ebb0 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/primitive/UTF8StringPointable.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -216,4 +216,4 @@
     public void toString(StringBuilder buffer) {
         toString(buffer, bytes, start);
     }
-}
\ No newline at end of file
+}
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/util/ArrayBackedValueStorage.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/util/ArrayBackedValueStorage.java
index e8dc9b4..7d7d2c1 100644
--- a/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/util/ArrayBackedValueStorage.java
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/util/ArrayBackedValueStorage.java
@@ -1,34 +1,28 @@
 package edu.uci.ics.hyracks.data.std.util;
 
 import java.io.DataOutput;
-import java.io.DataOutputStream;
 import java.io.IOException;
 
 import edu.uci.ics.hyracks.data.std.api.IMutableValueStorage;
 import edu.uci.ics.hyracks.data.std.api.IValueReference;
 
 public class ArrayBackedValueStorage implements IMutableValueStorage {
-    private final ByteArrayAccessibleOutputStream baaos;
-    private final DataOutputStream dos;
-
-    public ArrayBackedValueStorage() {
-        baaos = new ByteArrayAccessibleOutputStream();
-        dos = new DataOutputStream(baaos);
-    }
+   
+    private final GrowableArray data = new GrowableArray();
 
     @Override
     public void reset() {
-        baaos.reset();
+        data.reset();
     }
 
     @Override
     public DataOutput getDataOutput() {
-        return dos;
+        return data.getDataOutput();
     }
 
     @Override
     public byte[] getByteArray() {
-        return baaos.getByteArray();
+        return data.getByteArray();
     }
 
     @Override
@@ -38,12 +32,12 @@
 
     @Override
     public int getLength() {
-        return baaos.size();
+        return data.getLength();
     }
 
     public void append(IValueReference value) {
         try {
-            dos.write(value.getByteArray(), value.getStartOffset(), value.getLength());
+            data.append(value);
         } catch (IOException e) {
             e.printStackTrace();
         }
diff --git a/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/util/GrowableArray.java b/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/util/GrowableArray.java
new file mode 100644
index 0000000..c174d4e
--- /dev/null
+++ b/hyracks/hyracks-data/hyracks-data-std/src/main/java/edu/uci/ics/hyracks/data/std/util/GrowableArray.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.data.std.util;
+
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
+import edu.uci.ics.hyracks.data.std.api.IValueReference;
+
+public class GrowableArray implements IDataOutputProvider {
+    private final ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
+    private final DataOutputStream dos = new DataOutputStream(baaos);
+
+    @Override
+    public DataOutput getDataOutput() {
+        return dos;
+    }
+
+    public void reset() {
+        baaos.reset();
+    }
+
+    public byte[] getByteArray() {
+        return baaos.getByteArray();
+    }
+
+    public int getLength() {
+        return baaos.size();
+    }
+
+    public void append(IValueReference value) throws IOException {
+        dos.write(value.getByteArray(), value.getStartOffset(), value.getLength());
+    }
+}
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/ArrayTupleBuilder.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/ArrayTupleBuilder.java
index 989bc6b..8c865c4 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/ArrayTupleBuilder.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/comm/io/ArrayTupleBuilder.java
@@ -15,14 +15,13 @@
 package edu.uci.ics.hyracks.dataflow.common.comm.io;
 
 import java.io.DataOutput;
-import java.io.DataOutputStream;
 import java.io.IOException;
 
 import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
 import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.data.std.api.IDataOutputProvider;
-import edu.uci.ics.hyracks.data.std.util.ByteArrayAccessibleOutputStream;
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
 
 /**
  * Array backed tuple builder.
@@ -30,25 +29,21 @@
  * @author vinayakb
  */
 public class ArrayTupleBuilder implements IDataOutputProvider {
-    private final ByteArrayAccessibleOutputStream baaos;
-    private final DataOutputStream dos;
+    private final GrowableArray fieldData = new GrowableArray();
     private final int[] fEndOffsets;
     private int nextField;
 
     public ArrayTupleBuilder(int nFields) {
-        baaos = new ByteArrayAccessibleOutputStream();
-        dos = new DataOutputStream(baaos);
         fEndOffsets = new int[nFields];
     }
 
     /**
      * Resets the builder.
-     * 
      * reset() must be called before attempting to create a new tuple.
      */
     public void reset() {
         nextField = 0;
-        baaos.reset();
+        fieldData.reset();
     }
 
     /**
@@ -66,7 +61,7 @@
      * @return Data byte array.
      */
     public byte[] getByteArray() {
-        return baaos.getByteArray();
+        return fieldData.getByteArray();
     }
 
     /**
@@ -75,7 +70,7 @@
      * @return data area size.
      */
     public int getSize() {
-        return baaos.size();
+        return fieldData.getLength();
     }
 
     /**
@@ -96,14 +91,15 @@
         int fStartOffset = accessor.getFieldStartOffset(tIndex, fIndex);
         int fLen = accessor.getFieldEndOffset(tIndex, fIndex) - fStartOffset;
         try {
-            dos.write(accessor.getBuffer().array(), startOffset + accessor.getFieldSlotsLength() + fStartOffset, fLen);
+            fieldData.getDataOutput().write(accessor.getBuffer().array(),
+                    startOffset + accessor.getFieldSlotsLength() + fStartOffset, fLen);
             if (FrameConstants.DEBUG_FRAME_IO) {
-                dos.writeInt(FrameConstants.FRAME_FIELD_MAGIC);
+                fieldData.getDataOutput().writeInt(FrameConstants.FRAME_FIELD_MAGIC);
             }
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
-        fEndOffsets[nextField++] = baaos.size();
+        fEndOffsets[nextField++] = fieldData.getLength();
     }
 
     /**
@@ -117,8 +113,8 @@
      * @throws HyracksDataException
      */
     public <T> void addField(ISerializerDeserializer<T> serDeser, T instance) throws HyracksDataException {
-        serDeser.serialize(instance, dos);
-        fEndOffsets[nextField++] = baaos.size();
+        serDeser.serialize(instance, fieldData.getDataOutput());
+        fEndOffsets[nextField++] = fieldData.getLength();
     }
 
     /**
@@ -134,11 +130,11 @@
      */
     public void addField(byte[] bytes, int start, int length) throws HyracksDataException {
         try {
-            dos.write(bytes, start, length);
+            fieldData.getDataOutput().write(bytes, start, length);
         } catch (IOException e) {
             throw new HyracksDataException(e);
         }
-        fEndOffsets[nextField++] = baaos.size();
+        fEndOffsets[nextField++] = fieldData.getLength();
     }
 
     /**
@@ -146,7 +142,14 @@
      */
     @Override
     public DataOutput getDataOutput() {
-        return dos;
+        return fieldData.getDataOutput();
+    }
+
+    /**
+     * Get the growable array storing the field data.
+     */
+    public GrowableArray getFieldData() {
+        return fieldData;
     }
 
     /**
@@ -156,6 +159,6 @@
      * data.
      */
     public void addFieldEndOffset() {
-        fEndOffsets[nextField++] = baaos.size();
+        fEndOffsets[nextField++] = fieldData.getLength();
     }
 }
\ No newline at end of file
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/DoubleNormalizedKeyComputerFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/DoubleNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..e95e9c2
--- /dev/null
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/DoubleNormalizedKeyComputerFactory.java
@@ -0,0 +1,28 @@
+package edu.uci.ics.hyracks.dataflow.common.data.normalizers;
+
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+
+public class DoubleNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    @Override
+    public INormalizedKeyComputer createNormalizedKeyComputer() {
+        return new INormalizedKeyComputer() {
+
+            @Override
+            public int normalize(byte[] bytes, int start, int length) {
+                int prefix = IntegerSerializerDeserializer.getInt(bytes, start);
+                if (prefix >= 0) {
+                    return prefix ^ Integer.MIN_VALUE;
+                } else {
+                    return (int) ((long) 0xffffffff - (long) prefix);
+                }
+            }
+
+        };
+    }
+
+}
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/FloatNormalizedKeyComputerFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/FloatNormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..d58afc1
--- /dev/null
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/FloatNormalizedKeyComputerFactory.java
@@ -0,0 +1,28 @@
+package edu.uci.ics.hyracks.dataflow.common.data.normalizers;
+
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+
+public class FloatNormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+    private static final long serialVersionUID = 1L;
+
+    @Override
+    public INormalizedKeyComputer createNormalizedKeyComputer() {
+        return new INormalizedKeyComputer() {
+
+            @Override
+            public int normalize(byte[] bytes, int start, int length) {
+                int prefix = IntegerSerializerDeserializer.getInt(bytes, start);
+                if (prefix >= 0) {
+                    return prefix ^ Integer.MIN_VALUE;
+                } else {
+                    return (int) ((long) 0xffffffff - (long) prefix);
+                }
+            }
+
+        };
+    }
+
+}
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/Integer64NormalizedKeyComputerFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/Integer64NormalizedKeyComputerFactory.java
new file mode 100644
index 0000000..4589909
--- /dev/null
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/Integer64NormalizedKeyComputerFactory.java
@@ -0,0 +1,55 @@
+package edu.uci.ics.hyracks.dataflow.common.data.normalizers;
+
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputer;
+import edu.uci.ics.hyracks.api.dataflow.value.INormalizedKeyComputerFactory;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.Integer64SerializerDeserializer;
+
+public class Integer64NormalizedKeyComputerFactory implements INormalizedKeyComputerFactory {
+
+    private static final long serialVersionUID = 8735044913496854551L;
+
+    @Override
+    public INormalizedKeyComputer createNormalizedKeyComputer() {
+        return new INormalizedKeyComputer() {
+            private static final int POSTIVE_LONG_MASK = (3 << 30);
+            private static final int NON_NEGATIVE_INT_MASK = (2 << 30);
+            private static final int NEGATIVE_LONG_MASK = (0 << 30);
+
+            @Override
+            public int normalize(byte[] bytes, int start, int length) {
+                long value = Integer64SerializerDeserializer.getLong(bytes, start);
+                int highValue = (int) (value >> 32);
+                if (highValue > 0) {
+                    /**
+                     * larger than Integer.MAX
+                     */
+                    int highNmk = getKey(highValue);
+                    highNmk >>= 2;
+                    highNmk |= POSTIVE_LONG_MASK;
+                    return highNmk;
+                } else if (highValue == 0) {
+                    /**
+                     * smaller than Integer.MAX but >=0
+                     */
+                    int lowNmk = (int) value;
+                    lowNmk >>= 2;
+                    lowNmk |= NON_NEGATIVE_INT_MASK;
+                    return lowNmk;
+                } else {
+                    /**
+                     * less than 0: have not optimized for that
+                     */
+                    int highNmk = getKey(highValue);
+                    highNmk >>= 2;
+                    highNmk |= NEGATIVE_LONG_MASK;
+                    return highNmk;
+                }
+            }
+
+            private int getKey(int value) {
+                return value ^ Integer.MIN_VALUE;
+            }
+
+        };
+    }
+}
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/IntegerNormalizedKeyComputerFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/IntegerNormalizedKeyComputerFactory.java
index 2f7a778..6a01842 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/IntegerNormalizedKeyComputerFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/normalizers/IntegerNormalizedKeyComputerFactory.java
@@ -27,8 +27,7 @@
             @Override
             public int normalize(byte[] bytes, int start, int length) {
                 int value = IntegerSerializerDeserializer.getInt(bytes, start);
-                long unsignedValue = (long) value;
-                return (int) ((unsignedValue - ((long) Integer.MIN_VALUE)) & 0xffffffffL);
+                return value ^Integer.MIN_VALUE;
             }
         };
     }
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/parsers/LongParserFactory.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/parsers/LongParserFactory.java
index 6a9dab7..d9191c7 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/parsers/LongParserFactory.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/parsers/LongParserFactory.java
@@ -32,7 +32,7 @@
         return new IValueParser() {
             @Override
             public void parse(char[] buffer, int start, int length, DataOutput out) throws HyracksDataException {
-                long n = 0;
+                int n = 0;
                 int sign = 1;
                 int i = 0;
                 boolean pre = true;
@@ -102,7 +102,7 @@
                             throw new HyracksDataException("Encountered " + ch);
                     }
                 }
-
+                
                 try {
                     out.writeLong(n * sign);
                 } catch (IOException e) {
diff --git a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java
index a29209c..8a30a71 100644
--- a/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java
+++ b/hyracks/hyracks-dataflow-common/src/main/java/edu/uci/ics/hyracks/dataflow/common/data/util/StringUtils.java
@@ -18,17 +18,19 @@
 import java.io.IOException;
 
 public class StringUtils {
-    public static void writeCharAsModifiedUTF8(char c, DataOutput dos) throws IOException {
-
+    public static int writeCharAsModifiedUTF8(char c, DataOutput dos) throws IOException {
         if (c >= 0x0000 && c <= 0x007F) {
             dos.writeByte(c);
+            return 1;
         } else if (c <= 0x07FF) {
             dos.writeByte((byte) (0xC0 | ((c >> 6) & 0x3F)));
             dos.writeByte((byte) (0x80 | (c & 0x3F)));
+            return 2;
         } else {
             dos.writeByte((byte) (0xE0 | ((c >> 12) & 0x0F)));
             dos.writeByte((byte) (0x80 | ((c >> 6) & 0x3F)));
             dos.writeByte((byte) (0x80 | (c & 0x3F)));
+            return 3;
         }
     }
 
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoin.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoin.java
index 7e84229..6870e71 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoin.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoin.java
@@ -14,15 +14,18 @@
  */
 package edu.uci.ics.hyracks.dataflow.std.join;
 
+import java.io.DataOutput;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
 
 import edu.uci.ics.hyracks.api.comm.IFrameWriter;
 import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.INullWriter;
 import edu.uci.ics.hyracks.api.dataflow.value.ITuplePairComparator;
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
 import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
 import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
 import edu.uci.ics.hyracks.dataflow.common.comm.util.FrameUtils;
@@ -42,9 +45,12 @@
     private RunFileReader runFileReader;
     private int currentMemSize = 0;
     private final RunFileWriter runFileWriter;
+    private final boolean isLeftOuter;
+    private final ArrayTupleBuilder nullTupleBuilder;
 
     public NestedLoopJoin(IHyracksTaskContext ctx, FrameTupleAccessor accessor0, FrameTupleAccessor accessor1,
-            ITuplePairComparator comparators, int memSize) throws HyracksDataException {
+            ITuplePairComparator comparators, int memSize, boolean isLeftOuter, INullWriter[] nullWriters1)
+            throws HyracksDataException {
         this.accessorInner = accessor1;
         this.accessorOuter = accessor0;
         this.appender = new FrameTupleAppender(ctx.getFrameSize());
@@ -56,6 +62,19 @@
         this.memSize = memSize;
         this.ctx = ctx;
 
+        this.isLeftOuter = isLeftOuter;
+        if (isLeftOuter) {
+            int innerFieldCount = accessorInner.getFieldCount();
+            nullTupleBuilder = new ArrayTupleBuilder(innerFieldCount);
+            DataOutput out = nullTupleBuilder.getDataOutput();
+            for (int i = 0; i < innerFieldCount; i++) {
+                nullWriters1[i].writeNull(out);
+                nullTupleBuilder.addFieldEndOffset();
+            }
+        } else {
+            nullTupleBuilder = null;
+        }
+
         FileReference file = ctx.getJobletContext().createManagedWorkspaceFile(
                 this.getClass().getSimpleName() + this.toString());
         runFileWriter = new RunFileWriter(file, ctx.getIOManager());
@@ -108,9 +127,11 @@
         int tupleCount1 = accessorInner.getTupleCount();
 
         for (int i = 0; i < tupleCount0; ++i) {
+            boolean matchFound = false;
             for (int j = 0; j < tupleCount1; ++j) {
                 int c = compare(accessorOuter, i, accessorInner, j);
                 if (c == 0) {
+                    matchFound = true;
                     if (!appender.appendConcat(accessorOuter, i, accessorInner, j)) {
                         flushFrame(outBuffer, writer);
                         appender.reset(outBuffer, true);
@@ -120,6 +141,18 @@
                     }
                 }
             }
+
+            if (!matchFound && isLeftOuter) {
+                if (!appender.appendConcat(accessorOuter, i, nullTupleBuilder.getFieldEndOffsets(),
+                        nullTupleBuilder.getByteArray(), 0, nullTupleBuilder.getSize())) {
+                    flushFrame(outBuffer, writer);
+                    appender.reset(outBuffer, true);
+                    if (!appender.appendConcat(accessorOuter, i, nullTupleBuilder.getFieldEndOffsets(),
+                            nullTupleBuilder.getByteArray(), 0, nullTupleBuilder.getSize())) {
+                        throw new IllegalStateException();
+                    }
+                }
+            }
         }
     }
 
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoinOperatorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoinOperatorDescriptor.java
index a699703..0be01c1 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoinOperatorDescriptor.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/NestedLoopJoinOperatorDescriptor.java
@@ -25,6 +25,8 @@
 import edu.uci.ics.hyracks.api.dataflow.IActivityGraphBuilder;
 import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
 import edu.uci.ics.hyracks.api.dataflow.TaskId;
+import edu.uci.ics.hyracks.api.dataflow.value.INullWriter;
+import edu.uci.ics.hyracks.api.dataflow.value.INullWriterFactory;
 import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
 import edu.uci.ics.hyracks.api.dataflow.value.ITuplePairComparator;
 import edu.uci.ics.hyracks.api.dataflow.value.ITuplePairComparatorFactory;
@@ -47,13 +49,18 @@
     private static final long serialVersionUID = 1L;
     private final ITuplePairComparatorFactory comparatorFactory;
     private final int memSize;
+    private final boolean isLeftOuter;
+    private final INullWriterFactory[] nullWriterFactories1;
 
     public NestedLoopJoinOperatorDescriptor(IOperatorDescriptorRegistry spec,
-            ITuplePairComparatorFactory comparatorFactory, RecordDescriptor recordDescriptor, int memSize) {
+            ITuplePairComparatorFactory comparatorFactory, RecordDescriptor recordDescriptor, int memSize,
+            boolean isLeftOuter, INullWriterFactory[] nullWriterFactories1) {
         super(spec, 2, 1);
         this.comparatorFactory = comparatorFactory;
         this.recordDescriptors[0] = recordDescriptor;
         this.memSize = memSize;
+        this.isLeftOuter = isLeftOuter;
+        this.nullWriterFactories1 = nullWriterFactories1;
     }
 
     @Override
@@ -111,6 +118,13 @@
             final RecordDescriptor rd1 = recordDescProvider.getInputRecordDescriptor(getActivityId(), 0);
             final ITuplePairComparator comparator = comparatorFactory.createTuplePairComparator(ctx);
 
+            final INullWriter[] nullWriters1 = isLeftOuter ? new INullWriter[nullWriterFactories1.length] : null;
+            if (isLeftOuter) {
+                for (int i = 0; i < nullWriterFactories1.length; i++) {
+                    nullWriters1[i] = nullWriterFactories1[i].createNullWriter();
+                }
+            }
+
             IOperatorNodePushable op = new AbstractUnaryInputSinkOperatorNodePushable() {
                 private JoinCacheTaskState state;
 
@@ -118,8 +132,11 @@
                 public void open() throws HyracksDataException {
                     state = new JoinCacheTaskState(ctx.getJobletContext().getJobId(), new TaskId(getActivityId(),
                             partition));
+
                     state.joiner = new NestedLoopJoin(ctx, new FrameTupleAccessor(ctx.getFrameSize(), rd0),
-                            new FrameTupleAccessor(ctx.getFrameSize(), rd1), comparator, memSize);
+                            new FrameTupleAccessor(ctx.getFrameSize(), rd1), comparator, memSize, isLeftOuter,
+                            nullWriters1);
+
                 }
 
                 @Override
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoin.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoin.java
index 2905574..6e2b16a 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoin.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoin.java
@@ -81,6 +81,8 @@
 
     private int[] buildPSizeInFrames; //Used for partition tuning
     private int freeFramesCounter; //Used for partition tuning
+    
+    private boolean isTableEmpty;	//Added for handling the case, where build side is empty (tableSize is 0)
 
     public OptimizedHybridHashJoin(IHyracksTaskContext ctx, int memForJoin, int numOfPartitions, String rel0Name,
             String rel1Name, int[] keys0, int[] keys1, IBinaryComparator[] comparators, RecordDescriptor buildRd,
@@ -89,10 +91,10 @@
         this.memForJoin = memForJoin;
         this.buildRd = buildRd;
         this.probeRd = probeRd;
-        this.buildHpc = probeHpc;
-        this.probeHpc = buildHpc;
-        this.buildKeys = keys0;
-        this.probeKeys = keys1;
+        this.buildHpc = buildHpc; 	
+        this.probeHpc = probeHpc; 	
+        this.buildKeys = keys1; 	
+        this.probeKeys = keys0;		
         this.comparators = comparators;
         this.rel0Name = rel0Name;
         this.rel1Name = rel1Name;
@@ -117,10 +119,10 @@
         this.memForJoin = memForJoin;
         this.buildRd = buildRd;
         this.probeRd = probeRd;
-        this.buildHpc = probeHpc;
-        this.probeHpc = buildHpc;
-        this.buildKeys = keys0;
-        this.probeKeys = keys1;
+        this.buildHpc = buildHpc; 	
+        this.probeHpc = probeHpc; 	
+        this.buildKeys = keys1; 	
+        this.probeKeys = keys0;		
         this.comparators = comparators;
         this.rel0Name = rel0Name;
         this.rel1Name = rel1Name;
@@ -171,6 +173,12 @@
     public void build(ByteBuffer buffer) throws HyracksDataException {
         accessorBuild.reset(buffer);
         int tupleCount = accessorBuild.getTupleCount();
+   
+        boolean print = false;
+    	if(print){
+    		accessorBuild.prettyPrint();
+    	}
+        
         for (int i = 0; i < tupleCount; ++i) {
             int pid = buildHpc.partition(accessorBuild, i, numOfPartitions);
             processTuple(i, pid);
@@ -338,6 +346,7 @@
 
         createInMemoryJoiner(inMemTupCount);
         cacheInMemJoin();
+        this.isTableEmpty = (inMemTupCount == 0);
     }
 
     private void partitionTune() throws HyracksDataException {
@@ -457,10 +466,14 @@
     }
 
     public void probe(ByteBuffer buffer, IFrameWriter writer) throws HyracksDataException {
-
         accessorProbe.reset(buffer);
         int tupleCount = accessorProbe.getTupleCount();
 
+        boolean print = false;
+    	if(print){
+    		accessorProbe.prettyPrint();
+    	}
+        
         if (numOfSpilledParts == 0) {
             inMemJoiner.join(buffer, writer);
             return;
@@ -604,4 +617,8 @@
                 + freeFramesCounter;
         return s;
     }
+    
+    public boolean isTableEmpty(){
+    	return this.isTableEmpty;
+    }
 }
\ No newline at end of file
diff --git a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
index 3a7ee2c..19479df 100644
--- a/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
+++ b/hyracks/hyracks-dataflow-std/src/main/java/edu/uci/ics/hyracks/dataflow/std/join/OptimizedHybridHashJoinOperatorDescriptor.java
@@ -167,10 +167,10 @@
         ProbeAndJoinActivityNode phase2 = new ProbeAndJoinActivityNode(probeAid, buildAid);
 
         builder.addActivity(this, phase1);
-        builder.addSourceEdge(0, phase1, 0);
+        builder.addSourceEdge(1, phase1, 0);
 
         builder.addActivity(this, phase2);
-        builder.addSourceEdge(1, phase2, 0);
+        builder.addSourceEdge(0, phase2, 0);
 
         builder.addBlockingEdge(phase1, phase2);
 
@@ -253,14 +253,7 @@
             for (int i = 0; i < comparatorFactories.length; i++) {
                 comparators[i] = comparatorFactories[i].createBinaryComparator();
             }
-
-            final INullWriter[] nullWriters1 = isLeftOuter ? new INullWriter[nullWriterFactories1.length] : null;
-            if (isLeftOuter) {
-                for (int i = 0; i < nullWriterFactories1.length; i++) {
-                    nullWriters1[i] = nullWriterFactories1[i].createNullWriter();
-                }
-            }
-
+            
             IOperatorNodePushable op = new AbstractUnaryInputSinkOperatorNodePushable() {
                 private BuildAndPartitionTaskState state = new BuildAndPartitionTaskState(ctx.getJobletContext()
                         .getJobId(), new TaskId(getActivityId(), partition));
@@ -278,9 +271,17 @@
                     state.memForJoin = memsize - 2;
                     state.numOfPartitions = getNumberOfPartitions(state.memForJoin, inputsize0, fudgeFactor,
                             nPartitions);
-                    state.hybridHJ = new OptimizedHybridHashJoin(ctx, state.memForJoin, state.numOfPartitions,
-                            PROBE_REL, BUILD_REL, probeKeys, buildKeys, comparators, probeRd, buildRd, probeHpc,
-                            buildHpc);
+                    if(!isLeftOuter){
+                    	state.hybridHJ = new OptimizedHybridHashJoin(ctx, state.memForJoin, state.numOfPartitions,
+                                PROBE_REL, BUILD_REL, probeKeys, buildKeys, comparators, probeRd, buildRd, probeHpc,
+                                buildHpc);
+                    }
+                    else{
+                    	state.hybridHJ = new OptimizedHybridHashJoin(ctx, state.memForJoin, state.numOfPartitions,
+                                PROBE_REL, BUILD_REL, probeKeys, buildKeys, comparators, probeRd, buildRd, probeHpc,
+                                buildHpc, isLeftOuter, nullWriterFactories1);
+                    }
+                    
                     state.hybridHJ.initBuild();
                 }
 
@@ -368,7 +369,9 @@
 
                 @Override
                 public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
-                    state.hybridHJ.probe(buffer, writer);
+                	if(!state.hybridHJ.isTableEmpty()){
+                		state.hybridHJ.probe(buffer, writer);
+                	}
                 }
 
                 @Override
@@ -604,7 +607,7 @@
                         throws HyracksDataException {
 
                     NestedLoopJoin nlj = new NestedLoopJoin(ctx, new FrameTupleAccessor(ctx.getFrameSize(), outerRd),
-                            new FrameTupleAccessor(ctx.getFrameSize(), innerRd), nljComparator, memorySize);
+                            new FrameTupleAccessor(ctx.getFrameSize(), innerRd), nljComparator, memorySize, false, null);
 
                     ByteBuffer cacheBuff = ctx.allocateFrame();
                     innerReader.open();
diff --git a/hyracks/hyracks-dist/pom.xml b/hyracks/hyracks-dist/pom.xml
new file mode 100755
index 0000000..4f9fc20
--- /dev/null
+++ b/hyracks/hyracks-dist/pom.xml
@@ -0,0 +1,74 @@
+<?xml version="1.0"?>
+<project
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
+	xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+	<modelVersion>4.0.0</modelVersion>
+	<parent>
+		<artifactId>hyracks</artifactId>
+		<groupId>edu.uci.ics.hyracks</groupId>
+		<version>0.2.2-SNAPSHOT</version>
+	</parent>
+
+	<artifactId>hyracks-dist</artifactId>
+	<name>hyracks-dist</name>
+
+	<properties>
+		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+	</properties>
+
+	<build>
+		<plugins>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-compiler-plugin</artifactId>
+				<version>2.0.2</version>
+				<configuration>
+					<source>1.6</source>
+					<target>1.6</target>
+				</configuration>
+			</plugin>
+			<plugin>
+				<artifactId>maven-resources-plugin</artifactId>
+				<version>2.5</version>
+				<executions>
+					<execution>
+						<id>copy-scripts</id>
+						<!-- here the phase you need -->
+						<phase>package</phase>
+						<goals>
+							<goal>copy-resources</goal>
+						</goals>
+						<configuration>
+							<outputDirectory>target/appassembler/</outputDirectory>
+							<resources>
+								<resource>
+									<directory>src/main/resources</directory>
+								</resource>
+							</resources>
+							<directoryMode>0755</directoryMode>
+						</configuration>
+					</execution>
+				</executions>
+			</plugin>
+			<plugin>
+				<groupId>org.apache.maven.plugins</groupId>
+				<artifactId>maven-antrun-plugin</artifactId>
+				<version>1.6</version>
+				<executions>
+					<execution>
+						<id>process-test-classes</id>
+						<phase>package</phase>
+						<configuration>
+							<target>
+								<chmod file="target/appassembler/bin/*)" perm="755" />
+							</target>
+						</configuration>
+						<goals>
+							<goal>run</goal>
+						</goals>
+					</execution>
+				</executions>
+			</plugin>
+		</plugins>
+	</build>
+</project>
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/getip.sh b/hyracks/hyracks-dist/src/main/resources/bin/getip.sh
new file mode 100755
index 0000000..e0cdf73
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/bin/getip.sh
@@ -0,0 +1,21 @@
+#get the OS
+OS_NAME=`uname -a|awk '{print $1}'`
+LINUX_OS='Linux'
+
+if [ $OS_NAME = $LINUX_OS ];
+then
+        #Get IP Address
+        IPADDR=`/sbin/ifconfig eth0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+	if [ "$IPADDR" = "" ]
+        then
+		IPADDR=`/sbin/ifconfig lo | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        fi 
+else
+        IPADDR=`/sbin/ifconfig en1 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+	if [ "$IPADDR" = "" ]
+        then
+                IPADDR=`/sbin/ifconfig lo0 | grep "inet " | awk '{print $2}' | cut -f 2 -d ':'`
+        fi
+
+fi
+echo $IPADDR
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/startAllNCs.sh b/hyracks/hyracks-dist/src/main/resources/bin/startAllNCs.sh
new file mode 100755
index 0000000..629bd90
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/bin/startAllNCs.sh
@@ -0,0 +1,6 @@
+PREGELIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+   ssh $i "cd ${PREGELIX_PATH}; bin/startnc.sh"
+done
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/startCluster.sh b/hyracks/hyracks-dist/src/main/resources/bin/startCluster.sh
new file mode 100755
index 0000000..a0c2063
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/bin/startCluster.sh
@@ -0,0 +1,3 @@
+bin/startcc.sh
+sleep 5
+bin/startAllNCs.sh
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/startDebugNc.sh b/hyracks/hyracks-dist/src/main/resources/bin/startDebugNc.sh
new file mode 100755
index 0000000..fe6cf27
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/bin/startDebugNc.sh
@@ -0,0 +1,50 @@
+hostname
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+. conf/debugnc.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR2
+mkdir $NCTMP_DIR2
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR2
+mkdir $NCLOGS_DIR2
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS2 | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir
+	mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+#Get OS
+IPADDR=`bin/getip.sh`
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+NODEID=${NODEID}2
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS2
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR2
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR  -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS2}" &> $NCLOGS_DIR2/$NODEID.log &
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/startcc.sh b/hyracks/hyracks-dist/src/main/resources/bin/startcc.sh
new file mode 100755
index 0000000..fe2551d
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/bin/startcc.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+hostname
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CCHOST=`bin/getip.sh`
+
+#Remove the temp dir
+rm -rf $CCTMP_DIR
+mkdir $CCTMP_DIR
+
+#Remove the logs dir
+rm -rf $CCLOGS_DIR
+mkdir $CCLOGS_DIR
+
+#Export JAVA_HOME and JAVA_OPTS
+export JAVA_HOME=$JAVA_HOME
+export JAVA_OPTS=$CCJAVA_OPTS
+
+#Launch hyracks cc script
+chmod -R 755 $HYRACKS_HOME
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyrackscc -client-net-ip-address $CCHOST -cluster-net-ip-address $CCHOST -client-net-port $CC_CLIENTPORT -cluster-net-port $CC_CLUSTERPORT -max-heartbeat-lapse-periods 999999 -default-max-job-attempts 0 -job-history-size 3 &> $CCLOGS_DIR/cc.log &
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/startnc.sh b/hyracks/hyracks-dist/src/main/resources/bin/startnc.sh
new file mode 100755
index 0000000..6e0f90e
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/bin/startnc.sh
@@ -0,0 +1,49 @@
+hostname
+
+MY_NAME=`hostname`
+#Get the IP address of the cc
+CCHOST_NAME=`cat conf/master`
+CURRENT_PATH=`pwd`
+CCHOST=`ssh ${CCHOST_NAME} "cd ${CURRENT_PATH}; bin/getip.sh"`
+
+#Import cluster properties
+. conf/cluster.properties
+
+#Clean up temp dir
+
+rm -rf $NCTMP_DIR
+mkdir $NCTMP_DIR
+
+#Clean up log dir
+rm -rf $NCLOGS_DIR
+mkdir $NCLOGS_DIR
+
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir
+	mkdir $io_dir
+done
+
+#Set JAVA_HOME
+export JAVA_HOME=$JAVA_HOME
+
+IPADDR=`bin/getip.sh`
+#echo $IPADDR
+
+#Get node ID
+NODEID=`hostname | cut -d '.' -f 1`
+
+#Set JAVA_OPTS
+export JAVA_OPTS=$NCJAVA_OPTS
+
+cd $HYRACKS_HOME
+HYRACKS_HOME=`pwd`
+
+#Enter the temp dir
+cd $NCTMP_DIR
+
+#Launch hyracks nc
+$HYRACKS_HOME/hyracks-server/target/appassembler/bin/hyracksnc -cc-host $CCHOST -cc-port $CC_CLUSTERPORT -cluster-net-ip-address $IPADDR  -data-ip-address $IPADDR -node-id $NODEID -iodevices "${IO_DIRS}" &> $NCLOGS_DIR/$NODEID.log &
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/stopAllNCs.sh b/hyracks/hyracks-dist/src/main/resources/bin/stopAllNCs.sh
new file mode 100755
index 0000000..12367c1
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/bin/stopAllNCs.sh
@@ -0,0 +1,6 @@
+PREGELIX_PATH=`pwd`
+
+for i in `cat conf/slaves`
+do
+   ssh $i "cd ${PREGELIX_PATH}; bin/stopnc.sh"
+done
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/stopCluster.sh b/hyracks/hyracks-dist/src/main/resources/bin/stopCluster.sh
new file mode 100755
index 0000000..4889934
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/bin/stopCluster.sh
@@ -0,0 +1,3 @@
+bin/stopAllNCs.sh
+sleep 2
+bin/stopcc.sh
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/stopcc.sh b/hyracks/hyracks-dist/src/main/resources/bin/stopcc.sh
new file mode 100755
index 0000000..c2f525a
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/bin/stopcc.sh
@@ -0,0 +1,10 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep hyracks|awk '{print $2}'`
+echo $PID
+kill -9 $PID
+
+#Clean up CC temp dir
+rm -rf $CCTMP_DIR/*
diff --git a/hyracks/hyracks-dist/src/main/resources/bin/stopnc.sh b/hyracks/hyracks-dist/src/main/resources/bin/stopnc.sh
new file mode 100755
index 0000000..03ce4e7
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/bin/stopnc.sh
@@ -0,0 +1,23 @@
+hostname
+. conf/cluster.properties
+
+#Kill process
+PID=`ps -ef|grep ${USER}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+
+if [ "$PID" == "" ]; then
+  USERID=`id | sed 's/^uid=//;s/(.*$//'`
+  PID=`ps -ef|grep ${USERID}|grep java|grep 'Dapp.name=hyracksnc'|awk '{print $2}'`
+fi
+
+echo $PID
+kill -9 $PID
+
+#Clean up I/O working dir
+io_dirs=$(echo $IO_DIRS | tr "," "\n")
+for io_dir in $io_dirs
+do
+	rm -rf $io_dir/*
+done
+
+#Clean up NC temp dir
+rm -rf $NCTMP_DIR/*
diff --git a/hyracks/hyracks-dist/src/main/resources/conf/cluster.properties b/hyracks/hyracks-dist/src/main/resources/conf/cluster.properties
new file mode 100755
index 0000000..3b382f7
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/conf/cluster.properties
@@ -0,0 +1,37 @@
+#The CC port for Hyracks clients
+CC_CLIENTPORT=3099
+
+#The CC port for Hyracks cluster management
+CC_CLUSTERPORT=1099
+
+#The directory of hyracks binaries
+HYRACKS_HOME=../../../
+
+#The tmp directory for cc to install jars
+CCTMP_DIR=/tmp/t1
+
+#The tmp directory for nc to install jars
+NCTMP_DIR=/tmp/t2
+
+#The directory to put cc logs
+CCLOGS_DIR=$CCTMP_DIR/logs
+
+#The directory to put nc logs
+NCLOGS_DIR=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS="/tmp/t3,/tmp/t4"
+
+#The JAVA_HOME
+JAVA_HOME=$JAVA_HOME
+
+#The frame size of the internal dataflow engine
+FRAME_SIZE=65536
+
+#CC JAVA_OPTS
+CCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7001,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+# Yourkit option: -agentpath:/grid/0/dev/vborkar/tools/yjp-10.0.4/bin/linux-x86-64/libyjpagent.so=port=20001"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,address=7002,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/hyracks/hyracks-dist/src/main/resources/conf/debugnc.properties b/hyracks/hyracks-dist/src/main/resources/conf/debugnc.properties
new file mode 100755
index 0000000..27afa26
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/conf/debugnc.properties
@@ -0,0 +1,12 @@
+#The tmp directory for nc to install jars
+NCTMP_DIR2=/tmp/t-1
+
+#The directory to put nc logs
+NCLOGS_DIR2=$NCTMP_DIR/logs
+
+#Comma separated I/O directories for the spilling of external sort
+IO_DIRS2="/tmp/t-2,/tmp/t-3"
+
+#NC JAVA_OPTS
+NCJAVA_OPTS2="-Xdebug -Xrunjdwp:transport=dt_socket,address=7003,server=y,suspend=n -Xmx1g -Djava.util.logging.config.file=logging.properties"
+
diff --git a/hyracks/hyracks-dist/src/main/resources/conf/master b/hyracks/hyracks-dist/src/main/resources/conf/master
new file mode 100755
index 0000000..2fbb50c
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/conf/master
@@ -0,0 +1 @@
+localhost
diff --git a/hyracks/hyracks-dist/src/main/resources/conf/slaves b/hyracks/hyracks-dist/src/main/resources/conf/slaves
new file mode 100755
index 0000000..2fbb50c
--- /dev/null
+++ b/hyracks/hyracks-dist/src/main/resources/conf/slaves
@@ -0,0 +1 @@
+localhost
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
index 61d4696..622942b 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderHashJoinTest.java
@@ -52,30 +52,9 @@
 import edu.uci.ics.hyracks.dataflow.std.join.HybridHashJoinOperatorDescriptor;
 import edu.uci.ics.hyracks.dataflow.std.join.InMemoryHashJoinOperatorDescriptor;
 import edu.uci.ics.hyracks.dataflow.std.misc.MaterializingOperatorDescriptor;
+import edu.uci.ics.hyracks.tests.util.NoopNullWriterFactory;
 
 public class TPCHCustomerOrderHashJoinTest extends AbstractIntegrationTest {
-    private static class NoopNullWriterFactory implements INullWriterFactory {
-
-        private static final long serialVersionUID = 1L;
-        public static final NoopNullWriterFactory INSTANCE = new NoopNullWriterFactory();
-
-        private NoopNullWriterFactory() {
-        }
-
-        @Override
-        public INullWriter createNullWriter() {
-            return new INullWriter() {
-                @Override
-                public void writeNull(DataOutput out) throws HyracksDataException {
-                    try {
-                        out.writeShort(0);
-                    } catch (IOException e) {
-                        throw new HyracksDataException(e);
-                    }
-                }
-            };
-        }
-    }
 
     /*
      * TPCH Customer table: CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER NOT NULL,
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
index 1e60372..9233e39 100644
--- a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/integration/TPCHCustomerOrderNestedLoopJoinTest.java
@@ -15,9 +15,7 @@
 package edu.uci.ics.hyracks.tests.integration;
 
 import java.io.File;
-
 import org.junit.Test;
-
 import edu.uci.ics.hyracks.api.comm.IFrameTupleAccessor;
 import edu.uci.ics.hyracks.api.constraints.PartitionConstraintHelper;
 import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
@@ -25,6 +23,7 @@
 import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
 import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
 import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.INullWriterFactory;
 import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
 import edu.uci.ics.hyracks.api.dataflow.value.ITuplePairComparator;
 import edu.uci.ics.hyracks.api.dataflow.value.ITuplePairComparatorFactory;
@@ -45,6 +44,7 @@
 import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
 import edu.uci.ics.hyracks.dataflow.std.file.PlainFileWriterOperatorDescriptor;
 import edu.uci.ics.hyracks.dataflow.std.join.NestedLoopJoinOperatorDescriptor;
+import edu.uci.ics.hyracks.tests.util.NoopNullWriterFactory;
 
 public class TPCHCustomerOrderNestedLoopJoinTest extends AbstractIntegrationTest {
     private static class JoinComparatorFactory implements ITuplePairComparatorFactory {
@@ -165,7 +165,8 @@
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID);
 
         NestedLoopJoinOperatorDescriptor join = new NestedLoopJoinOperatorDescriptor(spec, new JoinComparatorFactory(
-                PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), custOrderJoinDesc, 4);
+                PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), custOrderJoinDesc, 4, false,
+                null);
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID);
 
         IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
@@ -239,7 +240,8 @@
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID, NC2_ID);
 
         NestedLoopJoinOperatorDescriptor join = new NestedLoopJoinOperatorDescriptor(spec, new JoinComparatorFactory(
-                PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), custOrderJoinDesc, 5);
+                PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), custOrderJoinDesc, 5, false,
+                null);
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
 
         IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
@@ -313,7 +315,8 @@
         PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID, NC2_ID);
 
         NestedLoopJoinOperatorDescriptor join = new NestedLoopJoinOperatorDescriptor(spec, new JoinComparatorFactory(
-                PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), custOrderJoinDesc, 6);
+                PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), custOrderJoinDesc, 6, false,
+                null);
         PartitionConstraintHelper.addPartitionCountConstraint(spec, join, 2);
 
         IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
@@ -334,4 +337,84 @@
         runTest(spec);
     }
 
+    @Test
+    public void customerOrderCIDOuterJoinMulti() throws Exception {
+        JobSpecification spec = new JobSpecification();
+
+        FileSplit[] custSplits = new FileSplit[] {
+                new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/customer-part1.tbl"))),
+                new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/customer-part2.tbl"))) };
+        IFileSplitProvider custSplitsProvider = new ConstantFileSplitProvider(custSplits);
+        RecordDescriptor custDesc = new RecordDescriptor(new ISerializerDeserializer[] {
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE });
+
+        FileSplit[] ordersSplits = new FileSplit[] {
+                new FileSplit(NC1_ID, new FileReference(new File("data/tpch0.001/orders-part1.tbl"))),
+                new FileSplit(NC2_ID, new FileReference(new File("data/tpch0.001/orders-part2.tbl"))) };
+        IFileSplitProvider ordersSplitsProvider = new ConstantFileSplitProvider(ordersSplits);
+        RecordDescriptor ordersDesc = new RecordDescriptor(new ISerializerDeserializer[] {
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE });
+
+        RecordDescriptor custOrderJoinDesc = new RecordDescriptor(new ISerializerDeserializer[] {
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE, UTF8StringSerializerDeserializer.INSTANCE,
+                UTF8StringSerializerDeserializer.INSTANCE });
+
+        FileScanOperatorDescriptor ordScanner = new FileScanOperatorDescriptor(spec, ordersSplitsProvider,
+                new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
+                        UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
+                        UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
+                        UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
+                        UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE }, '|'), ordersDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, ordScanner, NC1_ID, NC2_ID);
+
+        FileScanOperatorDescriptor custScanner = new FileScanOperatorDescriptor(spec, custSplitsProvider,
+                new DelimitedDataTupleParserFactory(new IValueParserFactory[] { UTF8StringParserFactory.INSTANCE,
+                        UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
+                        UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
+                        UTF8StringParserFactory.INSTANCE, UTF8StringParserFactory.INSTANCE,
+                        UTF8StringParserFactory.INSTANCE }, '|'), custDesc);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, custScanner, NC1_ID, NC2_ID);
+
+        INullWriterFactory[] nullWriterFactories = new INullWriterFactory[ordersDesc.getFieldCount()];
+        for (int j = 0; j < nullWriterFactories.length; j++) {
+            nullWriterFactories[j] = NoopNullWriterFactory.INSTANCE;
+        }
+
+        NestedLoopJoinOperatorDescriptor join = new NestedLoopJoinOperatorDescriptor(spec, new JoinComparatorFactory(
+                PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 1, 0), custOrderJoinDesc, 5, true,
+                nullWriterFactories);
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, join, NC1_ID, NC2_ID);
+
+        IFileSplitProvider outSplits = new ConstantFileSplitProvider(new FileSplit[] { new FileSplit(NC1_ID,
+                createTempFile().getAbsolutePath()) });
+        IOperatorDescriptor printer = new PlainFileWriterOperatorDescriptor(spec, outSplits, ",");
+        PartitionConstraintHelper.addAbsoluteLocationConstraint(spec, printer, NC1_ID);
+
+        IConnectorDescriptor ordJoinConn = new OneToOneConnectorDescriptor(spec);
+        spec.connect(ordJoinConn, ordScanner, 0, join, 0);
+
+        IConnectorDescriptor custJoinConn = new MToNReplicatingConnectorDescriptor(spec);
+        spec.connect(custJoinConn, custScanner, 0, join, 1);
+
+        IConnectorDescriptor joinPrinterConn = new MToNReplicatingConnectorDescriptor(spec);
+        spec.connect(joinPrinterConn, join, 0, printer, 0);
+
+        spec.addRoot(printer);
+        runTest(spec);
+    }
+
 }
\ No newline at end of file
diff --git a/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/util/NoopNullWriterFactory.java b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/util/NoopNullWriterFactory.java
new file mode 100644
index 0000000..d119509
--- /dev/null
+++ b/hyracks/hyracks-examples/hyracks-integration-tests/src/test/java/edu/uci/ics/hyracks/tests/util/NoopNullWriterFactory.java
@@ -0,0 +1,31 @@
+package edu.uci.ics.hyracks.tests.util;
+
+import java.io.DataOutput;
+import java.io.IOException;
+
+import edu.uci.ics.hyracks.api.dataflow.value.INullWriter;
+import edu.uci.ics.hyracks.api.dataflow.value.INullWriterFactory;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+
+public class NoopNullWriterFactory implements INullWriterFactory {
+
+    private static final long serialVersionUID = 1L;
+    public static final NoopNullWriterFactory INSTANCE = new NoopNullWriterFactory();
+
+    private NoopNullWriterFactory() {
+    }
+
+    @Override
+    public INullWriter createNullWriter() {
+        return new INullWriter() {
+            @Override
+            public void writeNull(DataOutput out) throws HyracksDataException {
+                try {
+                    out.writeShort(0);
+                } catch (IOException e) {
+                    throw new HyracksDataException(e);
+                }
+            }
+        };
+    }
+}
diff --git a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/edu/uci/ics/hyracks/examples/tpch/client/Main.java b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/edu/uci/ics/hyracks/examples/tpch/client/Main.java
index 01ccdef..0ad0ff0 100644
--- a/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/edu/uci/ics/hyracks/examples/tpch/client/Main.java
+++ b/hyracks/hyracks-examples/tpch-example/tpchclient/src/main/java/edu/uci/ics/hyracks/examples/tpch/client/Main.java
@@ -199,7 +199,7 @@
 
         if ("nestedloop".equalsIgnoreCase(algo)) {
             join = new NestedLoopJoinOperatorDescriptor(spec, new JoinComparatorFactory(
-                    PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), custOrderJoinDesc, memSize);
+                    PointableBinaryComparatorFactory.of(UTF8StringPointable.FACTORY), 0, 1), custOrderJoinDesc, memSize, false, null);
 
         } else if ("gracehash".equalsIgnoreCase(algo)) {
             join = new GraceHashJoinOperatorDescriptor(
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java
index d00bea6..6744f70 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/dataflow/BinaryTokenizerOperatorNodePushable.java
@@ -15,13 +15,13 @@
 
 package edu.uci.ics.hyracks.storage.am.invertedindex.dataflow;
 
-import java.io.DataOutput;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 
 import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
 import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
 import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
 import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
 import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
@@ -41,7 +41,7 @@
 
     private FrameTupleAccessor accessor;
     private ArrayTupleBuilder builder;
-    private DataOutput builderDos;
+    private GrowableArray builderFieldData;
     private FrameTupleAppender appender;
     private ByteBuffer writeBuffer;
 
@@ -60,7 +60,7 @@
         accessor = new FrameTupleAccessor(ctx.getFrameSize(), inputRecDesc);
         writeBuffer = ctx.allocateFrame();
         builder = new ArrayTupleBuilder(outputRecDesc.getFieldCount());
-        builderDos = builder.getDataOutput();
+        builderFieldData = builder.getFieldData();
         appender = new FrameTupleAppender(ctx.getFrameSize());
         appender.reset(writeBuffer, true);
         writer.open();
@@ -87,7 +87,7 @@
                     builder.reset();
                     try {
                         IToken token = tokenizer.getToken();
-                        token.serializeToken(builderDos);
+                        token.serializeToken(builderFieldData);
                         builder.addFieldEndOffset();
                     } catch (IOException e) {
                         throw new HyracksDataException(e.getMessage());
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
index 99e76dc..3525fc3 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/InvertedIndex.java
@@ -202,7 +202,9 @@
     public void endBulkLoad(IIndexBulkLoadContext ictx) throws HyracksDataException {
         // Create entry in btree for last inverted list.
         InvertedIndexBulkLoadContext ctx = (InvertedIndexBulkLoadContext) ictx;
-        createAndInsertBTreeTuple(ctx);
+        if (ctx.lastTuple.getFieldData(0) != null) {
+            createAndInsertBTreeTuple(ctx);
+        }
         btree.endBulkLoad(ctx.btreeBulkLoadCtx);
         ctx.deinit();
     }
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
index 1f886a2..af5dad3 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/impls/TOccurrenceSearcher.java
@@ -15,7 +15,6 @@
 
 package edu.uci.ics.hyracks.storage.am.invertedindex.impls;
 
-import java.io.DataOutput;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
@@ -28,6 +27,7 @@
 import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
 import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
 import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
 import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
 import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
 import edu.uci.ics.hyracks.dataflow.common.comm.io.FrameTupleAppender;
@@ -75,7 +75,7 @@
     protected RecordDescriptor queryTokenRecDesc = new RecordDescriptor(
             new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE });
     protected ArrayTupleBuilder queryTokenBuilder = new ArrayTupleBuilder(queryTokenRecDesc.getFieldCount());
-    protected DataOutput queryTokenDos = queryTokenBuilder.getDataOutput();
+    protected GrowableArray queryTokenFieldData = queryTokenBuilder.getFieldData();
     protected FrameTupleAppender queryTokenAppender;
     protected ByteBuffer queryTokenFrame;
 
@@ -158,7 +158,7 @@
             queryTokenBuilder.reset();
             try {
                 IToken token = queryTokenizer.getToken();
-                token.serializeToken(queryTokenDos);
+                token.serializeToken(queryTokenFieldData);
                 queryTokenBuilder.addFieldEndOffset();
                 // WARNING: assuming one frame is big enough to hold all tokens
                 queryTokenAppender.append(queryTokenBuilder.getFieldEndOffsets(), queryTokenBuilder.getByteArray(), 0,
@@ -294,7 +294,8 @@
         boolean advanceCursor = true;
         boolean advancePrevResult = false;
         int resultTidx = 0;
-
+        currentNumResults = 0;
+        
         resultFrameTupleAcc.reset(prevCurrentBuffer);
         resultFrameTupleApp.reset(newCurrentBuffer, true);
 
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
index 65afa65..2f60952 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/AbstractUTF8Token.java
@@ -22,6 +22,7 @@
 import java.io.IOException;
 
 import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
 
 public abstract class AbstractUTF8Token implements IToken {
     public static final int GOLDEN_RATIO_32 = 0x09e3779b9;
@@ -97,8 +98,8 @@
     }
 
     @Override
-    public void serializeTokenCount(DataOutput dos) throws IOException {
-        handleCountTypeTag(dos);
-        dos.writeInt(tokenCount);
+    public void serializeTokenCount(GrowableArray out) throws IOException {
+        handleCountTypeTag(out.getDataOutput());
+        out.getDataOutput().writeInt(tokenCount);
     }
 }
\ No newline at end of file
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
index b7bb828..a1a4354 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8NGramToken.java
@@ -19,10 +19,10 @@
 
 package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
 
-import java.io.DataOutput;
 import java.io.IOException;
 
 import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
 
 public class HashedUTF8NGramToken extends UTF8NGramToken {
     public HashedUTF8NGramToken(byte tokenTypeTag, byte countTypeTag) {
@@ -30,8 +30,8 @@
     }
 
     @Override
-    public void serializeToken(DataOutput dos) throws IOException {
-        handleTokenTypeTag(dos);
+    public void serializeToken(GrowableArray out) throws IOException {
+        handleTokenTypeTag(out.getDataOutput());
 
         int hash = GOLDEN_RATIO_32;
 
@@ -59,6 +59,6 @@
         // token count
         hash += tokenCount;
 
-        dos.writeInt(hash);
+        out.getDataOutput().writeInt(hash);
     }
 }
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
index 42ed053..20405c6 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/HashedUTF8WordToken.java
@@ -19,10 +19,10 @@
 
 package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
 
-import java.io.DataOutput;
 import java.io.IOException;
 
 import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
 
 public class HashedUTF8WordToken extends UTF8WordToken {
 
@@ -76,12 +76,12 @@
     }
 
     @Override
-    public void serializeToken(DataOutput dos) throws IOException {
+    public void serializeToken(GrowableArray out) throws IOException {
         if (tokenTypeTag > 0) {
-            dos.write(tokenTypeTag);
+            out.getDataOutput().write(tokenTypeTag);
         }
 
         // serialize hash value
-        dos.writeInt(hash);
+        out.getDataOutput().writeInt(hash);
     }
 }
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IToken.java
index c1840d7..47467a1 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/IToken.java
@@ -19,9 +19,10 @@
 
 package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
 
-import java.io.DataOutput;
 import java.io.IOException;
 
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
+
 public interface IToken {
 	public byte[] getData();
 
@@ -34,7 +35,7 @@
 	public void reset(byte[] data, int start, int length, int tokenLength,
 			int tokenCount);
 
-	public void serializeToken(DataOutput dos) throws IOException;
+	public void serializeToken(GrowableArray out) throws IOException;
 
-	public void serializeTokenCount(DataOutput dos) throws IOException;
+	public void serializeTokenCount(GrowableArray out) throws IOException;
 }
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
index 59cadc8..8cb9818 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8NGramToken.java
@@ -19,10 +19,10 @@
 
 package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
 
-import java.io.DataOutput;
 import java.io.IOException;
 
 import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
 import edu.uci.ics.hyracks.dataflow.common.data.util.StringUtils;
 
 public class UTF8NGramToken extends AbstractUTF8Token implements INGramToken {
@@ -49,34 +49,39 @@
     }
 
     @Override
-    public void serializeToken(DataOutput dos) throws IOException {
-        handleTokenTypeTag(dos);
+    public void serializeToken(GrowableArray out) throws IOException {
+        handleTokenTypeTag(out.getDataOutput());
+        int tokenUTF8LenOff = out.getLength();
 
         // regular chars
         int numRegChars = tokenLength - numPreChars - numPostChars;
 
         // assuming pre and post char need 1-byte each in utf8
-        int tokenUTF8Len = getLowerCaseUTF8Len(numRegChars) + numPreChars + numPostChars;
+        int tokenUTF8Len = numPreChars + numPostChars;
 
-        // write utf8 length indicator
-        StringUtils.writeUTF8Len(tokenUTF8Len, dos);
+        // Write dummy UTF length which will be correctly set later.
+        out.getDataOutput().writeShort(0);
 
         // pre chars
         for (int i = 0; i < numPreChars; i++) {
-            StringUtils.writeCharAsModifiedUTF8(PRECHAR, dos);
+            StringUtils.writeCharAsModifiedUTF8(PRECHAR, out.getDataOutput());
         }
 
         int pos = start;
         for (int i = 0; i < numRegChars; i++) {
             char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
-            StringUtils.writeCharAsModifiedUTF8(c, dos);
+            tokenUTF8Len += StringUtils.writeCharAsModifiedUTF8(c, out.getDataOutput());
             pos += UTF8StringPointable.charSize(data, pos);
         }
 
         // post chars
         for (int i = 0; i < numPostChars; i++) {
-            StringUtils.writeCharAsModifiedUTF8(POSTCHAR, dos);
+            StringUtils.writeCharAsModifiedUTF8(POSTCHAR, out.getDataOutput());
         }
+
+        // Set UTF length of token.
+        out.getByteArray()[tokenUTF8LenOff] = (byte) ((tokenUTF8Len >>> 8) & 0xFF);
+        out.getByteArray()[tokenUTF8LenOff + 1] = (byte) ((tokenUTF8Len >>> 0) & 0xFF);
     }
 
     public void setNumPrePostChars(int numPreChars, int numPostChars) {
diff --git a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
index 97a1e12..9d7fe7c 100644
--- a/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
+++ b/hyracks/hyracks-storage-am-invertedindex/src/main/java/edu/uci/ics/hyracks/storage/am/invertedindex/tokenizers/UTF8WordToken.java
@@ -19,10 +19,10 @@
 
 package edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers;
 
-import java.io.DataOutput;
 import java.io.IOException;
 
 import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
 import edu.uci.ics.hyracks.dataflow.common.data.util.StringUtils;
 
 public class UTF8WordToken extends AbstractUTF8Token {
@@ -32,16 +32,20 @@
     }
 
     @Override
-    public void serializeToken(DataOutput dos) throws IOException {
-        handleTokenTypeTag(dos);
-
-        int tokenUTF8Len = getLowerCaseUTF8Len(tokenLength);
-        StringUtils.writeUTF8Len(tokenUTF8Len, dos);
+    public void serializeToken(GrowableArray out) throws IOException {
+        handleTokenTypeTag(out.getDataOutput());
+        int tokenUTF8LenOff = out.getLength();
+        int tokenUTF8Len = 0;
+        // Write dummy UTF length which will be correctly set later.
+        out.getDataOutput().writeShort(0);
         int pos = start;
         for (int i = 0; i < tokenLength; i++) {
             char c = Character.toLowerCase(UTF8StringPointable.charAt(data, pos));
-            StringUtils.writeCharAsModifiedUTF8(c, dos);
+            tokenUTF8Len += StringUtils.writeCharAsModifiedUTF8(c, out.getDataOutput());
             pos += UTF8StringPointable.charSize(data, pos);
         }
+        // Set UTF length of token.
+        out.getByteArray()[tokenUTF8LenOff] = (byte) ((tokenUTF8Len >>> 8) & 0xFF);
+        out.getByteArray()[tokenUTF8LenOff + 1] = (byte) ((tokenUTF8Len >>> 0) & 0xFF);
     }
 }
diff --git a/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorDescriptor.java b/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorDescriptor.java
index 9818dce..d9b7b97 100644
--- a/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorDescriptor.java
+++ b/hyracks/hyracks-storage-am-rtree/src/main/java/edu/uci/ics/hyracks/storage/am/rtree/dataflow/RTreeSearchOperatorDescriptor.java
@@ -40,9 +40,10 @@
             IStorageManagerInterface storageManager, IIndexRegistryProvider<IIndex> indexRegistryProvider,
             IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
             IBinaryComparatorFactory[] comparatorFactories, int[] keyFields,
-            IIndexDataflowHelperFactory dataflowHelperFactory, boolean retainInput, IOperationCallbackProvider opCallbackProvider) {
+            IIndexDataflowHelperFactory dataflowHelperFactory, boolean retainInput,
+            IOperationCallbackProvider opCallbackProvider) {
         super(spec, 1, 1, recDesc, storageManager, indexRegistryProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, dataflowHelperFactory, null, false, opCallbackProvider);
+                comparatorFactories, dataflowHelperFactory, null, retainInput, opCallbackProvider);
         this.keyFields = keyFields;
     }
 
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/NGramTokenizerTest.java b/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/NGramTokenizerTest.java
index 5f15a91..3fb6407 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/NGramTokenizerTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/NGramTokenizerTest.java
@@ -33,6 +33,7 @@
 import org.junit.Before;
 import org.junit.Test;
 
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
 import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.AbstractUTF8Token;
 import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.HashedUTF8NGramTokenFactory;
 import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.IToken;
@@ -41,207 +42,196 @@
 
 public class NGramTokenizerTest {
 
-	private char PRECHAR = '#';
-	private char POSTCHAR = '$';
+    private char PRECHAR = '#';
+    private char POSTCHAR = '$';
 
-	private String str = "Jürgen S. Generic's Car";
-	private byte[] inputBuffer;
+    private String str = "Jürgen S. Generic's Car";
+    private byte[] inputBuffer;
 
-	private int gramLength = 3;
+    private int gramLength = 3;
 
-	private void getExpectedGrams(String s, int gramLength,
-			ArrayList<String> grams, boolean prePost) {
+    private void getExpectedGrams(String s, int gramLength, ArrayList<String> grams, boolean prePost) {
 
-		String tmp = s.toLowerCase();
-		if (prePost) {
-			StringBuilder preBuilder = new StringBuilder();
-			for (int i = 0; i < gramLength - 1; i++) {
-				preBuilder.append(PRECHAR);
-			}
-			String pre = preBuilder.toString();
+        String tmp = s.toLowerCase();
+        if (prePost) {
+            StringBuilder preBuilder = new StringBuilder();
+            for (int i = 0; i < gramLength - 1; i++) {
+                preBuilder.append(PRECHAR);
+            }
+            String pre = preBuilder.toString();
 
-			StringBuilder postBuilder = new StringBuilder();
-			for (int i = 0; i < gramLength - 1; i++) {
-				postBuilder.append(POSTCHAR);
-			}
-			String post = postBuilder.toString();
+            StringBuilder postBuilder = new StringBuilder();
+            for (int i = 0; i < gramLength - 1; i++) {
+                postBuilder.append(POSTCHAR);
+            }
+            String post = postBuilder.toString();
 
-			tmp = pre + s.toLowerCase() + post;
-		}
+            tmp = pre + s.toLowerCase() + post;
+        }
 
-		for (int i = 0; i < tmp.length() - gramLength + 1; i++) {
-			String gram = tmp.substring(i, i + gramLength);
-			grams.add(gram);
-		}
-	}
+        for (int i = 0; i < tmp.length() - gramLength + 1; i++) {
+            String gram = tmp.substring(i, i + gramLength);
+            grams.add(gram);
+        }
+    }
 
-	@Before
-	public void init() throws Exception {
-		// serialize string into bytes
-		ByteArrayOutputStream baos = new ByteArrayOutputStream();
-		DataOutput dos = new DataOutputStream(baos);
-		dos.writeUTF(str);
-		inputBuffer = baos.toByteArray();
-	}
+    @Before
+    public void init() throws Exception {
+        // serialize string into bytes
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        DataOutput dos = new DataOutputStream(baos);
+        dos.writeUTF(str);
+        inputBuffer = baos.toByteArray();
+    }
 
-	void runTestNGramTokenizerWithCountedHashedUTF8Tokens(boolean prePost)
-			throws IOException {
-		HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
-		NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(
-				gramLength, prePost, false, false, tokenFactory);
-		tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+    void runTestNGramTokenizerWithCountedHashedUTF8Tokens(boolean prePost) throws IOException {
+        HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
+        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(gramLength, prePost, false,
+                false, tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
 
-		ArrayList<String> expectedGrams = new ArrayList<String>();
-		getExpectedGrams(str, gramLength, expectedGrams, prePost);
-		ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
-		HashMap<String, Integer> gramCounts = new HashMap<String, Integer>();
-		for (String s : expectedGrams) {
-			Integer count = gramCounts.get(s);
-			if (count == null) {
-				count = 1;
-				gramCounts.put(s, count);
-			} else {
-				count++;
-			}
+        ArrayList<String> expectedGrams = new ArrayList<String>();
+        getExpectedGrams(str, gramLength, expectedGrams, prePost);
+        ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
+        HashMap<String, Integer> gramCounts = new HashMap<String, Integer>();
+        for (String s : expectedGrams) {
+            Integer count = gramCounts.get(s);
+            if (count == null) {
+                count = 1;
+                gramCounts.put(s, count);
+            } else {
+                count++;
+            }
 
-			int hash = tokenHash(s, count);
-			expectedHashedGrams.add(hash);
-		}
+            int hash = tokenHash(s, count);
+            expectedHashedGrams.add(hash);
+        }
 
-		int tokenCount = 0;
+        int tokenCount = 0;
 
-		while (tokenizer.hasNext()) {
-			tokenizer.next();
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
 
-			// serialize hashed token
-			ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
-			DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            // serialize hashed token
+            GrowableArray tokenStorage = new GrowableArray();
 
-			IToken token = tokenizer.getToken();
-			token.serializeToken(tokenDos);
+            IToken token = tokenizer.getToken();
+            token.serializeToken(tokenStorage);
 
-			// deserialize token
-			ByteArrayInputStream bais = new ByteArrayInputStream(
-					tokenBaos.toByteArray());
-			DataInput in = new DataInputStream(bais);
+            // deserialize token
+            ByteArrayInputStream bais = new ByteArrayInputStream(tokenStorage.getByteArray());
+            DataInput in = new DataInputStream(bais);
 
-			Integer hashedGram = in.readInt();
+            Integer hashedGram = in.readInt();
 
-			// System.out.println(hashedGram);
+            // System.out.println(hashedGram);
 
-			Assert.assertEquals(expectedHashedGrams.get(tokenCount), hashedGram);
+            Assert.assertEquals(expectedHashedGrams.get(tokenCount), hashedGram);
 
-			tokenCount++;
-		}
-		// System.out.println("---------");
-	}
+            tokenCount++;
+        }
+        // System.out.println("---------");
+    }
 
-	void runTestNGramTokenizerWithHashedUTF8Tokens(boolean prePost)
-			throws IOException {
-		HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
-		NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(
-				gramLength, prePost, true, false, tokenFactory);
-		tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+    void runTestNGramTokenizerWithHashedUTF8Tokens(boolean prePost) throws IOException {
+        HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
+        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(gramLength, prePost, true, false,
+                tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
 
-		ArrayList<String> expectedGrams = new ArrayList<String>();
-		getExpectedGrams(str, gramLength, expectedGrams, prePost);
-		ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
-		for (String s : expectedGrams) {
-			int hash = tokenHash(s, 1);
-			expectedHashedGrams.add(hash);
-		}
+        ArrayList<String> expectedGrams = new ArrayList<String>();
+        getExpectedGrams(str, gramLength, expectedGrams, prePost);
+        ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
+        for (String s : expectedGrams) {
+            int hash = tokenHash(s, 1);
+            expectedHashedGrams.add(hash);
+        }
 
-		int tokenCount = 0;
+        int tokenCount = 0;
 
-		while (tokenizer.hasNext()) {
-			tokenizer.next();
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
 
-			// serialize hashed token
-			ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
-			DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            // serialize hashed token
+            GrowableArray tokenStorage = new GrowableArray();
 
-			IToken token = tokenizer.getToken();
-			token.serializeToken(tokenDos);
+            IToken token = tokenizer.getToken();
+            token.serializeToken(tokenStorage);
 
-			// deserialize token
-			ByteArrayInputStream bais = new ByteArrayInputStream(
-					tokenBaos.toByteArray());
-			DataInput in = new DataInputStream(bais);
+            // deserialize token
+            ByteArrayInputStream bais = new ByteArrayInputStream(tokenStorage.getByteArray());
+            DataInput in = new DataInputStream(bais);
 
-			Integer hashedGram = in.readInt();
+            Integer hashedGram = in.readInt();
 
-			// System.out.println(hashedGram);
+            // System.out.println(hashedGram);
 
-			Assert.assertEquals(expectedHashedGrams.get(tokenCount), hashedGram);
+            Assert.assertEquals(expectedHashedGrams.get(tokenCount), hashedGram);
 
-			tokenCount++;
-		}
-		// System.out.println("---------");
-	}
+            tokenCount++;
+        }
+        // System.out.println("---------");
+    }
 
-	void runTestNGramTokenizerWithUTF8Tokens(boolean prePost)
-			throws IOException {
-		UTF8NGramTokenFactory tokenFactory = new UTF8NGramTokenFactory();
-		NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(
-				gramLength, prePost, true, false, tokenFactory);
-		tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+    void runTestNGramTokenizerWithUTF8Tokens(boolean prePost) throws IOException {
+        UTF8NGramTokenFactory tokenFactory = new UTF8NGramTokenFactory();
+        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(gramLength, prePost, true, false,
+                tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
 
-		ArrayList<String> expectedGrams = new ArrayList<String>();
-		getExpectedGrams(str, gramLength, expectedGrams, prePost);
+        ArrayList<String> expectedGrams = new ArrayList<String>();
+        getExpectedGrams(str, gramLength, expectedGrams, prePost);
 
-		int tokenCount = 0;
+        int tokenCount = 0;
 
-		while (tokenizer.hasNext()) {
-			tokenizer.next();
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
 
-			// serialize hashed token
-			ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
-			DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            // serialize hashed token
+            GrowableArray tokenStorage = new GrowableArray();
 
-			IToken token = tokenizer.getToken();
-			token.serializeToken(tokenDos);
+            IToken token = tokenizer.getToken();
+            token.serializeToken(tokenStorage);
 
-			// deserialize token
-			ByteArrayInputStream bais = new ByteArrayInputStream(
-					tokenBaos.toByteArray());
-			DataInput in = new DataInputStream(bais);
+            // deserialize token
+            ByteArrayInputStream bais = new ByteArrayInputStream(tokenStorage.getByteArray());
+            DataInput in = new DataInputStream(bais);
 
-			String strGram = in.readUTF();
+            String strGram = in.readUTF();
 
-			// System.out.println("\"" + strGram + "\"");
+            // System.out.println("\"" + strGram + "\"");
 
-			Assert.assertEquals(expectedGrams.get(tokenCount), strGram);
+            Assert.assertEquals(expectedGrams.get(tokenCount), strGram);
 
-			tokenCount++;
-		}
-		// System.out.println("---------");
-	}
+            tokenCount++;
+        }
+        // System.out.println("---------");
+    }
 
-	@Test
-	public void testNGramTokenizerWithCountedHashedUTF8Tokens()
-			throws Exception {
-		runTestNGramTokenizerWithCountedHashedUTF8Tokens(false);
-		runTestNGramTokenizerWithCountedHashedUTF8Tokens(true);
-	}
+    @Test
+    public void testNGramTokenizerWithCountedHashedUTF8Tokens() throws Exception {
+        runTestNGramTokenizerWithCountedHashedUTF8Tokens(false);
+        runTestNGramTokenizerWithCountedHashedUTF8Tokens(true);
+    }
 
-	@Test
-	public void testNGramTokenizerWithHashedUTF8Tokens() throws Exception {
-		runTestNGramTokenizerWithHashedUTF8Tokens(false);
-		runTestNGramTokenizerWithHashedUTF8Tokens(true);
-	}
+    @Test
+    public void testNGramTokenizerWithHashedUTF8Tokens() throws Exception {
+        runTestNGramTokenizerWithHashedUTF8Tokens(false);
+        runTestNGramTokenizerWithHashedUTF8Tokens(true);
+    }
 
-	@Test
-	public void testNGramTokenizerWithUTF8Tokens() throws IOException {
-		runTestNGramTokenizerWithUTF8Tokens(false);
-		runTestNGramTokenizerWithUTF8Tokens(true);
-	}
+    @Test
+    public void testNGramTokenizerWithUTF8Tokens() throws IOException {
+        runTestNGramTokenizerWithUTF8Tokens(false);
+        runTestNGramTokenizerWithUTF8Tokens(true);
+    }
 
-	public int tokenHash(String token, int tokenCount) {
-		int h = AbstractUTF8Token.GOLDEN_RATIO_32;
-		for (int i = 0; i < token.length(); i++) {
-			h ^= token.charAt(i);
-			h *= AbstractUTF8Token.GOLDEN_RATIO_32;
-		}
-		return h + tokenCount;
-	}
+    public int tokenHash(String token, int tokenCount) {
+        int h = AbstractUTF8Token.GOLDEN_RATIO_32;
+        for (int i = 0; i < token.length(); i++) {
+            h ^= token.charAt(i);
+            h *= AbstractUTF8Token.GOLDEN_RATIO_32;
+        }
+        return h + tokenCount;
+    }
 }
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/SearchTest.java b/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/SearchTest.java
index c3c9b99..47a068b 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/SearchTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/SearchTest.java
@@ -27,6 +27,7 @@
 
 import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparator;
 import edu.uci.ics.hyracks.data.std.util.ByteArrayAccessibleOutputStream;
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
 import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
 import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
 import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
@@ -112,20 +113,19 @@
 	}
 
 	private class TokenIdPair implements Comparable<TokenIdPair> {
-		public ByteArrayAccessibleOutputStream baaos = new ByteArrayAccessibleOutputStream();
-		public DataOutputStream dos = new DataOutputStream(baaos);
+	    public final GrowableArray tokenStorage = new GrowableArray();
 		public int id;
 
 		TokenIdPair(IToken token, int id) throws IOException {
-			token.serializeToken(dos);
+			token.serializeToken(tokenStorage);
 			this.id = id;
 		}
 
 		@Override
 		public int compareTo(TokenIdPair o) {
-			int cmp = btreeBinCmps[0].compare(baaos.getByteArray(), 0,
-					baaos.getByteArray().length, o.baaos.getByteArray(), 0,
-					o.baaos.getByteArray().length);
+			int cmp = btreeBinCmps[0].compare(tokenStorage.getByteArray(), 0,
+			        tokenStorage.getByteArray().length, o.tokenStorage.getByteArray(), 0,
+					o.tokenStorage.getByteArray().length);
 			if (cmp == 0) {
 				return id - o.id;
 			} else {
@@ -157,8 +157,8 @@
 
 		for (TokenIdPair t : pairs) {
 			tb.reset();
-			tb.addField(t.baaos.getByteArray(), 0,
-					t.baaos.getByteArray().length);
+			tb.addField(t.tokenStorage.getByteArray(), 0,
+					t.tokenStorage.getByteArray().length);
 			IntegerSerializerDeserializer.INSTANCE.serialize(t.id, tb.getDataOutput());
 			tb.addFieldEndOffset();
 			tuple.reset(tb.getFieldEndOffsets(), tb.getByteArray());
diff --git a/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/WordTokenizerTest.java b/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/WordTokenizerTest.java
index 53fb96d..810c5f5 100644
--- a/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/WordTokenizerTest.java
+++ b/hyracks/hyracks-tests/hyracks-storage-am-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/invertedindex/WordTokenizerTest.java
@@ -34,6 +34,7 @@
 import org.junit.Before;
 import org.junit.Test;
 
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
 import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.AbstractUTF8Token;
 import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizer;
 import edu.uci.ics.hyracks.storage.am.invertedindex.tokenizers.HashedUTF8WordTokenFactory;
@@ -127,14 +128,13 @@
             tokenizer.next();
 
             // serialize token
-            ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
-            DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            GrowableArray tokenStorage = new GrowableArray();
 
             IToken token = tokenizer.getToken();
-            token.serializeToken(tokenDos);
+            token.serializeToken(tokenStorage);
 
             // deserialize token
-            ByteArrayInputStream bais = new ByteArrayInputStream(tokenBaos.toByteArray());
+            ByteArrayInputStream bais = new ByteArrayInputStream(tokenStorage.getByteArray());
             DataInput in = new DataInputStream(bais);
 
             Integer hashedToken = in.readInt();
@@ -159,14 +159,13 @@
             tokenizer.next();
 
             // serialize token
-            ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
-            DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            GrowableArray tokenStorage = new GrowableArray();
 
             IToken token = tokenizer.getToken();
-            token.serializeToken(tokenDos);
+            token.serializeToken(tokenStorage);
 
             // deserialize token
-            ByteArrayInputStream bais = new ByteArrayInputStream(tokenBaos.toByteArray());
+            ByteArrayInputStream bais = new ByteArrayInputStream(tokenStorage.getByteArray());
             DataInput in = new DataInputStream(bais);
 
             Integer hashedToken = in.readInt();
@@ -191,14 +190,13 @@
             tokenizer.next();
 
             // serialize hashed token
-            ByteArrayOutputStream tokenBaos = new ByteArrayOutputStream();
-            DataOutput tokenDos = new DataOutputStream(tokenBaos);
+            GrowableArray tokenStorage = new GrowableArray();
 
             IToken token = tokenizer.getToken();
-            token.serializeToken(tokenDos);
+            token.serializeToken(tokenStorage);
 
             // deserialize token
-            ByteArrayInputStream bais = new ByteArrayInputStream(tokenBaos.toByteArray());
+            ByteArrayInputStream bais = new ByteArrayInputStream(tokenStorage.getByteArray());
             DataInput in = new DataInputStream(bais);
 
             String strToken = in.readUTF();
diff --git a/hyracks/pom.xml b/hyracks/pom.xml
index 570421e..1d4e094 100644
--- a/hyracks/pom.xml
+++ b/hyracks/pom.xml
@@ -1,4 +1,4 @@
-
+<?xml version="1.0" encoding="UTF-8"?>
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
   <modelVersion>4.0.0</modelVersion>
   <groupId>edu.uci.ics.hyracks</groupId>
@@ -8,24 +8,9 @@
   <name>hyracks</name>
 
   <properties>
-    <jvm.extraargs />
+    <jvm.extraargs/>
   </properties>
 
-  <profiles>
-    <profile>
-      <id>macosx</id>
-      <activation>
-        <os>
-          <name>mac os x</name>
-        </os>
-        <jdk>1.7</jdk>
-      </activation>
-      <properties>
-        <jvm.extraargs>-Djava.nio.channels.spi.SelectorProvider=sun.nio.ch.KQueueSelectorProvider</jvm.extraargs>
-      </properties>
-    </profile>
-  </profiles>
-
   <build>
     <plugins>
       <plugin>
@@ -116,5 +101,6 @@
     <module>hyracks-hadoop-compat</module>
     <!--module>hyracks-yarn</module-->
     <module>hyracks-maven-plugins</module>
+    <module>hyracks-dist</module>
   </modules>
-</project>
+</project>
\ No newline at end of file