add LSM support in pregelix
diff --git a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
index 0c2416d..d3bcaca 100644
--- a/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
+++ b/hivesterix/hivesterix-dist/src/main/java/edu/uci/ics/hivesterix/runtime/exec/HyracksExecutionEngine.java
@@ -12,598 +12,601 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package edu.uci.ics.hivesterix.runtime.exec;

-

-import java.io.BufferedReader;

-import java.io.FileInputStream;

-import java.io.InputStream;

-import java.io.InputStreamReader;

-import java.io.PrintWriter;

-import java.io.Serializable;

-import java.net.InetAddress;

-import java.util.ArrayList;

-import java.util.HashMap;

-import java.util.Iterator;

-import java.util.List;

-import java.util.Map;

-import java.util.Map.Entry;

-import java.util.Properties;

-import java.util.Set;

-

-import org.apache.commons.logging.Log;

-import org.apache.commons.logging.LogFactory;

-import org.apache.hadoop.hive.conf.HiveConf;

-import org.apache.hadoop.hive.ql.exec.ConditionalTask;

-import org.apache.hadoop.hive.ql.exec.FileSinkOperator;

-import org.apache.hadoop.hive.ql.exec.MapRedTask;

-import org.apache.hadoop.hive.ql.exec.Operator;

-import org.apache.hadoop.hive.ql.exec.TableScanOperator;

-import org.apache.hadoop.hive.ql.exec.Task;

-import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;

-import org.apache.hadoop.hive.ql.plan.FetchWork;

-import org.apache.hadoop.hive.ql.plan.FileSinkDesc;

-import org.apache.hadoop.hive.ql.plan.MapredLocalWork;

-import org.apache.hadoop.hive.ql.plan.MapredWork;

-import org.apache.hadoop.hive.ql.plan.PartitionDesc;

-import org.apache.hadoop.hive.ql.plan.TableScanDesc;

-

-import edu.uci.ics.hivesterix.common.config.ConfUtil;

-import edu.uci.ics.hivesterix.logical.expression.HiveExpressionTypeComputer;

-import edu.uci.ics.hivesterix.logical.expression.HiveMergeAggregationExpressionFactory;

-import edu.uci.ics.hivesterix.logical.expression.HiveNullableTypeComputer;

-import edu.uci.ics.hivesterix.logical.expression.HivePartialAggregationTypeComputer;

-import edu.uci.ics.hivesterix.logical.plan.HiveAlgebricksTranslator;

-import edu.uci.ics.hivesterix.logical.plan.HiveLogicalPlanAndMetaData;

-import edu.uci.ics.hivesterix.optimizer.rulecollections.HiveRuleCollections;

-import edu.uci.ics.hivesterix.runtime.factory.evaluator.HiveExpressionRuntimeProvider;

-import edu.uci.ics.hivesterix.runtime.factory.nullwriter.HiveNullWriterFactory;

-import edu.uci.ics.hivesterix.runtime.inspector.HiveBinaryBooleanInspectorFactory;

-import edu.uci.ics.hivesterix.runtime.inspector.HiveBinaryIntegerInspectorFactory;

-import edu.uci.ics.hivesterix.runtime.jobgen.HiveConnectorPolicyAssignmentPolicy;

-import edu.uci.ics.hivesterix.runtime.jobgen.HiveConnectorPolicyAssignmentPolicy.Policy;

-import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryComparatorFactoryProvider;

-import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryHashFunctionFactoryProvider;

-import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryHashFunctionFamilyProvider;

-import edu.uci.ics.hivesterix.runtime.provider.HiveNormalizedKeyComputerFactoryProvider;

-import edu.uci.ics.hivesterix.runtime.provider.HivePrinterFactoryProvider;

-import edu.uci.ics.hivesterix.runtime.provider.HiveSerializerDeserializerProvider;

-import edu.uci.ics.hivesterix.runtime.provider.HiveTypeTraitProvider;

-import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;

-import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;

-import edu.uci.ics.hyracks.algebricks.common.utils.Pair;

-import edu.uci.ics.hyracks.algebricks.compiler.api.HeuristicCompilerFactoryBuilder;

-import edu.uci.ics.hyracks.algebricks.compiler.api.HeuristicCompilerFactoryBuilder.DefaultOptimizationContextFactory;

-import edu.uci.ics.hyracks.algebricks.compiler.api.ICompiler;

-import edu.uci.ics.hyracks.algebricks.compiler.api.ICompilerFactory;

-import edu.uci.ics.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialFixpointRuleController;

-import edu.uci.ics.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialOnceRuleController;

-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;

-import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlanAndMetadata;

-import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor;

-import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.PlanPrettyPrinter;

-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController;

-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;

-import edu.uci.ics.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;

-import edu.uci.ics.hyracks.api.client.HyracksConnection;

-import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;

-import edu.uci.ics.hyracks.api.job.JobId;

-import edu.uci.ics.hyracks.api.job.JobSpecification;

-

-@SuppressWarnings({ "rawtypes", "unchecked" })

-public class HyracksExecutionEngine implements IExecutionEngine {

-

-    private static final Log LOG = LogFactory.getLog(HyracksExecutionEngine.class.getName());

-    private static final String clusterPropertiesPath = "conf/cluster.properties";

-    private static final String masterFilePath = "conf/master";

-

-    private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_LOGICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();

-    private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_PHYSICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();

-    static {

-        SequentialFixpointRuleController seqCtrlNoDfs = new SequentialFixpointRuleController(false);

-        SequentialFixpointRuleController seqCtrlFullDfs = new SequentialFixpointRuleController(true);

-        SequentialOnceRuleController seqOnceCtrl = new SequentialOnceRuleController(true);

-        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlFullDfs,

-                HiveRuleCollections.NORMALIZATION));

-        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,

-                HiveRuleCollections.COND_PUSHDOWN_AND_JOIN_INFERENCE));

-        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlFullDfs,

-                HiveRuleCollections.LOAD_FIELDS));

-        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,

-                HiveRuleCollections.OP_PUSHDOWN));

-        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,

-                HiveRuleCollections.DATA_EXCHANGE));

-        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,

-                HiveRuleCollections.CONSOLIDATION));

-

-        DEFAULT_PHYSICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,

-                HiveRuleCollections.PHYSICAL_PLAN_REWRITES));

-        DEFAULT_PHYSICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,

-                HiveRuleCollections.prepareJobGenRules));

-    }

-

-    /**

-     * static configurations for compiler

-     */

-    private HeuristicCompilerFactoryBuilder builder;

-

-    /**

-     * compiler

-     */

-    private ICompiler compiler;

-

-    /**

-     * physical optimization config

-     */

-    private PhysicalOptimizationConfig physicalOptimizationConfig;

-

-    /**

-     * final ending operators

-     */

-    private List<Operator> leaveOps = new ArrayList<Operator>();

-

-    /**

-     * tasks that are already visited

-     */

-    private Map<Task<? extends Serializable>, Boolean> tasksVisited = new HashMap<Task<? extends Serializable>, Boolean>();

-

-    /**

-     * hyracks job spec

-     */

-    private JobSpecification jobSpec;

-

-    /**

-     * hive configuration

-     */

-    private HiveConf conf;

-

-    /**

-     * plan printer

-     */

-    private PrintWriter planPrinter;

-

-    /**

-     * properties

-     */

-    private Properties clusterProps;

-

-    /**

-     * the Hyracks client connection

-     */

-    private IHyracksClientConnection hcc;

-

-    public HyracksExecutionEngine(HiveConf conf) {

-        this.conf = conf;

-        init(conf);

-    }

-

-    public HyracksExecutionEngine(HiveConf conf, PrintWriter planPrinter) {

-        this.conf = conf;

-        this.planPrinter = planPrinter;

-        init(conf);

-    }

-

-    private void init(HiveConf conf) {

-        builder = new HeuristicCompilerFactoryBuilder(DefaultOptimizationContextFactory.INSTANCE);

-        builder.setLogicalRewrites(DEFAULT_LOGICAL_REWRITES);

-        builder.setPhysicalRewrites(DEFAULT_PHYSICAL_REWRITES);

-        builder.setIMergeAggregationExpressionFactory(HiveMergeAggregationExpressionFactory.INSTANCE);

-        builder.setExpressionTypeComputer(HiveExpressionTypeComputer.INSTANCE);

-        builder.setNullableTypeComputer(HiveNullableTypeComputer.INSTANCE);

-

-        long memSizeExternalGby = conf.getLong("hive.algebricks.groupby.external.memory", 268435456);

-        long memSizeExternalSort = conf.getLong("hive.algebricks.sort.memory", 536870912);

-        int frameSize = conf.getInt("hive.algebricks.framesize", 32768);

-

-        physicalOptimizationConfig = new PhysicalOptimizationConfig();

-        int frameLimitExtGby = (int) (memSizeExternalGby / frameSize);

-        physicalOptimizationConfig.setMaxFramesExternalGroupBy(frameLimitExtGby);

-        int frameLimitExtSort = (int) (memSizeExternalSort / frameSize);

-        physicalOptimizationConfig.setMaxFramesExternalSort(frameLimitExtSort);

-        builder.setPhysicalOptimizationConfig(physicalOptimizationConfig);

-    }

-

-    @Override

-    public int compileJob(List<Task<? extends Serializable>> rootTasks) {

-        // clean up

-        leaveOps.clear();

-        tasksVisited.clear();

-        jobSpec = null;

-

-        HashMap<String, PartitionDesc> aliasToPath = new HashMap<String, PartitionDesc>();

-        List<Operator> rootOps = generateRootOperatorDAG(rootTasks, aliasToPath);

-

-        // get all leave Ops

-        getLeaves(rootOps, leaveOps);

-

-        HiveAlgebricksTranslator translator = new HiveAlgebricksTranslator();

-        try {

-            translator.translate(rootOps, null, aliasToPath);

-

-            ILogicalPlan plan = translator.genLogicalPlan();

-

-            if (plan.getRoots() != null && plan.getRoots().size() > 0 && plan.getRoots().get(0).getValue() != null) {

-                translator.printOperators();

-                ILogicalPlanAndMetadata planAndMetadata = new HiveLogicalPlanAndMetaData(plan,

-                        translator.getMetadataProvider());

-

-                ICompilerFactory compilerFactory = builder.create();

-                compiler = compilerFactory.createCompiler(planAndMetadata.getPlan(),

-                        planAndMetadata.getMetadataProvider(), translator.getVariableCounter());

-

-                // run optimization and re-writing rules for Hive plan

-                compiler.optimize();

-

-                // print optimized plan

-                LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();

-                StringBuilder buffer = new StringBuilder();

-                PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);

-                String planStr = buffer.toString();

-                System.out.println(planStr);

-

-                if (planPrinter != null)

-                    planPrinter.print(planStr);

-            }

-        } catch (Exception e) {

-            e.printStackTrace();

-            return 1;

-        }

-

-        return 0;

-    }

-

-    private void codeGen() throws AlgebricksException {

-        try {

-            // number of cpu cores in the cluster

-            builder.setClusterLocations(new AlgebricksAbsolutePartitionConstraint(ConfUtil.getNCs()));

-        } catch (Exception e) {

-            throw new AlgebricksException(e);

-        }

-        // builder.setClusterTopology(ConfUtil.getClusterTopology());

-        builder.setBinaryBooleanInspectorFactory(HiveBinaryBooleanInspectorFactory.INSTANCE);

-        builder.setBinaryIntegerInspectorFactory(HiveBinaryIntegerInspectorFactory.INSTANCE);

-        builder.setComparatorFactoryProvider(HiveBinaryComparatorFactoryProvider.INSTANCE);

-        builder.setExpressionRuntimeProvider(HiveExpressionRuntimeProvider.INSTANCE);

-        builder.setHashFunctionFactoryProvider(HiveBinaryHashFunctionFactoryProvider.INSTANCE);

-        builder.setPrinterProvider(HivePrinterFactoryProvider.INSTANCE);

-        builder.setSerializerDeserializerProvider(HiveSerializerDeserializerProvider.INSTANCE);

-        builder.setNullWriterFactory(HiveNullWriterFactory.INSTANCE);

-        builder.setNormalizedKeyComputerFactoryProvider(HiveNormalizedKeyComputerFactoryProvider.INSTANCE);

-        builder.setPartialAggregationTypeComputer(HivePartialAggregationTypeComputer.INSTANCE);

-        builder.setTypeTraitProvider(HiveTypeTraitProvider.INSTANCE);

-        builder.setHashFunctionFamilyProvider(HiveBinaryHashFunctionFamilyProvider.INSTANCE);

-

-        jobSpec = compiler.createJob(null, null);

-

-        // set the policy

-        String policyStr = conf.get("hive.hyracks.connectorpolicy");

-        if (policyStr == null)

-            policyStr = "PIPELINING";

-        Policy policyValue = Policy.valueOf(policyStr);

-        jobSpec.setConnectorPolicyAssignmentPolicy(new HiveConnectorPolicyAssignmentPolicy(policyValue));

-        jobSpec.setUseConnectorPolicyForScheduling(false);

-    }

-

-    @Override

-    public int executeJob() {

-        try {

-            codeGen();

-            executeHyracksJob(jobSpec);

-        } catch (Exception e) {

-            e.printStackTrace();

-            return 1;

-        }

-        return 0;

-    }

-

-    private List<Operator> generateRootOperatorDAG(List<Task<? extends Serializable>> rootTasks,

-            HashMap<String, PartitionDesc> aliasToPath) {

-

-        List<Operator> rootOps = new ArrayList<Operator>();

-        List<Task<? extends Serializable>> toDelete = new ArrayList<Task<? extends Serializable>>();

-        tasksVisited.clear();

-

-        for (int i = rootTasks.size() - 1; i >= 0; i--) {

-            /**

-             * list of map-reduce tasks

-             */

-            Task<? extends Serializable> task = rootTasks.get(i);

-

-            if (task instanceof MapRedTask) {

-                List<Operator> mapRootOps = articulateMapReduceOperators(task, rootOps, aliasToPath, rootTasks);

-                if (i == 0)

-                    rootOps.addAll(mapRootOps);

-                else {

-                    List<Operator> leaves = new ArrayList<Operator>();

-                    getLeaves(rootOps, leaves);

-

-                    List<Operator> mapChildren = new ArrayList<Operator>();

-                    for (Operator childMap : mapRootOps) {

-                        if (childMap instanceof TableScanOperator) {

-                            TableScanDesc topDesc = (TableScanDesc) childMap.getConf();

-                            if (topDesc == null)

-                                mapChildren.add(childMap);

-                            else {

-                                rootOps.add(childMap);

-                            }

-                        } else

-                            mapChildren.add(childMap);

-                    }

-

-                    if (mapChildren.size() > 0) {

-                        for (Operator leaf : leaves)

-                            leaf.setChildOperators(mapChildren);

-                        for (Operator child : mapChildren)

-                            child.setParentOperators(leaves);

-                    }

-                }

-

-                MapredWork mr = (MapredWork) task.getWork();

-                HashMap<String, PartitionDesc> map = mr.getAliasToPartnInfo();

-

-                addAliasToPartition(aliasToPath, map);

-                toDelete.add(task);

-            }

-        }

-

-        for (Task<? extends Serializable> task : toDelete)

-            rootTasks.remove(task);

-

-        return rootOps;

-    }

-

-    private void addAliasToPartition(HashMap<String, PartitionDesc> aliasToPath, HashMap<String, PartitionDesc> map) {

-        Iterator<String> keys = map.keySet().iterator();

-        while (keys.hasNext()) {

-            String key = keys.next();

-            PartitionDesc part = map.get(key);

-            String[] names = key.split(":");

-            for (String name : names) {

-                aliasToPath.put(name, part);

-            }

-        }

-    }

-

-    private List<Operator> articulateMapReduceOperators(Task task, List<Operator> rootOps,

-            HashMap<String, PartitionDesc> aliasToPath, List<Task<? extends Serializable>> rootTasks) {

-        // System.out.println("!"+task.getName());

-        if (!(task instanceof MapRedTask)) {

-            if (!(task instanceof ConditionalTask)) {

-                rootTasks.add(task);

-                return null;

-            } else {

-                // remove map-reduce branches in condition task

-                ConditionalTask condition = (ConditionalTask) task;

-                List<Task<? extends Serializable>> branches = condition.getListTasks();

-                for (int i = branches.size() - 1; i >= 0; i--) {

-                    Task branch = branches.get(i);

-                    if (branch instanceof MapRedTask) {

-                        return articulateMapReduceOperators(branch, rootOps, aliasToPath, rootTasks);

-                    }

-                }

-                rootTasks.add(task);

-                return null;

-            }

-        }

-

-        MapredWork mr = (MapredWork) task.getWork();

-        HashMap<String, PartitionDesc> map = mr.getAliasToPartnInfo();

-

-        // put all aliasToParitionDesc mapping into the map

-        addAliasToPartition(aliasToPath, map);

-

-        MapRedTask mrtask = (MapRedTask) task;

-        MapredWork work = (MapredWork) mrtask.getWork();

-        HashMap<String, Operator<? extends Serializable>> operators = work.getAliasToWork();

-

-        Set entries = operators.entrySet();

-        Iterator<Entry<String, Operator>> iterator = entries.iterator();

-        List<Operator> mapRootOps = new ArrayList<Operator>();

-

-        // get map root operators

-        while (iterator.hasNext()) {

-            Operator next = iterator.next().getValue();

-            if (!mapRootOps.contains(next)) {

-                // clear that only for the case of union

-                mapRootOps.add(next);

-            }

-        }

-

-        // get map local work

-        MapredLocalWork localWork = work.getMapLocalWork();

-        if (localWork != null) {

-            HashMap<String, Operator<? extends Serializable>> localOperators = localWork.getAliasToWork();

-

-            Set localEntries = localOperators.entrySet();

-            Iterator<Entry<String, Operator>> localIterator = localEntries.iterator();

-            while (localIterator.hasNext()) {

-                mapRootOps.add(localIterator.next().getValue());

-            }

-

-            HashMap<String, FetchWork> localFetch = localWork.getAliasToFetchWork();

-            Set localFetchEntries = localFetch.entrySet();

-            Iterator<Entry<String, FetchWork>> localFetchIterator = localFetchEntries.iterator();

-            while (localFetchIterator.hasNext()) {

-                Entry<String, FetchWork> fetchMap = localFetchIterator.next();

-                FetchWork fetch = fetchMap.getValue();

-                String alias = fetchMap.getKey();

-                List<PartitionDesc> dirPart = fetch.getPartDesc();

-

-                // temporary hack: put the first partitionDesc into the map

-                aliasToPath.put(alias, dirPart.get(0));

-            }

-        }

-

-        Boolean visited = tasksVisited.get(task);

-        if (visited != null && visited.booleanValue() == true) {

-            return mapRootOps;

-        }

-

-        // do that only for union operator

-        for (Operator op : mapRootOps)

-            if (op.getParentOperators() != null)

-                op.getParentOperators().clear();

-

-        List<Operator> mapLeaves = new ArrayList<Operator>();

-        downToLeaves(mapRootOps, mapLeaves);

-        List<Operator> reduceOps = new ArrayList<Operator>();

-

-        if (work.getReducer() != null)

-            reduceOps.add(work.getReducer());

-

-        for (Operator mapLeaf : mapLeaves) {

-            mapLeaf.setChildOperators(reduceOps);

-        }

-

-        for (Operator reduceOp : reduceOps) {

-            if (reduceOp != null)

-                reduceOp.setParentOperators(mapLeaves);

-        }

-

-        List<Operator> leafs = new ArrayList<Operator>();

-        if (reduceOps.size() > 0) {

-            downToLeaves(reduceOps, leafs);

-        } else {

-            leafs = mapLeaves;

-        }

-

-        List<Operator> mapChildren = new ArrayList<Operator>();

-        if (task.getChildTasks() != null && task.getChildTasks().size() > 0) {

-            for (Object child : task.getChildTasks()) {

-                List<Operator> childMapOps = articulateMapReduceOperators((Task) child, rootOps, aliasToPath, rootTasks);

-                if (childMapOps == null)

-                    continue;

-

-                for (Operator childMap : childMapOps) {

-                    if (childMap instanceof TableScanOperator) {

-                        TableScanDesc topDesc = (TableScanDesc) childMap.getConf();

-                        if (topDesc == null)

-                            mapChildren.add(childMap);

-                        else {

-                            rootOps.add(childMap);

-                        }

-                    } else {

-                        // if not table scan, add the child

-                        mapChildren.add(childMap);

-                    }

-                }

-            }

-

-            if (mapChildren.size() > 0) {

-                int i = 0;

-                for (Operator leaf : leafs) {

-                    if (leaf.getChildOperators() == null || leaf.getChildOperators().size() == 0)

-                        leaf.setChildOperators(new ArrayList<Operator>());

-                    leaf.getChildOperators().add(mapChildren.get(i));

-                    i++;

-                }

-                i = 0;

-                for (Operator child : mapChildren) {

-                    if (child.getParentOperators() == null || child.getParentOperators().size() == 0)

-                        child.setParentOperators(new ArrayList<Operator>());

-                    child.getParentOperators().add(leafs.get(i));

-                    i++;

-                }

-            }

-        }

-

-        // mark this task as visited

-        this.tasksVisited.put(task, true);

-        return mapRootOps;

-    }

-

-    /**

-     * down to leaf nodes

-     * 

-     * @param ops

-     * @param leaves

-     */

-    private void downToLeaves(List<Operator> ops, List<Operator> leaves) {

-

-        // Operator currentOp;

-        for (Operator op : ops) {

-            if (op != null && op.getChildOperators() != null && op.getChildOperators().size() > 0) {

-                downToLeaves(op.getChildOperators(), leaves);

-            } else {

-                if (op != null && leaves.indexOf(op) < 0)

-                    leaves.add(op);

-            }

-        }

-    }

-

-    private void getLeaves(List<Operator> roots, List<Operator> currentLeaves) {

-        for (Operator op : roots) {

-            List<Operator> children = op.getChildOperators();

-            if (children == null || children.size() <= 0) {

-                currentLeaves.add(op);

-            } else {

-                getLeaves(children, currentLeaves);

-            }

-        }

-    }

-

-    private void executeHyracksJob(JobSpecification job) throws Exception {

-

-        /**

-         * load the properties file if it is not loaded

-         */

-        if (clusterProps == null) {

-            clusterProps = new Properties();

-            InputStream confIn = new FileInputStream(clusterPropertiesPath);

-            clusterProps.load(confIn);

-            confIn.close();

-        }

-

-        if (hcc == null) {

-            BufferedReader ipReader = new BufferedReader(new InputStreamReader(new FileInputStream(masterFilePath)));

-            String masterNode = ipReader.readLine();

-            ipReader.close();

-

-            InetAddress[] ips = InetAddress.getAllByName(masterNode);

-            int port = Integer.parseInt(clusterProps.getProperty("CC_CLIENTPORT"));

-            for (InetAddress ip : ips) {

-                if (ip.getAddress().length <= 4) {

-                    try {

-                        hcc = new HyracksConnection(ip.getHostAddress(), port);

-                        break;

-                    } catch (Exception e) {

-                        continue;

-                    }

-                }

-            }

-        }

-

-        long start = System.currentTimeMillis();

-        JobId jobId = hcc.startJob(job);

-        hcc.waitForCompletion(jobId);

-

-        // System.out.println("job finished: " + jobId.toString());

-        // call all leave nodes to end

-        for (Operator leaf : leaveOps) {

-            jobClose(leaf);

-        }

-

-        long end = System.currentTimeMillis();

-        System.err.println(start + " " + end + " " + (end - start));

-    }

-

-    /**

-     * mv to final directory on hdfs (not real final)

-     * 

-     * @param leaf

-     * @throws Exception

-     */

-    private void jobClose(Operator leaf) throws Exception {

-        FileSinkOperator fsOp = (FileSinkOperator) leaf;

-        FileSinkDesc desc = fsOp.getConf();

-        boolean isNativeTable = !desc.getTableInfo().isNonNative();

-        if ((conf != null) && isNativeTable) {

-            String specPath = desc.getDirName();

-            DynamicPartitionCtx dpCtx = desc.getDynPartCtx();

-            // for 0.7.0

-            fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);

-            // for 0.8.0

-            // Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx,

-            // desc);

-        }

-    }

-}

+package edu.uci.ics.hivesterix.runtime.exec;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.PrintWriter;
+import java.io.Serializable;
+import java.net.InetAddress;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.exec.MapRedTask;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
+import org.apache.hadoop.hive.ql.plan.FetchWork;
+import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+
+import edu.uci.ics.hivesterix.common.config.ConfUtil;
+import edu.uci.ics.hivesterix.logical.expression.HiveExpressionTypeComputer;
+import edu.uci.ics.hivesterix.logical.expression.HiveMergeAggregationExpressionFactory;
+import edu.uci.ics.hivesterix.logical.expression.HiveNullableTypeComputer;
+import edu.uci.ics.hivesterix.logical.expression.HivePartialAggregationTypeComputer;
+import edu.uci.ics.hivesterix.logical.plan.HiveAlgebricksTranslator;
+import edu.uci.ics.hivesterix.logical.plan.HiveLogicalPlanAndMetaData;
+import edu.uci.ics.hivesterix.optimizer.rulecollections.HiveRuleCollections;
+import edu.uci.ics.hivesterix.runtime.factory.evaluator.HiveExpressionRuntimeProvider;
+import edu.uci.ics.hivesterix.runtime.factory.nullwriter.HiveNullWriterFactory;
+import edu.uci.ics.hivesterix.runtime.inspector.HiveBinaryBooleanInspectorFactory;
+import edu.uci.ics.hivesterix.runtime.inspector.HiveBinaryIntegerInspectorFactory;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveConnectorPolicyAssignmentPolicy;
+import edu.uci.ics.hivesterix.runtime.jobgen.HiveConnectorPolicyAssignmentPolicy.Policy;
+import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryComparatorFactoryProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryHashFunctionFactoryProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HiveBinaryHashFunctionFamilyProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HiveNormalizedKeyComputerFactoryProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HivePrinterFactoryProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HiveSerializerDeserializerProvider;
+import edu.uci.ics.hivesterix.runtime.provider.HiveTypeTraitProvider;
+import edu.uci.ics.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
+import edu.uci.ics.hyracks.algebricks.common.exceptions.AlgebricksException;
+import edu.uci.ics.hyracks.algebricks.common.utils.Pair;
+import edu.uci.ics.hyracks.algebricks.compiler.api.HeuristicCompilerFactoryBuilder;
+import edu.uci.ics.hyracks.algebricks.compiler.api.HeuristicCompilerFactoryBuilder.DefaultOptimizationContextFactory;
+import edu.uci.ics.hyracks.algebricks.compiler.api.ICompiler;
+import edu.uci.ics.hyracks.algebricks.compiler.api.ICompilerFactory;
+import edu.uci.ics.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialFixpointRuleController;
+import edu.uci.ics.hyracks.algebricks.compiler.rewriter.rulecontrollers.SequentialOnceRuleController;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlan;
+import edu.uci.ics.hyracks.algebricks.core.algebra.base.ILogicalPlanAndMetadata;
+import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.LogicalOperatorPrettyPrintVisitor;
+import edu.uci.ics.hyracks.algebricks.core.algebra.prettyprint.PlanPrettyPrinter;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.AbstractRuleController;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
+import edu.uci.ics.hyracks.algebricks.core.rewriter.base.PhysicalOptimizationConfig;
+import edu.uci.ics.hyracks.api.client.HyracksConnection;
+import edu.uci.ics.hyracks.api.client.IHyracksClientConnection;
+import edu.uci.ics.hyracks.api.job.JobId;
+import edu.uci.ics.hyracks.api.job.JobSpecification;
+
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class HyracksExecutionEngine implements IExecutionEngine {
+
+    private static final Log LOG = LogFactory.getLog(HyracksExecutionEngine.class.getName());
+    private static final String clusterPropertiesPath = "conf/cluster.properties";
+    private static final String masterFilePath = "conf/master";
+
+    private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_LOGICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();
+    private static List<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>> DEFAULT_PHYSICAL_REWRITES = new ArrayList<Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>>();
+    static {
+        SequentialFixpointRuleController seqCtrlNoDfs = new SequentialFixpointRuleController(false);
+        SequentialFixpointRuleController seqCtrlFullDfs = new SequentialFixpointRuleController(true);
+        SequentialOnceRuleController seqOnceCtrl = new SequentialOnceRuleController(true);
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlFullDfs,
+                HiveRuleCollections.NORMALIZATION));
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,
+                HiveRuleCollections.COND_PUSHDOWN_AND_JOIN_INFERENCE));
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlFullDfs,
+                HiveRuleCollections.LOAD_FIELDS));
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,
+                HiveRuleCollections.OP_PUSHDOWN));
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,
+                HiveRuleCollections.DATA_EXCHANGE));
+        DEFAULT_LOGICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqCtrlNoDfs,
+                HiveRuleCollections.CONSOLIDATION));
+
+        DEFAULT_PHYSICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,
+                HiveRuleCollections.PHYSICAL_PLAN_REWRITES));
+        DEFAULT_PHYSICAL_REWRITES.add(new Pair<AbstractRuleController, List<IAlgebraicRewriteRule>>(seqOnceCtrl,
+                HiveRuleCollections.prepareJobGenRules));
+    }
+
+    /**
+     * static configurations for compiler
+     */
+    private HeuristicCompilerFactoryBuilder builder;
+
+    /**
+     * compiler
+     */
+    private ICompiler compiler;
+
+    /**
+     * physical optimization config
+     */
+    private PhysicalOptimizationConfig physicalOptimizationConfig;
+
+    /**
+     * final ending operators
+     */
+    private List<Operator> leaveOps = new ArrayList<Operator>();
+
+    /**
+     * tasks that are already visited
+     */
+    private Map<Task<? extends Serializable>, Boolean> tasksVisited = new HashMap<Task<? extends Serializable>, Boolean>();
+
+    /**
+     * hyracks job spec
+     */
+    private JobSpecification jobSpec;
+
+    /**
+     * hive configuration
+     */
+    private HiveConf conf;
+
+    /**
+     * plan printer
+     */
+    private PrintWriter planPrinter;
+
+    /**
+     * properties
+     */
+    private Properties clusterProps;
+
+    /**
+     * the Hyracks client connection
+     */
+    private IHyracksClientConnection hcc;
+
+    public HyracksExecutionEngine(HiveConf conf) {
+        this.conf = conf;
+        init(conf);
+    }
+
+    public HyracksExecutionEngine(HiveConf conf, PrintWriter planPrinter) {
+        this.conf = conf;
+        this.planPrinter = planPrinter;
+        init(conf);
+    }
+
+    private void init(HiveConf conf) {
+        builder = new HeuristicCompilerFactoryBuilder(DefaultOptimizationContextFactory.INSTANCE);
+        builder.setLogicalRewrites(DEFAULT_LOGICAL_REWRITES);
+        builder.setPhysicalRewrites(DEFAULT_PHYSICAL_REWRITES);
+        builder.setIMergeAggregationExpressionFactory(HiveMergeAggregationExpressionFactory.INSTANCE);
+        builder.setExpressionTypeComputer(HiveExpressionTypeComputer.INSTANCE);
+        builder.setNullableTypeComputer(HiveNullableTypeComputer.INSTANCE);
+
+        long memSizeExternalGby = conf.getLong("hive.algebricks.groupby.external.memory", 268435456);
+        long memSizeExternalSort = conf.getLong("hive.algebricks.sort.memory", 536870912);
+        int frameSize = conf.getInt("hive.algebricks.framesize", 32768);
+
+        physicalOptimizationConfig = new PhysicalOptimizationConfig();
+        int frameLimitExtGby = (int) (memSizeExternalGby / frameSize);
+        physicalOptimizationConfig.setMaxFramesExternalGroupBy(frameLimitExtGby);
+        int frameLimitExtSort = (int) (memSizeExternalSort / frameSize);
+        physicalOptimizationConfig.setMaxFramesExternalSort(frameLimitExtSort);
+        builder.setPhysicalOptimizationConfig(physicalOptimizationConfig);
+    }
+
+    @Override
+    public int compileJob(List<Task<? extends Serializable>> rootTasks) {
+        // clean up
+        leaveOps.clear();
+        tasksVisited.clear();
+        jobSpec = null;
+
+        HashMap<String, PartitionDesc> aliasToPath = new HashMap<String, PartitionDesc>();
+        List<Operator> rootOps = generateRootOperatorDAG(rootTasks, aliasToPath);
+
+        // get all leave Ops
+        getLeaves(rootOps, leaveOps);
+
+        HiveAlgebricksTranslator translator = new HiveAlgebricksTranslator();
+        try {
+            translator.translate(rootOps, null, aliasToPath);
+
+            ILogicalPlan plan = translator.genLogicalPlan();
+
+            if (plan.getRoots() != null && plan.getRoots().size() > 0 && plan.getRoots().get(0).getValue() != null) {
+                translator.printOperators();
+                ILogicalPlanAndMetadata planAndMetadata = new HiveLogicalPlanAndMetaData(plan,
+                        translator.getMetadataProvider());
+
+                ICompilerFactory compilerFactory = builder.create();
+                compiler = compilerFactory.createCompiler(planAndMetadata.getPlan(),
+                        planAndMetadata.getMetadataProvider(), translator.getVariableCounter());
+
+                // run optimization and re-writing rules for Hive plan
+                compiler.optimize();
+
+                // print optimized plan
+                LogicalOperatorPrettyPrintVisitor pvisitor = new LogicalOperatorPrettyPrintVisitor();
+                StringBuilder buffer = new StringBuilder();
+                PlanPrettyPrinter.printPlan(plan, buffer, pvisitor, 0);
+                String planStr = buffer.toString();
+                System.out.println(planStr);
+
+                if (planPrinter != null)
+                    planPrinter.print(planStr);
+            } else {
+                /** it is not a map reduce task DAG */
+                return 2;
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+            return 1;
+        }
+
+        return 0;
+    }
+
+    private void codeGen() throws AlgebricksException {
+        try {
+            // number of cpu cores in the cluster
+            builder.setClusterLocations(new AlgebricksAbsolutePartitionConstraint(ConfUtil.getNCs()));
+        } catch (Exception e) {
+            throw new AlgebricksException(e);
+        }
+        // builder.setClusterTopology(ConfUtil.getClusterTopology());
+        builder.setBinaryBooleanInspectorFactory(HiveBinaryBooleanInspectorFactory.INSTANCE);
+        builder.setBinaryIntegerInspectorFactory(HiveBinaryIntegerInspectorFactory.INSTANCE);
+        builder.setComparatorFactoryProvider(HiveBinaryComparatorFactoryProvider.INSTANCE);
+        builder.setExpressionRuntimeProvider(HiveExpressionRuntimeProvider.INSTANCE);
+        builder.setHashFunctionFactoryProvider(HiveBinaryHashFunctionFactoryProvider.INSTANCE);
+        builder.setPrinterProvider(HivePrinterFactoryProvider.INSTANCE);
+        builder.setSerializerDeserializerProvider(HiveSerializerDeserializerProvider.INSTANCE);
+        builder.setNullWriterFactory(HiveNullWriterFactory.INSTANCE);
+        builder.setNormalizedKeyComputerFactoryProvider(HiveNormalizedKeyComputerFactoryProvider.INSTANCE);
+        builder.setPartialAggregationTypeComputer(HivePartialAggregationTypeComputer.INSTANCE);
+        builder.setTypeTraitProvider(HiveTypeTraitProvider.INSTANCE);
+        builder.setHashFunctionFamilyProvider(HiveBinaryHashFunctionFamilyProvider.INSTANCE);
+
+        jobSpec = compiler.createJob(null, null);
+
+        // set the policy
+        String policyStr = conf.get("hive.hyracks.connectorpolicy");
+        if (policyStr == null)
+            policyStr = "PIPELINING";
+        Policy policyValue = Policy.valueOf(policyStr);
+        jobSpec.setConnectorPolicyAssignmentPolicy(new HiveConnectorPolicyAssignmentPolicy(policyValue));
+        jobSpec.setUseConnectorPolicyForScheduling(false);
+    }
+
+    @Override
+    public int executeJob() {
+        try {
+            codeGen();
+            executeHyracksJob(jobSpec);
+        } catch (Exception e) {
+            e.printStackTrace();
+            return 1;
+        }
+        return 0;
+    }
+
+    private List<Operator> generateRootOperatorDAG(List<Task<? extends Serializable>> rootTasks,
+            HashMap<String, PartitionDesc> aliasToPath) {
+
+        List<Operator> rootOps = new ArrayList<Operator>();
+        List<Task<? extends Serializable>> toDelete = new ArrayList<Task<? extends Serializable>>();
+        tasksVisited.clear();
+
+        for (int i = rootTasks.size() - 1; i >= 0; i--) {
+            /**
+             * list of map-reduce tasks
+             */
+            Task<? extends Serializable> task = rootTasks.get(i);
+
+            if (task instanceof MapRedTask) {
+                List<Operator> mapRootOps = articulateMapReduceOperators(task, rootOps, aliasToPath, rootTasks);
+                if (i == 0)
+                    rootOps.addAll(mapRootOps);
+                else {
+                    List<Operator> leaves = new ArrayList<Operator>();
+                    getLeaves(rootOps, leaves);
+
+                    List<Operator> mapChildren = new ArrayList<Operator>();
+                    for (Operator childMap : mapRootOps) {
+                        if (childMap instanceof TableScanOperator) {
+                            TableScanDesc topDesc = (TableScanDesc) childMap.getConf();
+                            if (topDesc == null)
+                                mapChildren.add(childMap);
+                            else {
+                                rootOps.add(childMap);
+                            }
+                        } else
+                            mapChildren.add(childMap);
+                    }
+
+                    if (mapChildren.size() > 0) {
+                        for (Operator leaf : leaves)
+                            leaf.setChildOperators(mapChildren);
+                        for (Operator child : mapChildren)
+                            child.setParentOperators(leaves);
+                    }
+                }
+
+                MapredWork mr = (MapredWork) task.getWork();
+                HashMap<String, PartitionDesc> map = mr.getAliasToPartnInfo();
+
+                addAliasToPartition(aliasToPath, map);
+                toDelete.add(task);
+            }
+        }
+
+        for (Task<? extends Serializable> task : toDelete)
+            rootTasks.remove(task);
+
+        return rootOps;
+    }
+
+    private void addAliasToPartition(HashMap<String, PartitionDesc> aliasToPath, HashMap<String, PartitionDesc> map) {
+        Iterator<String> keys = map.keySet().iterator();
+        while (keys.hasNext()) {
+            String key = keys.next();
+            PartitionDesc part = map.get(key);
+            String[] names = key.split(":");
+            for (String name : names) {
+                aliasToPath.put(name, part);
+            }
+        }
+    }
+
+    private List<Operator> articulateMapReduceOperators(Task task, List<Operator> rootOps,
+            HashMap<String, PartitionDesc> aliasToPath, List<Task<? extends Serializable>> rootTasks) {
+        // System.out.println("!"+task.getName());
+        if (!(task instanceof MapRedTask)) {
+            if (!(task instanceof ConditionalTask)) {
+                rootTasks.add(task);
+                return null;
+            } else {
+                // remove map-reduce branches in condition task
+                ConditionalTask condition = (ConditionalTask) task;
+                List<Task<? extends Serializable>> branches = condition.getListTasks();
+                for (int i = branches.size() - 1; i >= 0; i--) {
+                    Task branch = branches.get(i);
+                    if (branch instanceof MapRedTask) {
+                        return articulateMapReduceOperators(branch, rootOps, aliasToPath, rootTasks);
+                    }
+                }
+                rootTasks.add(task);
+                return null;
+            }
+        }
+
+        MapredWork mr = (MapredWork) task.getWork();
+        HashMap<String, PartitionDesc> map = mr.getAliasToPartnInfo();
+
+        // put all aliasToParitionDesc mapping into the map
+        addAliasToPartition(aliasToPath, map);
+
+        MapRedTask mrtask = (MapRedTask) task;
+        MapredWork work = (MapredWork) mrtask.getWork();
+        HashMap<String, Operator<? extends Serializable>> operators = work.getAliasToWork();
+
+        Set entries = operators.entrySet();
+        Iterator<Entry<String, Operator>> iterator = entries.iterator();
+        List<Operator> mapRootOps = new ArrayList<Operator>();
+
+        // get map root operators
+        while (iterator.hasNext()) {
+            Operator next = iterator.next().getValue();
+            if (!mapRootOps.contains(next)) {
+                // clear that only for the case of union
+                mapRootOps.add(next);
+            }
+        }
+
+        // get map local work
+        MapredLocalWork localWork = work.getMapLocalWork();
+        if (localWork != null) {
+            HashMap<String, Operator<? extends Serializable>> localOperators = localWork.getAliasToWork();
+
+            Set localEntries = localOperators.entrySet();
+            Iterator<Entry<String, Operator>> localIterator = localEntries.iterator();
+            while (localIterator.hasNext()) {
+                mapRootOps.add(localIterator.next().getValue());
+            }
+
+            HashMap<String, FetchWork> localFetch = localWork.getAliasToFetchWork();
+            Set localFetchEntries = localFetch.entrySet();
+            Iterator<Entry<String, FetchWork>> localFetchIterator = localFetchEntries.iterator();
+            while (localFetchIterator.hasNext()) {
+                Entry<String, FetchWork> fetchMap = localFetchIterator.next();
+                FetchWork fetch = fetchMap.getValue();
+                String alias = fetchMap.getKey();
+                List<PartitionDesc> dirPart = fetch.getPartDesc();
+
+                // temporary hack: put the first partitionDesc into the map
+                aliasToPath.put(alias, dirPart.get(0));
+            }
+        }
+
+        Boolean visited = tasksVisited.get(task);
+        if (visited != null && visited.booleanValue() == true) {
+            return mapRootOps;
+        }
+
+        // do that only for union operator
+        for (Operator op : mapRootOps)
+            if (op.getParentOperators() != null)
+                op.getParentOperators().clear();
+
+        List<Operator> mapLeaves = new ArrayList<Operator>();
+        downToLeaves(mapRootOps, mapLeaves);
+        List<Operator> reduceOps = new ArrayList<Operator>();
+
+        if (work.getReducer() != null)
+            reduceOps.add(work.getReducer());
+
+        for (Operator mapLeaf : mapLeaves) {
+            mapLeaf.setChildOperators(reduceOps);
+        }
+
+        for (Operator reduceOp : reduceOps) {
+            if (reduceOp != null)
+                reduceOp.setParentOperators(mapLeaves);
+        }
+
+        List<Operator> leafs = new ArrayList<Operator>();
+        if (reduceOps.size() > 0) {
+            downToLeaves(reduceOps, leafs);
+        } else {
+            leafs = mapLeaves;
+        }
+
+        List<Operator> mapChildren = new ArrayList<Operator>();
+        if (task.getChildTasks() != null && task.getChildTasks().size() > 0) {
+            for (Object child : task.getChildTasks()) {
+                List<Operator> childMapOps = articulateMapReduceOperators((Task) child, rootOps, aliasToPath, rootTasks);
+                if (childMapOps == null)
+                    continue;
+
+                for (Operator childMap : childMapOps) {
+                    if (childMap instanceof TableScanOperator) {
+                        TableScanDesc topDesc = (TableScanDesc) childMap.getConf();
+                        if (topDesc == null)
+                            mapChildren.add(childMap);
+                        else {
+                            rootOps.add(childMap);
+                        }
+                    } else {
+                        // if not table scan, add the child
+                        mapChildren.add(childMap);
+                    }
+                }
+            }
+
+            if (mapChildren.size() > 0) {
+                int i = 0;
+                for (Operator leaf : leafs) {
+                    if (leaf.getChildOperators() == null || leaf.getChildOperators().size() == 0)
+                        leaf.setChildOperators(new ArrayList<Operator>());
+                    leaf.getChildOperators().add(mapChildren.get(i));
+                    i++;
+                }
+                i = 0;
+                for (Operator child : mapChildren) {
+                    if (child.getParentOperators() == null || child.getParentOperators().size() == 0)
+                        child.setParentOperators(new ArrayList<Operator>());
+                    child.getParentOperators().add(leafs.get(i));
+                    i++;
+                }
+            }
+        }
+
+        // mark this task as visited
+        this.tasksVisited.put(task, true);
+        return mapRootOps;
+    }
+
+    /**
+     * down to leaf nodes
+     * 
+     * @param ops
+     * @param leaves
+     */
+    private void downToLeaves(List<Operator> ops, List<Operator> leaves) {
+
+        // Operator currentOp;
+        for (Operator op : ops) {
+            if (op != null && op.getChildOperators() != null && op.getChildOperators().size() > 0) {
+                downToLeaves(op.getChildOperators(), leaves);
+            } else {
+                if (op != null && leaves.indexOf(op) < 0)
+                    leaves.add(op);
+            }
+        }
+    }
+
+    private void getLeaves(List<Operator> roots, List<Operator> currentLeaves) {
+        for (Operator op : roots) {
+            List<Operator> children = op.getChildOperators();
+            if (children == null || children.size() <= 0) {
+                currentLeaves.add(op);
+            } else {
+                getLeaves(children, currentLeaves);
+            }
+        }
+    }
+
+    private void executeHyracksJob(JobSpecification job) throws Exception {
+
+        /**
+         * load the properties file if it is not loaded
+         */
+        if (clusterProps == null) {
+            clusterProps = new Properties();
+            InputStream confIn = new FileInputStream(clusterPropertiesPath);
+            clusterProps.load(confIn);
+            confIn.close();
+        }
+
+        if (hcc == null) {
+            BufferedReader ipReader = new BufferedReader(new InputStreamReader(new FileInputStream(masterFilePath)));
+            String masterNode = ipReader.readLine();
+            ipReader.close();
+
+            InetAddress[] ips = InetAddress.getAllByName(masterNode);
+            int port = Integer.parseInt(clusterProps.getProperty("CC_CLIENTPORT"));
+            for (InetAddress ip : ips) {
+                if (ip.getAddress().length <= 4) {
+                    try {
+                        hcc = new HyracksConnection(ip.getHostAddress(), port);
+                        break;
+                    } catch (Exception e) {
+                        continue;
+                    }
+                }
+            }
+        }
+
+        long start = System.currentTimeMillis();
+        JobId jobId = hcc.startJob(job);
+        hcc.waitForCompletion(jobId);
+
+        // System.out.println("job finished: " + jobId.toString());
+        // call all leave nodes to end
+        for (Operator leaf : leaveOps) {
+            jobClose(leaf);
+        }
+
+        long end = System.currentTimeMillis();
+        System.err.println(start + " " + end + " " + (end - start));
+    }
+
+    /**
+     * mv to final directory on hdfs (not real final)
+     * 
+     * @param leaf
+     * @throws Exception
+     */
+    private void jobClose(Operator leaf) throws Exception {
+        FileSinkOperator fsOp = (FileSinkOperator) leaf;
+        FileSinkDesc desc = fsOp.getConf();
+        boolean isNativeTable = !desc.getTableInfo().isNonNative();
+        if ((conf != null) && isNativeTable) {
+            String specPath = desc.getDirName();
+            DynamicPartitionCtx dpCtx = desc.getDynPartCtx();
+            // for 0.7.0
+            fsOp.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx);
+            // for 0.8.0
+            // Utilities.mvFileToFinalPath(specPath, conf, true, LOG, dpCtx,
+            // desc);
+        }
+    }
+}
diff --git a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
index 4c40f5d..4ef74e9 100644
--- a/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
+++ b/hivesterix/hivesterix-dist/src/main/java/org/apache/hadoop/hive/ql/Driver.java
@@ -444,10 +444,11 @@
 
             // hyracks run
             if (sem instanceof SemanticAnalyzer && command.toLowerCase().indexOf("create") < 0) {
-                hivesterix = true;
-                return engine.compileJob(sem.getRootTasks());
+                int engineRet = engine.compileJob(sem.getRootTasks());
+                if (engineRet == 0) {
+                    hivesterix = true;
+                }
             }
-
             return 0;
         } catch (SemanticException e) {
             errorMessage = "FAILED: Error in semantic analysis: " + e.getMessage();
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/job/PregelixJob.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/job/PregelixJob.java
index 31ad348..4cddaf0 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/job/PregelixJob.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/job/PregelixJob.java
@@ -23,8 +23,8 @@
 import edu.uci.ics.pregelix.api.graph.GlobalAggregator;
 import edu.uci.ics.pregelix.api.graph.MessageCombiner;
 import edu.uci.ics.pregelix.api.graph.NormalizedKeyComputer;
-import edu.uci.ics.pregelix.api.graph.VertexPartitioner;
 import edu.uci.ics.pregelix.api.graph.Vertex;
+import edu.uci.ics.pregelix.api.graph.VertexPartitioner;
 import edu.uci.ics.pregelix.api.io.VertexInputFormat;
 import edu.uci.ics.pregelix.api.io.VertexOutputFormat;
 
@@ -72,6 +72,8 @@
     public static final String JOB_ID = "pregelix.jobid";
     /** frame size */
     public static final String FRAME_SIZE = "pregelix.framesize";
+    /** update intensive */
+    public static final String UPDATE_INTENSIVE = "pregelix.updateIntensive";
 
     /**
      * Constructor that will instantiate the configuration
@@ -190,4 +192,13 @@
     final public void setVertexPartitionerClass(Class<?> partitionerClass) {
         getConfiguration().setClass(PARTITIONER_CLASS, partitionerClass, VertexPartitioner.class);
     }
+
+    /**
+     * Indicate if the job needs to do a lot of graph mutations or variable size updates
+     * 
+     * @param updateHeavyFlag
+     */
+    final public void setMutationOrVariableSizedUpdateHeavy(boolean variableSizedUpdateHeavyFlag) {
+        getConfiguration().setBoolean(UPDATE_INTENSIVE, variableSizedUpdateHeavyFlag);
+    }
 }
diff --git a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/BspUtils.java b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/BspUtils.java
index 759c850..03c37dc 100644
--- a/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/BspUtils.java
+++ b/pregelix/pregelix-api/src/main/java/edu/uci/ics/pregelix/api/util/BspUtils.java
@@ -494,4 +494,14 @@
     public static int getFrameSize(Configuration conf) {
         return conf.getInt(PregelixJob.FRAME_SIZE, -1);
     }
+
+    /**
+     * Should the job use LSM or B-tree to store vertices
+     * 
+     * @param conf
+     * @return
+     */
+    public static boolean useLSM(Configuration conf) {
+        return conf.getBoolean(PregelixJob.UPDATE_INTENSIVE, false);
+    }
 }
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGen.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGen.java
index 30e617d..e066637 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGen.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGen.java
@@ -53,10 +53,16 @@
 import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
 import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeSearchOperatorDescriptor;
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
+import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
 import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexDropOperatorDescriptor;
 import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexBulkLoadOperatorDescriptor;
 import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexCreateOperatorDescriptor;
 import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.btree.dataflow.LSMBTreeDataflowHelperFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.ConstantMergePolicyProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.NoOpIOOperationCallback;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.NoOpOperationTrackerProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.SynchronousSchedulerProvider;
 import edu.uci.ics.hyracks.storage.common.IStorageManagerInterface;
 import edu.uci.ics.hyracks.storage.common.file.TransientLocalResourceFactoryProvider;
 import edu.uci.ics.pregelix.api.graph.GlobalAggregator;
@@ -81,6 +87,7 @@
 import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory;
 import edu.uci.ics.pregelix.runtime.bootstrap.IndexLifeCycleManagerProvider;
 import edu.uci.ics.pregelix.runtime.bootstrap.StorageManagerInterface;
+import edu.uci.ics.pregelix.runtime.bootstrap.VirtualBufferCacheProvider;
 import edu.uci.ics.pregelix.runtime.touchpoint.RuntimeHookFactory;
 import edu.uci.ics.pregelix.runtime.touchpoint.VertexIdPartitionComputerFactory;
 import edu.uci.ics.pregelix.runtime.touchpoint.VertexPartitionComputerFactory;
@@ -171,7 +178,7 @@
         IFileSplitProvider fileSplitProvider = ClusterConfig.getFileSplitProvider(jobId, PRIMARY_INDEX);
         TreeIndexCreateOperatorDescriptor btreeCreate = new TreeIndexCreateOperatorDescriptor(spec,
                 storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories, null,
-                new BTreeDataflowHelperFactory(), new TransientLocalResourceFactoryProvider(),
+                getIndexDataflowHelperFactory(), new TransientLocalResourceFactoryProvider(),
                 NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, btreeCreate);
         spec.setFrameSize(frameSize);
@@ -232,7 +239,7 @@
         typeTraits[1] = new TypeTraits(false);
         TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
                 storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories, null,
-                fieldPermutation, DEFAULT_BTREE_FILL_FACTOR, false, 0, new BTreeDataflowHelperFactory(),
+                fieldPermutation, DEFAULT_BTREE_FILL_FACTOR, false, 0, getIndexDataflowHelperFactory(),
                 NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, btreeBulkLoad);
 
@@ -358,7 +365,7 @@
         typeTraits[1] = new TypeTraits(false);
         BTreeSearchOperatorDescriptor scanner = new BTreeSearchOperatorDescriptor(spec, recordDescriptor,
                 storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories, null,
-                null, null, true, true, new BTreeDataflowHelperFactory(), false, NoOpOperationCallbackFactory.INSTANCE);
+                null, null, true, true, getIndexDataflowHelperFactory(), false, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, scanner);
 
         /**
@@ -427,7 +434,7 @@
 
         BTreeSearchOperatorDescriptor scanner = new BTreeSearchOperatorDescriptor(spec, recordDescriptor,
                 storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories, null,
-                null, null, true, true, new BTreeDataflowHelperFactory(), false, NoOpOperationCallbackFactory.INSTANCE);
+                null, null, true, true, getIndexDataflowHelperFactory(), false, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, scanner);
 
         /**
@@ -458,7 +465,7 @@
 
         IFileSplitProvider fileSplitProvider = ClusterConfig.getFileSplitProvider(jobId, indexName);
         IndexDropOperatorDescriptor drop = new IndexDropOperatorDescriptor(spec, storageManagerInterface,
-                lcManagerProvider, fileSplitProvider, new BTreeDataflowHelperFactory());
+                lcManagerProvider, fileSplitProvider, getIndexDataflowHelperFactory());
 
         ClusterConfig.setLocationConstraint(spec, drop);
         spec.addRoot(drop);
@@ -478,6 +485,16 @@
         }
     }
 
+    protected IIndexDataflowHelperFactory getIndexDataflowHelperFactory() {
+        if (BspUtils.useLSM(conf)) {
+            return new LSMBTreeDataflowHelperFactory(new VirtualBufferCacheProvider(), new ConstantMergePolicyProvider(
+                    3), NoOpOperationTrackerProvider.INSTANCE, SynchronousSchedulerProvider.INSTANCE,
+                    NoOpIOOperationCallback.INSTANCE, 0.01);
+        } else {
+            return new BTreeDataflowHelperFactory();
+        }
+    }
+
     /** generate non-first iteration job */
     protected abstract JobSpecification generateNonFirstIteration(int iteration) throws HyracksException;
 
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
index 548a4db..2bab291 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenInnerJoin.java
@@ -35,14 +35,9 @@
 import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
 import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
 import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
-import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
-import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
-import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMLeafFrameFactory;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
 import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor;
 import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
-import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
 import edu.uci.ics.pregelix.api.graph.MsgList;
 import edu.uci.ics.pregelix.api.job.PregelixJob;
 import edu.uci.ics.pregelix.api.util.BspUtils;
@@ -59,11 +54,11 @@
 import edu.uci.ics.pregelix.dataflow.MaterializingWriteOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.TerminationStateWriterOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
-import edu.uci.ics.pregelix.dataflow.std.TreeSearchFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.IndexNestedLoopJoinFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.IndexNestedLoopJoinOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.RuntimeHookOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.TreeIndexBulkReLoadOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.std.TreeSearchFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
 import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory;
 import edu.uci.ics.pregelix.runtime.function.ComputeUpdateFunctionFactory;
@@ -136,9 +131,8 @@
         TreeSearchFunctionUpdateOperatorDescriptor scanner = new TreeSearchFunctionUpdateOperatorDescriptor(spec,
                 recordDescriptor, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
                 comparatorFactories, JobGenUtil.getForwardScan(iteration), null, null, true, true,
-                new BTreeDataflowHelperFactory(), inputRdFactory, 6,
-                new StartComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
-                rdPartialAggregate, rdInsert, rdDelete, rdFinal);
+                getIndexDataflowHelperFactory(), inputRdFactory, 6, new StartComputeUpdateFunctionFactory(confFactory),
+                preHookFactory, null, rdUnnestedMessage, rdDummy, rdPartialAggregate, rdInsert, rdDelete, rdFinal);
         ClusterConfig.setLocationConstraint(spec, scanner);
 
         /**
@@ -166,7 +160,7 @@
                 WritableComparator.get(vertexIdClass).getClass());
         TreeIndexBulkReLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkReLoadOperatorDescriptor(spec,
                 storageManagerInterface, lcManagerProvider, secondaryFileSplitProvider, typeTraits, indexCmpFactories,
-                fieldPermutation, DEFAULT_BTREE_FILL_FACTOR, new BTreeDataflowHelperFactory());
+                fieldPermutation, DEFAULT_BTREE_FILL_FACTOR, getIndexDataflowHelperFactory());
         ClusterConfig.setLocationConstraint(spec, btreeBulkLoad);
 
         /**
@@ -221,7 +215,7 @@
          */
         TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, new BTreeDataflowHelperFactory(),
+                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(),
                 null, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, insertOp);
 
@@ -232,7 +226,7 @@
         TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
                 comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
-                new BTreeDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
+                getIndexDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, deleteOp);
 
         /** construct empty sink operator */
@@ -336,14 +330,9 @@
         ITypeTraits[] typeTraits = new ITypeTraits[2];
         typeTraits[0] = new TypeTraits(false);
         typeTraits[1] = new TypeTraits(false);
-        ITreeIndexFrameFactory interiorFrameFactory = new BTreeNSMInteriorFrameFactory(new TypeAwareTupleWriterFactory(
-                typeTraits));
-        ITreeIndexFrameFactory leafFrameFactory = new BTreeNSMLeafFrameFactory(new TypeAwareTupleWriterFactory(
-                typeTraits));
         IndexNestedLoopJoinOperatorDescriptor setUnion = new IndexNestedLoopJoinOperatorDescriptor(spec, rdFinal,
-                storageManagerInterface, lcManagerProvider, secondaryFileSplitProviderRead, interiorFrameFactory,
-                leafFrameFactory, typeTraits, comparatorFactories, true, keyFields, keyFields, true, true,
-                new BTreeDataflowHelperFactory(), true);
+                storageManagerInterface, lcManagerProvider, secondaryFileSplitProviderRead, typeTraits,
+                comparatorFactories, true, keyFields, keyFields, true, true, getIndexDataflowHelperFactory(), true);
         ClusterConfig.setLocationConstraint(spec, setUnion);
 
         /**
@@ -361,7 +350,7 @@
         IndexNestedLoopJoinFunctionUpdateOperatorDescriptor join = new IndexNestedLoopJoinFunctionUpdateOperatorDescriptor(
                 spec, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories,
                 JobGenUtil.getForwardScan(iteration), keyFields, keyFields, true, true,
-                new BTreeDataflowHelperFactory(), inputRdFactory, 6, new ComputeUpdateFunctionFactory(confFactory),
+                getIndexDataflowHelperFactory(), inputRdFactory, 6, new ComputeUpdateFunctionFactory(confFactory),
                 preHookFactory, null, rdUnnestedMessage, rdDummy, rdPartialAggregate, rdInsert, rdDelete, rdFinal);
         ClusterConfig.setLocationConstraint(spec, join);
 
@@ -376,7 +365,7 @@
         IFileSplitProvider secondaryFileSplitProviderWrite = ClusterConfig.getFileSplitProvider(jobId, writeFile);
         TreeIndexBulkReLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkReLoadOperatorDescriptor(spec,
                 storageManagerInterface, lcManagerProvider, secondaryFileSplitProviderWrite, typeTraits,
-                indexCmpFactories, fieldPermutation, DEFAULT_BTREE_FILL_FACTOR, new BTreeDataflowHelperFactory());
+                indexCmpFactories, fieldPermutation, DEFAULT_BTREE_FILL_FACTOR, getIndexDataflowHelperFactory());
         ClusterConfig.setLocationConstraint(spec, btreeBulkLoad);
 
         /**
@@ -444,7 +433,7 @@
          */
         TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, new BTreeDataflowHelperFactory(),
+                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(),
                 null, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, insertOp);
 
@@ -455,7 +444,7 @@
         TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
                 comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
-                new BTreeDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
+                getIndexDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, deleteOp);
 
         /** construct empty sink operator */
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
index 1949172..3af8921 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoin.java
@@ -35,14 +35,9 @@
 import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
 import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
 import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
-import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
-import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
-import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMLeafFrameFactory;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
 import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor;
 import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
-import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
 import edu.uci.ics.pregelix.api.graph.MsgList;
 import edu.uci.ics.pregelix.api.job.PregelixJob;
 import edu.uci.ics.pregelix.api.util.BspUtils;
@@ -59,9 +54,9 @@
 import edu.uci.ics.pregelix.dataflow.MaterializingWriteOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.TerminationStateWriterOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
-import edu.uci.ics.pregelix.dataflow.std.TreeSearchFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.IndexNestedLoopJoinFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.RuntimeHookOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.std.TreeSearchFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
 import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory;
 import edu.uci.ics.pregelix.runtime.function.ComputeUpdateFunctionFactory;
@@ -130,9 +125,8 @@
         TreeSearchFunctionUpdateOperatorDescriptor scanner = new TreeSearchFunctionUpdateOperatorDescriptor(spec,
                 recordDescriptor, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
                 comparatorFactories, JobGenUtil.getForwardScan(iteration), null, null, true, true,
-                new BTreeDataflowHelperFactory(), inputRdFactory, 5,
-                new StartComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
-                rdPartialAggregate, rdInsert, rdDelete);
+                getIndexDataflowHelperFactory(), inputRdFactory, 5, new StartComputeUpdateFunctionFactory(confFactory),
+                preHookFactory, null, rdUnnestedMessage, rdDummy, rdPartialAggregate, rdInsert, rdDelete);
         ClusterConfig.setLocationConstraint(spec, scanner);
 
         /**
@@ -204,8 +198,8 @@
         int[] fieldPermutation = new int[] { 0, 1 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, new BTreeDataflowHelperFactory(),
-                null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
+                NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, insertOp);
 
         /**
@@ -214,8 +208,8 @@
         int[] fieldPermutationDelete = new int[] { 0 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
-                new BTreeDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
+                null, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, deleteOp);
 
         /** construct empty sink operator */
@@ -315,10 +309,6 @@
         ITypeTraits[] typeTraits = new ITypeTraits[2];
         typeTraits[0] = new TypeTraits(false);
         typeTraits[1] = new TypeTraits(false);
-        ITreeIndexFrameFactory interiorFrameFactory = new BTreeNSMInteriorFrameFactory(new TypeAwareTupleWriterFactory(
-                typeTraits));
-        ITreeIndexFrameFactory leafFrameFactory = new BTreeNSMLeafFrameFactory(new TypeAwareTupleWriterFactory(
-                typeTraits));
         INullWriterFactory[] nullWriterFactories = new INullWriterFactory[2];
         nullWriterFactories[0] = VertexIdNullWriterFactory.INSTANCE;
         nullWriterFactories[1] = MsgListNullWriterFactory.INSTANCE;
@@ -332,11 +322,10 @@
                 vertexIdClass.getName(), MsgList.class.getName(), vertexIdClass.getName(), vertexClass.getName());
 
         IndexNestedLoopJoinFunctionUpdateOperatorDescriptor join = new IndexNestedLoopJoinFunctionUpdateOperatorDescriptor(
-                spec, storageManagerInterface, lcManagerProvider, fileSplitProvider, interiorFrameFactory,
-                leafFrameFactory, typeTraits, comparatorFactories, JobGenUtil.getForwardScan(iteration), keyFields,
-                keyFields, true, true, new BTreeDataflowHelperFactory(), true, nullWriterFactories, inputRdFactory, 5,
-                new ComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
-                rdPartialAggregate, rdInsert, rdDelete);
+                spec, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories,
+                JobGenUtil.getForwardScan(iteration), keyFields, keyFields, true, true, getIndexDataflowHelperFactory(), true,
+                nullWriterFactories, inputRdFactory, 5, new ComputeUpdateFunctionFactory(confFactory), preHookFactory,
+                null, rdUnnestedMessage, rdDummy, rdPartialAggregate, rdInsert, rdDelete);
         ClusterConfig.setLocationConstraint(spec, join);
 
         /**
@@ -405,8 +394,8 @@
         int[] fieldPermutation = new int[] { 0, 1 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, new BTreeDataflowHelperFactory(),
-                null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
+                NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, insertOp);
 
         /**
@@ -415,8 +404,8 @@
         int[] fieldPermutationDelete = new int[] { 0 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
-                new BTreeDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
+                null, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, deleteOp);
 
         /** construct empty sink operator */
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
index 55a30f9..50949aa 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSingleSort.java
@@ -34,14 +34,9 @@
 import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
 import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
 import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
-import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
-import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
-import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMLeafFrameFactory;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
 import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor;
 import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
-import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
 import edu.uci.ics.pregelix.api.graph.MsgList;
 import edu.uci.ics.pregelix.api.job.PregelixJob;
 import edu.uci.ics.pregelix.api.util.BspUtils;
@@ -58,9 +53,9 @@
 import edu.uci.ics.pregelix.dataflow.MaterializingWriteOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.TerminationStateWriterOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
-import edu.uci.ics.pregelix.dataflow.std.TreeSearchFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.IndexNestedLoopJoinFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.RuntimeHookOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.std.TreeSearchFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
 import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory;
 import edu.uci.ics.pregelix.runtime.function.ComputeUpdateFunctionFactory;
@@ -132,9 +127,8 @@
         TreeSearchFunctionUpdateOperatorDescriptor scanner = new TreeSearchFunctionUpdateOperatorDescriptor(spec,
                 recordDescriptor, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
                 comparatorFactories, JobGenUtil.getForwardScan(iteration), null, null, true, true,
-                new BTreeDataflowHelperFactory(), inputRdFactory, 5,
-                new StartComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
-                rdPartialAggregate, rdInsert, rdDelete);
+                getIndexDataflowHelperFactory(), inputRdFactory, 5, new StartComputeUpdateFunctionFactory(confFactory),
+                preHookFactory, null, rdUnnestedMessage, rdDummy, rdPartialAggregate, rdInsert, rdDelete);
         ClusterConfig.setLocationConstraint(spec, scanner);
 
         /**
@@ -196,8 +190,8 @@
         int[] fieldPermutation = new int[] { 0, 1 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, new BTreeDataflowHelperFactory(),
-                null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
+                NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, insertOp);
 
         /**
@@ -206,8 +200,8 @@
         int[] fieldPermutationDelete = new int[] { 0 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
-                new BTreeDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
+                null, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, deleteOp);
 
         /** construct empty sink operator */
@@ -304,10 +298,6 @@
         ITypeTraits[] typeTraits = new ITypeTraits[2];
         typeTraits[0] = new TypeTraits(false);
         typeTraits[1] = new TypeTraits(false);
-        ITreeIndexFrameFactory interiorFrameFactory = new BTreeNSMInteriorFrameFactory(new TypeAwareTupleWriterFactory(
-                typeTraits));
-        ITreeIndexFrameFactory leafFrameFactory = new BTreeNSMLeafFrameFactory(new TypeAwareTupleWriterFactory(
-                typeTraits));
         INullWriterFactory[] nullWriterFactories = new INullWriterFactory[2];
         nullWriterFactories[0] = VertexIdNullWriterFactory.INSTANCE;
         nullWriterFactories[1] = MsgListNullWriterFactory.INSTANCE;
@@ -321,11 +311,10 @@
                 vertexIdClass.getName(), MsgList.class.getName(), vertexIdClass.getName(), vertexClass.getName());
 
         IndexNestedLoopJoinFunctionUpdateOperatorDescriptor join = new IndexNestedLoopJoinFunctionUpdateOperatorDescriptor(
-                spec, storageManagerInterface, lcManagerProvider, fileSplitProvider, interiorFrameFactory,
-                leafFrameFactory, typeTraits, comparatorFactories, JobGenUtil.getForwardScan(iteration), keyFields,
-                keyFields, true, true, new BTreeDataflowHelperFactory(), true, nullWriterFactories, inputRdFactory, 5,
-                new ComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
-                rdPartialAggregate, rdInsert, rdDelete);
+                spec, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories,
+                JobGenUtil.getForwardScan(iteration), keyFields, keyFields, true, true, getIndexDataflowHelperFactory(), true,
+                nullWriterFactories, inputRdFactory, 5, new ComputeUpdateFunctionFactory(confFactory), preHookFactory,
+                null, rdUnnestedMessage, rdDummy, rdPartialAggregate, rdInsert, rdDelete);
         ClusterConfig.setLocationConstraint(spec, join);
 
         /**
@@ -382,8 +371,8 @@
         int[] fieldPermutation = new int[] { 0, 1 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, new BTreeDataflowHelperFactory(),
-                null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
+                NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, insertOp);
 
         /**
@@ -392,8 +381,8 @@
         int[] fieldPermutationDelete = new int[] { 0 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
-                new BTreeDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
+                null, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, deleteOp);
 
         /** construct empty sink operator */
diff --git a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
index 4d58326..362e413 100644
--- a/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
+++ b/pregelix/pregelix-core/src/main/java/edu/uci/ics/pregelix/core/jobgen/JobGenOuterJoinSort.java
@@ -34,14 +34,9 @@
 import edu.uci.ics.hyracks.dataflow.std.group.IAggregatorDescriptorFactory;
 import edu.uci.ics.hyracks.dataflow.std.group.preclustered.PreclusteredGroupOperatorDescriptor;
 import edu.uci.ics.hyracks.dataflow.std.sort.ExternalSortOperatorDescriptor;
-import edu.uci.ics.hyracks.storage.am.btree.dataflow.BTreeDataflowHelperFactory;
-import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMInteriorFrameFactory;
-import edu.uci.ics.hyracks.storage.am.btree.frames.BTreeNSMLeafFrameFactory;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
 import edu.uci.ics.hyracks.storage.am.common.dataflow.TreeIndexInsertUpdateDeleteOperatorDescriptor;
 import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
 import edu.uci.ics.hyracks.storage.am.common.ophelpers.IndexOperation;
-import edu.uci.ics.hyracks.storage.am.common.tuples.TypeAwareTupleWriterFactory;
 import edu.uci.ics.pregelix.api.graph.MsgList;
 import edu.uci.ics.pregelix.api.job.PregelixJob;
 import edu.uci.ics.pregelix.api.util.BspUtils;
@@ -58,9 +53,9 @@
 import edu.uci.ics.pregelix.dataflow.MaterializingWriteOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.TerminationStateWriterOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.base.IConfigurationFactory;
-import edu.uci.ics.pregelix.dataflow.std.TreeSearchFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.IndexNestedLoopJoinFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.RuntimeHookOperatorDescriptor;
+import edu.uci.ics.pregelix.dataflow.std.TreeSearchFunctionUpdateOperatorDescriptor;
 import edu.uci.ics.pregelix.dataflow.std.base.IRecordDescriptorFactory;
 import edu.uci.ics.pregelix.dataflow.std.base.IRuntimeHookFactory;
 import edu.uci.ics.pregelix.runtime.function.ComputeUpdateFunctionFactory;
@@ -129,9 +124,8 @@
         TreeSearchFunctionUpdateOperatorDescriptor scanner = new TreeSearchFunctionUpdateOperatorDescriptor(spec,
                 recordDescriptor, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
                 comparatorFactories, JobGenUtil.getForwardScan(iteration), null, null, true, true,
-                new BTreeDataflowHelperFactory(), inputRdFactory, 5,
-                new StartComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
-                rdPartialAggregate, rdInsert, rdDelete);
+                getIndexDataflowHelperFactory(), inputRdFactory, 5, new StartComputeUpdateFunctionFactory(confFactory),
+                preHookFactory, null, rdUnnestedMessage, rdDummy, rdPartialAggregate, rdInsert, rdDelete);
         ClusterConfig.setLocationConstraint(spec, scanner);
 
         /**
@@ -210,8 +204,8 @@
         int[] fieldPermutation = new int[] { 0, 1 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, new BTreeDataflowHelperFactory(),
-                null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
+                NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, insertOp);
 
         /**
@@ -220,8 +214,8 @@
         int[] fieldPermutationDelete = new int[] { 0 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
-                new BTreeDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
+                null, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, deleteOp);
 
         /** construct empty sink operator */
@@ -318,10 +312,6 @@
         ITypeTraits[] typeTraits = new ITypeTraits[2];
         typeTraits[0] = new TypeTraits(false);
         typeTraits[1] = new TypeTraits(false);
-        ITreeIndexFrameFactory interiorFrameFactory = new BTreeNSMInteriorFrameFactory(new TypeAwareTupleWriterFactory(
-                typeTraits));
-        ITreeIndexFrameFactory leafFrameFactory = new BTreeNSMLeafFrameFactory(new TypeAwareTupleWriterFactory(
-                typeTraits));
         INullWriterFactory[] nullWriterFactories = new INullWriterFactory[2];
         nullWriterFactories[0] = VertexIdNullWriterFactory.INSTANCE;
         nullWriterFactories[1] = MsgListNullWriterFactory.INSTANCE;
@@ -335,11 +325,10 @@
                 vertexIdClass.getName(), MsgList.class.getName(), vertexIdClass.getName(), vertexClass.getName());
 
         IndexNestedLoopJoinFunctionUpdateOperatorDescriptor join = new IndexNestedLoopJoinFunctionUpdateOperatorDescriptor(
-                spec, storageManagerInterface, lcManagerProvider, fileSplitProvider, interiorFrameFactory,
-                leafFrameFactory, typeTraits, comparatorFactories, JobGenUtil.getForwardScan(iteration), keyFields,
-                keyFields, true, true, new BTreeDataflowHelperFactory(), true, nullWriterFactories, inputRdFactory, 5,
-                new ComputeUpdateFunctionFactory(confFactory), preHookFactory, null, rdUnnestedMessage, rdDummy,
-                rdPartialAggregate, rdInsert, rdDelete);
+                spec, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits, comparatorFactories,
+                JobGenUtil.getForwardScan(iteration), keyFields, keyFields, true, true, getIndexDataflowHelperFactory(), true,
+                nullWriterFactories, inputRdFactory, 5, new ComputeUpdateFunctionFactory(confFactory), preHookFactory,
+                null, rdUnnestedMessage, rdDummy, rdPartialAggregate, rdInsert, rdDelete);
         ClusterConfig.setLocationConstraint(spec, join);
 
         /**
@@ -415,8 +404,8 @@
         int[] fieldPermutation = new int[] { 0, 1 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor insertOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdInsert, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, new BTreeDataflowHelperFactory(),
-                null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutation, IndexOperation.INSERT, getIndexDataflowHelperFactory(), null,
+                NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, insertOp);
 
         /**
@@ -425,8 +414,8 @@
         int[] fieldPermutationDelete = new int[] { 0 };
         TreeIndexInsertUpdateDeleteOperatorDescriptor deleteOp = new TreeIndexInsertUpdateDeleteOperatorDescriptor(
                 spec, rdDelete, storageManagerInterface, lcManagerProvider, fileSplitProvider, typeTraits,
-                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE,
-                new BTreeDataflowHelperFactory(), null, NoOpOperationCallbackFactory.INSTANCE);
+                comparatorFactories, null, fieldPermutationDelete, IndexOperation.DELETE, getIndexDataflowHelperFactory(),
+                null, NoOpOperationCallbackFactory.INSTANCE);
         ClusterConfig.setLocationConstraint(spec, deleteOp);
 
         /** construct empty sink operator */
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorDescriptor.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorDescriptor.java
index 0b3a7fe..e450380 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorDescriptor.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinFunctionUpdateOperatorDescriptor.java
@@ -26,7 +26,6 @@
 import edu.uci.ics.hyracks.api.job.JobSpecification;
 import edu.uci.ics.hyracks.dataflow.std.file.IFileSplitProvider;
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexLifecycleManagerProvider;
-import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrameFactory;
 import edu.uci.ics.hyracks.storage.am.common.dataflow.AbstractTreeIndexOperatorDescriptor;
 import edu.uci.ics.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
 import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallbackFactory;
@@ -91,8 +90,7 @@
 
     public IndexNestedLoopJoinFunctionUpdateOperatorDescriptor(JobSpecification spec,
             IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lcManagerProvider,
-            IFileSplitProvider fileSplitProvider, ITreeIndexFrameFactory interiorFrameFactory,
-            ITreeIndexFrameFactory leafFrameFactory, ITypeTraits[] typeTraits,
+            IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
             IBinaryComparatorFactory[] comparatorFactories, boolean isForward, int[] lowKeyFields, int[] highKeyFields,
             boolean lowKeyInclusive, boolean highKeyInclusive, IIndexDataflowHelperFactory opHelperFactory,
             boolean isRightOuter, INullWriterFactory[] nullWriterFactories, IRecordDescriptorFactory inputRdFactory,
@@ -125,8 +123,7 @@
 
     public IndexNestedLoopJoinFunctionUpdateOperatorDescriptor(JobSpecification spec,
             IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lcManagerProvider,
-            IFileSplitProvider fileSplitProvider, ITreeIndexFrameFactory interiorFrameFactory,
-            ITreeIndexFrameFactory leafFrameFactory, ITypeTraits[] typeTraits,
+            IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
             IBinaryComparatorFactory[] comparatorFactories, boolean isForward, int[] lowKeyFields, int[] highKeyFields,
             boolean lowKeyInclusive, boolean highKeyInclusive, IIndexDataflowHelperFactory opHelperFactory,
             boolean isSetUnion, IRecordDescriptorFactory inputRdFactory, int outputArity,
diff --git a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinOperatorDescriptor.java b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinOperatorDescriptor.java
index 6dc713c..440ae86 100644
--- a/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinOperatorDescriptor.java
+++ b/pregelix/pregelix-dataflow-std/src/main/java/edu/uci/ics/pregelix/dataflow/std/IndexNestedLoopJoinOperatorDescriptor.java
@@ -87,8 +87,7 @@
 
     public IndexNestedLoopJoinOperatorDescriptor(JobSpecification spec, RecordDescriptor recDesc,
             IStorageManagerInterface storageManager, IIndexLifecycleManagerProvider lcManagerProvider,
-            IFileSplitProvider fileSplitProvider, ITreeIndexFrameFactory interiorFrameFactory,
-            ITreeIndexFrameFactory leafFrameFactory, ITypeTraits[] typeTraits,
+            IFileSplitProvider fileSplitProvider, ITypeTraits[] typeTraits,
             IBinaryComparatorFactory[] comparatorFactories, boolean isForward, int[] lowKeyFields, int[] highKeyFields,
             boolean lowKeyInclusive, boolean highKeyInclusive, IIndexDataflowHelperFactory opHelperFactory,
             boolean isSetUnion) {
diff --git a/pregelix/pregelix-dataflow/pom.xml b/pregelix/pregelix-dataflow/pom.xml
index 962c9f6..2828451 100644
--- a/pregelix/pregelix-dataflow/pom.xml
+++ b/pregelix/pregelix-dataflow/pom.xml
@@ -1,18 +1,14 @@
-<!--
- ! Copyright 2009-2013 by The Regents of the University of California
- ! Licensed under the Apache License, Version 2.0 (the "License");
- ! you may not use this file except in compliance with the License.
- ! you may obtain a copy of the License from
- ! 
- !     http://www.apache.org/licenses/LICENSE-2.0
- ! 
- ! Unless required by applicable law or agreed to in writing, software
- ! distributed under the License is distributed on an "AS IS" BASIS,
- ! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ! See the License for the specific language governing permissions and
- ! limitations under the License.
- !-->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+<!-- ! Copyright 2009-2013 by The Regents of the University of California 
+	! Licensed under the Apache License, Version 2.0 (the "License"); ! you may 
+	not use this file except in compliance with the License. ! you may obtain 
+	a copy of the License from ! ! http://www.apache.org/licenses/LICENSE-2.0 
+	! ! Unless required by applicable law or agreed to in writing, software ! 
+	distributed under the License is distributed on an "AS IS" BASIS, ! WITHOUT 
+	WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ! See the 
+	License for the specific language governing permissions and ! limitations 
+	under the License. ! -->
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 	<modelVersion>4.0.0</modelVersion>
 	<artifactId>pregelix-dataflow</artifactId>
 	<packaging>jar</packaging>
@@ -134,6 +130,13 @@
 		</dependency>
 		<dependency>
 			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-storage-am-lsm-common</artifactId>
+			<version>0.2.7-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
 			<artifactId>hyracks-control-cc</artifactId>
 			<version>0.2.7-SNAPSHOT</version>
 			<type>jar</type>
diff --git a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java
index b86691c..2008cf0 100644
--- a/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java
+++ b/pregelix/pregelix-dataflow/src/main/java/edu/uci/ics/pregelix/dataflow/context/RuntimeContext.java
@@ -31,6 +31,7 @@
 import edu.uci.ics.hyracks.control.nc.io.IOManager;
 import edu.uci.ics.hyracks.storage.am.common.api.IIndexLifecycleManager;
 import edu.uci.ics.hyracks.storage.am.common.dataflow.IndexLifecycleManager;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
 import edu.uci.ics.hyracks.storage.common.buffercache.BufferCache;
 import edu.uci.ics.hyracks.storage.common.buffercache.ClockPageReplacementStrategy;
 import edu.uci.ics.hyracks.storage.common.buffercache.HeapBufferAllocator;
@@ -52,6 +53,7 @@
     private final ILocalResourceRepository localResourceRepository;
     private final ResourceIdFactory resourceIdFactory;
     private final IBufferCache bufferCache;
+    private final IVirtualBufferCache vBufferCache = null;
     private final IFileMapManager fileMapManager;
     private final Map<StateKey, IStateObject> appStateMap = new ConcurrentHashMap<StateKey, IStateObject>();
     private final Map<String, Long> giraphJobIdToSuperStep = new ConcurrentHashMap<String, Long>();
@@ -76,6 +78,8 @@
         bufferCache = new BufferCache(appCtx.getRootContext().getIOManager(), allocator, prs,
                 new PreDelayPageCleanerPolicy(Long.MAX_VALUE), fileMapManager, pageSize, numPages, 1000000,
                 threadFactory);
+        //vBufferCache = new MultitenantVirtualBufferCache(new VirtualBufferCache(new HeapBufferAllocator(), pageSize,
+        //        numPages / 2));
         ioManager = (IOManager) appCtx.getRootContext().getIOManager();
         lcManager = new IndexLifecycleManager();
         localResourceRepository = new TransientLocalResourceRepository();
@@ -110,6 +114,10 @@
         return bufferCache;
     }
 
+    public IVirtualBufferCache getVirtualBufferCache() {
+        return vBufferCache;
+    }
+
     public IFileMapProvider getFileMapManager() {
         return fileMapManager;
     }
diff --git a/pregelix/pregelix-runtime/pom.xml b/pregelix/pregelix-runtime/pom.xml
index ae9f47e..54e2256 100644
--- a/pregelix/pregelix-runtime/pom.xml
+++ b/pregelix/pregelix-runtime/pom.xml
@@ -148,6 +148,13 @@
 		</dependency>
 		<dependency>
 			<groupId>edu.uci.ics.hyracks</groupId>
+			<artifactId>hyracks-storage-am-lsm-common</artifactId>
+			<version>0.2.7-SNAPSHOT</version>
+			<type>jar</type>
+			<scope>compile</scope>
+		</dependency>
+		<dependency>
+			<groupId>edu.uci.ics.hyracks</groupId>
 			<artifactId>hyracks-control-cc</artifactId>
 			<version>0.2.7-SNAPSHOT</version>
 			<type>jar</type>
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/bootstrap/VirtualBufferCacheProvider.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/bootstrap/VirtualBufferCacheProvider.java
new file mode 100644
index 0000000..ec51047
--- /dev/null
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/bootstrap/VirtualBufferCacheProvider.java
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.runtime.bootstrap;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCache;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.IVirtualBufferCacheProvider;
+import edu.uci.ics.pregelix.dataflow.context.RuntimeContext;
+
+/**
+ * The virtual buffer cache provider
+ * 
+ * @author yingyib
+ */
+public class VirtualBufferCacheProvider implements IVirtualBufferCacheProvider {
+
+    private static final long serialVersionUID = 1L;
+
+    public VirtualBufferCacheProvider(){
+        
+    }
+    
+    @Override
+    public synchronized IVirtualBufferCache getVirtualBufferCache(IHyracksTaskContext ctx) {
+        return RuntimeContext.get(ctx).getVirtualBufferCache();
+    }
+}
diff --git a/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/NoOpUpdateFunctionFactory.java b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/NoOpUpdateFunctionFactory.java
new file mode 100644
index 0000000..88577c2
--- /dev/null
+++ b/pregelix/pregelix-runtime/src/main/java/edu/uci/ics/pregelix/runtime/function/NoOpUpdateFunctionFactory.java
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package edu.uci.ics.pregelix.runtime.function;
+
+import edu.uci.ics.hyracks.api.comm.IFrameWriter;
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.pregelix.dataflow.std.base.IUpdateFunction;
+import edu.uci.ics.pregelix.dataflow.std.base.IUpdateFunctionFactory;
+
+/**
+ * No operation update function factory
+ * 
+ * @author yingyib
+ */
+public class NoOpUpdateFunctionFactory implements IUpdateFunctionFactory {
+    private static final long serialVersionUID = 1L;
+    public static NoOpUpdateFunctionFactory INSTANCE = new NoOpUpdateFunctionFactory();
+
+    private NoOpUpdateFunctionFactory() {
+
+    }
+
+    @Override
+    public IUpdateFunction createFunction() {
+        return new IUpdateFunction() {
+
+            @Override
+            public void open(IHyracksTaskContext ctx, RecordDescriptor rd, IFrameWriter... writer)
+                    throws HyracksDataException {
+
+            }
+
+            @Override
+            public void process(Object[] tuple) throws HyracksDataException {
+
+            }
+
+            @Override
+            public void close() throws HyracksDataException {
+
+            }
+
+            @Override
+            public void update(ITupleReference tupleRef, ArrayTupleBuilder cloneUpdateTb) throws HyracksDataException {
+
+            }
+
+        };
+    }
+
+}