[ASTERIXDB-2933][COMP][EXT] Pushdowns Part1: Expected Schema Node

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
We want to have a more aggressive pushdown for Parquet. To do so,
we need to compute the expected schema from the expressions in the
query plan. This change introduces the data structure that holds the
expected schema information.

Note:
The expected schema classes were moved from asterix-om to
asterix-algebra so that they are not exposed outside the compiler.

Change-Id: I4b5aef414050a518fe230326e92ed66734b7b1d2
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12765
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Wael Alkowaileet <wael.y.k@gmail.com>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/AbstractComplexExpectedSchemaNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/AbstractComplexExpectedSchemaNode.java
new file mode 100644
index 0000000..5da5149
--- /dev/null
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/AbstractComplexExpectedSchemaNode.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules.pushdown.schema;
+
+import org.apache.hyracks.api.exceptions.SourceLocation;
+
+/** Base class of the nested expected schema nodes, i.e., ARRAY, OBJECT and UNION. */
+public abstract class AbstractComplexExpectedSchemaNode extends AbstractExpectedSchemaNode {
+    AbstractComplexExpectedSchemaNode(AbstractComplexExpectedSchemaNode parent, SourceLocation sourceLocation,
+            String functionName) {
+        super(parent, sourceLocation, functionName);
+    }
+
+    @Override
+    public IExpectedSchemaNode replaceIfNeeded(ExpectedSchemaNodeType expectedNodeType, SourceLocation sourceLocation,
+            String functionName) {
+        //The expected type matches the type of this node: no change is required and this node is returned
+        IExpectedSchemaNode node = this;
+        if (expectedNodeType == ExpectedSchemaNodeType.ANY) {
+            /*
+             * We want to fall back to ANY. This could happen if one expression needs a nested value of this
+             * node, but another expression needs the entire node. So, we fall back to ANY and remove any
+             * information about the nested value. For example:
+             * SELECT t.hashtags[*].text, t.hashtags
+             * FROM Tweets t
+             * In this case, we first saw (t.hashtags[*].text), but the next expression (t.hashtags) requested
+             * the entire hashtags. So, the expected type for hashtags should be ANY
+             */
+            node = new AnyExpectedSchemaNode(getParent(), getSourceLocation(), getFunctionName());
+            getParent().replaceChild(this, node);
+        } else if (expectedNodeType != getType()) {
+            /*
+             * We need to change the type to UNION, as the same value was accessed as an ARRAY and as an OBJECT.
+             * This is possible if we have heterogeneous value access in the query.
+             */
+            //Create the UNION node. Its parent is the current parent of this node
+            UnionExpectedSchemaNode unionSchemaNode =
+                    new UnionExpectedSchemaNode(getParent(), getSourceLocation(), getFunctionName());
+            //Add this node as a child of UNION (keyed by the type of this node)
+            unionSchemaNode.addChild(this);
+            /*
+             * Replace the reference of this in its parent with the union node
+             * Before: parent --> this
+             * After:  parent --> UNION --> this
+             */
+            getParent().replaceChild(this, unionSchemaNode);
+            /*
+             * Set the parent of this to union
+             * Before: oldParent <-- this
+             * After:  oldParent <-- UNION <-- this
+             */
+            setParent(unionSchemaNode);
+            /*
+             * Add the new child with the expected type to union
+             * Before: UNION <-- this
+             * After:  UNION <-- (this, newChild)
+             */
+            unionSchemaNode.createChild(expectedNodeType, sourceLocation, functionName);
+            node = unionSchemaNode;
+        }
+        return node;
+    }
+
+    /** Replaces {@code oldNode} with {@code newNode} among the children of this node (not supported by UNION). */
+    protected abstract void replaceChild(IExpectedSchemaNode oldNode, IExpectedSchemaNode newNode);
+
+    /** Creates an ARRAY, OBJECT or UNION node whose parent is {@code parent}; fails for any other type. */
+    public static AbstractComplexExpectedSchemaNode createNestedNode(ExpectedSchemaNodeType type,
+            AbstractComplexExpectedSchemaNode parent, SourceLocation sourceLocation, String functionName) {
+        switch (type) {
+            case ARRAY:
+                return new ArrayExpectedSchemaNode(parent, sourceLocation, functionName);
+            case OBJECT:
+                return new ObjectExpectedSchemaNode(parent, sourceLocation, functionName);
+            case UNION:
+                return new UnionExpectedSchemaNode(parent, sourceLocation, functionName);
+            default:
+                throw new IllegalStateException(type + " is not nested or unknown");
+        }
+    }
+}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/AbstractExpectedSchemaNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/AbstractExpectedSchemaNode.java
new file mode 100644
index 0000000..3c6cc95
--- /dev/null
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/AbstractExpectedSchemaNode.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules.pushdown.schema;
+
+import org.apache.hyracks.api.exceptions.SourceLocation;
+
+/** Holds what all expected schema nodes share: the parent, the source location and the function name. */
+abstract class AbstractExpectedSchemaNode implements IExpectedSchemaNode {
+    private AbstractComplexExpectedSchemaNode parent; //not final: can be changed using setParent()
+    private final SourceLocation sourceLocation;
+    private final String functionName;
+    AbstractExpectedSchemaNode(AbstractComplexExpectedSchemaNode parent, SourceLocation sourceLocation,
+            String functionName) {
+        this.parent = parent;
+        this.sourceLocation = sourceLocation;
+        this.functionName = functionName;
+    }
+
+    @Override
+    public final AbstractComplexExpectedSchemaNode getParent() {
+        return parent;
+    }
+
+    @Override
+    public final SourceLocation getSourceLocation() {
+        return sourceLocation;
+    }
+
+    @Override
+    public final String getFunctionName() {
+        return functionName;
+    }
+
+    protected void setParent(AbstractComplexExpectedSchemaNode parent) {
+        this.parent = parent;
+    }
+}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/AnyExpectedSchemaNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/AnyExpectedSchemaNode.java
new file mode 100644
index 0000000..834a405
--- /dev/null
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/AnyExpectedSchemaNode.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules.pushdown.schema;
+
+import org.apache.hyracks.api.exceptions.SourceLocation;
+
+/** A typeless node without children: nothing is known (yet) about the type of the value, or all of it is needed. */
+public class AnyExpectedSchemaNode extends AbstractExpectedSchemaNode {
+    public AnyExpectedSchemaNode(AbstractComplexExpectedSchemaNode parent, SourceLocation sourceLocation,
+            String functionName) {
+        super(parent, sourceLocation, functionName);
+    }
+
+    @Override
+    public IExpectedSchemaNode replaceIfNeeded(ExpectedSchemaNodeType expectedNodeType, SourceLocation sourceLocation,
+            String functionName) {
+        if (expectedNodeType == ExpectedSchemaNodeType.ANY) {
+            return this;
+        }
+        /*
+         * ANY node is typeless (i.e., we do not know what is the possible type of ANY node) when we created it.
+         * However, now the query says it is (possibly) a nested value. We know that because there is a field
+         * access expression or an array access expression on that node. So, we replace the ANY node with the
+         * given nested type, using the given source location and function name (i.e., both of the same access).
+         */
+        AbstractComplexExpectedSchemaNode parent = getParent();
+        AbstractComplexExpectedSchemaNode nestedNode = AbstractComplexExpectedSchemaNode
+                .createNestedNode(expectedNodeType, parent, sourceLocation, functionName);
+        parent.replaceChild(this, nestedNode);
+        return nestedNode;
+    }
+
+    @Override
+    public ExpectedSchemaNodeType getType() {
+        return ExpectedSchemaNodeType.ANY;
+    }
+
+    @Override
+    public <R, T> R accept(IExpectedSchemaNodeVisitor<R, T> visitor, T arg) {
+        return visitor.visit(this, arg);
+    }
+}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ArrayExpectedSchemaNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ArrayExpectedSchemaNode.java
new file mode 100644
index 0000000..b000dc8
--- /dev/null
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ArrayExpectedSchemaNode.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules.pushdown.schema;
+
+import org.apache.hyracks.api.exceptions.SourceLocation;
+
+/** An ARRAY node, which has (at most) one child node. */
+public class ArrayExpectedSchemaNode extends AbstractComplexExpectedSchemaNode {
+    private IExpectedSchemaNode child; //not set by the constructor; see addChild()
+    ArrayExpectedSchemaNode(AbstractComplexExpectedSchemaNode parent, SourceLocation sourceLocation,
+            String functionName) {
+        super(parent, sourceLocation, functionName);
+    }
+
+    @Override
+    public ExpectedSchemaNodeType getType() {
+        return ExpectedSchemaNodeType.ARRAY;
+    }
+
+    public IExpectedSchemaNode getChild() {
+        return child;
+    }
+
+    public void addChild(IExpectedSchemaNode child) {
+        this.child = child;
+    }
+
+    @Override
+    public <R, T> R accept(IExpectedSchemaNodeVisitor<R, T> visitor, T arg) {
+        return visitor.visit(this, arg);
+    }
+
+    @Override
+    public void replaceChild(IExpectedSchemaNode oldNode, IExpectedSchemaNode newNode) {
+        if (oldNode == child) {
+            child = newNode;
+            return;
+        }
+        throw new IllegalStateException("Node " + oldNode.getType() + " is not a child"); //should not happen
+    }
+}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ExpectedSchemaNodeType.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ExpectedSchemaNodeType.java
new file mode 100644
index 0000000..e6c88b4
--- /dev/null
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ExpectedSchemaNodeType.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules.pushdown.schema;
+
+public enum ExpectedSchemaNodeType {
+    ARRAY, //nested: see ArrayExpectedSchemaNode
+    OBJECT, //nested: see ObjectExpectedSchemaNode (and RootExpectedSchemaNode)
+    UNION, //nested: the same value was accessed as an ARRAY and as an OBJECT; see UnionExpectedSchemaNode
+    ANY //typeless: the type is not known (yet) or the entire value is needed; see AnyExpectedSchemaNode
+}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/IExpectedSchemaNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/IExpectedSchemaNode.java
new file mode 100644
index 0000000..454e32e
--- /dev/null
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/IExpectedSchemaNode.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules.pushdown.schema;
+
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.api.exceptions.SourceLocation;
+
+/**
+ * A node of the expected schema, which is computed from the value access expressions in a query.
+ */
+public interface IExpectedSchemaNode {
+
+    /**
+     * @return node type
+     */
+    ExpectedSchemaNodeType getType();
+
+    /**
+     * @return source location of the value access
+     */
+    SourceLocation getSourceLocation();
+
+    /**
+     * @return value access function name
+     */
+    String getFunctionName();
+
+    /**
+     * @return the parent of this node ({@code null} for {@link RootExpectedSchemaNode})
+     */
+    AbstractComplexExpectedSchemaNode getParent();
+
+    /**
+     * Accepts a visitor, i.e., calls the visit method of the visitor that corresponds to the type of this node.
+     * @param visitor schema node visitor
+     * @param arg     any argument might be needed by the visitor
+     * @param <R>     return type
+     * @param <T>     argument type
+     * @return the value returned by the visitor
+     */
+    <R, T> R accept(IExpectedSchemaNodeVisitor<R, T> visitor, T arg);
+
+    /**
+     * Replaces this node with a node of another type if the expected type requires it. Examples:
+     * - {@link ExpectedSchemaNodeType#ANY} to {@link ExpectedSchemaNodeType#OBJECT}
+     * - {@link ExpectedSchemaNodeType#OBJECT} to {@link ExpectedSchemaNodeType#UNION}
+     *
+     * @param expectedNodeType what is the other expected type
+     * @param sourceLocation   source location of the value access
+     * @param functionName     function name as in {@link FunctionIdentifier#getName()}
+     * @return this node if no replacement is needed; otherwise, the node that replaced it
+     * @see AbstractComplexExpectedSchemaNode#replaceIfNeeded(ExpectedSchemaNodeType, SourceLocation, String)
+     * @see UnionExpectedSchemaNode#replaceIfNeeded(ExpectedSchemaNodeType, SourceLocation, String)
+     */
+    IExpectedSchemaNode replaceIfNeeded(ExpectedSchemaNodeType expectedNodeType, SourceLocation sourceLocation,
+            String functionName);
+}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/IExpectedSchemaNodeVisitor.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/IExpectedSchemaNodeVisitor.java
new file mode 100644
index 0000000..754503a
--- /dev/null
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/IExpectedSchemaNodeVisitor.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules.pushdown.schema;
+
+/** Visitor of the expected schema nodes, where R is the return type and T is the type of the argument. */
+public interface IExpectedSchemaNodeVisitor<R, T> {
+    R visit(RootExpectedSchemaNode node, T arg);
+
+    R visit(ObjectExpectedSchemaNode node, T arg);
+
+    R visit(ArrayExpectedSchemaNode node, T arg);
+
+    R visit(UnionExpectedSchemaNode node, T arg);
+
+    R visit(AnyExpectedSchemaNode node, T arg);
+}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ObjectExpectedSchemaNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ObjectExpectedSchemaNode.java
new file mode 100644
index 0000000..02307c0
--- /dev/null
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/ObjectExpectedSchemaNode.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules.pushdown.schema;
+
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hyracks.api.exceptions.SourceLocation;
+
+/** An OBJECT node, which keeps its children keyed by their field names. */
+public class ObjectExpectedSchemaNode extends AbstractComplexExpectedSchemaNode {
+    private final Map<String, IExpectedSchemaNode> children;
+    ObjectExpectedSchemaNode(AbstractComplexExpectedSchemaNode parent, SourceLocation sourceLocation,
+            String functionName) {
+        super(parent, sourceLocation, functionName);
+        children = new HashMap<>();
+    }
+
+    public Set<Map.Entry<String, IExpectedSchemaNode>> getChildren() {
+        return children.entrySet();
+    }
+
+    public IExpectedSchemaNode addChild(String fieldName, IExpectedSchemaNode child) {
+        children.put(fieldName, child);
+        return child;
+    }
+
+    @Override
+    public ExpectedSchemaNodeType getType() {
+        return ExpectedSchemaNodeType.OBJECT;
+    }
+
+    @Override
+    public <R, T> R accept(IExpectedSchemaNodeVisitor<R, T> visitor, T arg) {
+        return visitor.visit(this, arg);
+    }
+
+    @Override
+    public void replaceChild(IExpectedSchemaNode oldNode, IExpectedSchemaNode newNode) {
+        String fieldName = null;
+        for (Map.Entry<String, IExpectedSchemaNode> entry : children.entrySet()) {
+            if (entry.getValue() == oldNode) {
+                fieldName = entry.getKey();
+                break;
+            }
+        }
+        //Children are matched by reference. Not finding the field name of oldNode should not happen
+        if (fieldName != null) {
+            children.replace(fieldName, newNode);
+            return;
+        }
+        throw new IllegalStateException("Node " + oldNode.getType() + " is not a child");
+    }
+}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/RootExpectedSchemaNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/RootExpectedSchemaNode.java
new file mode 100644
index 0000000..1a978f3
--- /dev/null
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/RootExpectedSchemaNode.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules.pushdown.schema;
+
+import org.apache.hyracks.api.exceptions.SourceLocation;
+
+/** The root of an expected schema: an OBJECT node without a parent, which is EMPTY, ALL_FIELDS or CLIPPED. */
+public class RootExpectedSchemaNode extends ObjectExpectedSchemaNode {
+    //A root that has no fields
+    private static final int EMPTY_ROOT = 0;
+    //A root that has all the fields
+    private static final int ALL_FIELDS_ROOT = 1;
+    //A root that has a custom set of fields
+    private static final int CLIPPED_ROOT = 2;
+    public static final RootExpectedSchemaNode ALL_FIELDS_ROOT_NODE = new RootExpectedSchemaNode(ALL_FIELDS_ROOT);
+    public static final RootExpectedSchemaNode EMPTY_ROOT_NODE = new RootExpectedSchemaNode(EMPTY_ROOT);
+    private final int rootType;
+
+    RootExpectedSchemaNode() {
+        this(CLIPPED_ROOT);
+    }
+
+    private RootExpectedSchemaNode(int rootType) {
+        super(null, null, null);
+        this.rootType = rootType;
+    }
+
+    public int getRootType() {
+        return rootType;
+    }
+
+    @Override
+    public AbstractComplexExpectedSchemaNode replaceIfNeeded(ExpectedSchemaNodeType expectedNodeType,
+            SourceLocation sourceLocation, String functionName) {
+        if (rootType != ALL_FIELDS_ROOT) {
+            //EMPTY_ROOT and CLIPPED_ROOT are returned as they are
+            return this;
+        }
+        return new RootExpectedSchemaNode(); //ALL_FIELDS_ROOT is replaced by a new CLIPPED_ROOT root
+    }
+
+    @Override
+    public <R, T> R accept(IExpectedSchemaNodeVisitor<R, T> visitor, T arg) {
+        return visitor.visit(this, arg);
+    }
+
+    public boolean isEmpty() {
+        return EMPTY_ROOT == rootType;
+    }
+
+    public boolean isAllFields() {
+        return ALL_FIELDS_ROOT == rootType;
+    }
+}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/UnionExpectedSchemaNode.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/UnionExpectedSchemaNode.java
new file mode 100644
index 0000000..3a675b8
--- /dev/null
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/schema/UnionExpectedSchemaNode.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.optimizer.rules.pushdown.schema;
+
+import java.util.EnumMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hyracks.api.exceptions.SourceLocation;
+
+/** A UNION node, which keeps (at most) one nested child per node type for a value accessed in different ways. */
+public class UnionExpectedSchemaNode extends AbstractComplexExpectedSchemaNode {
+    private final Map<ExpectedSchemaNodeType, AbstractComplexExpectedSchemaNode> children;
+    protected UnionExpectedSchemaNode(AbstractComplexExpectedSchemaNode parent, SourceLocation sourceLocation,
+            String functionName) {
+        super(parent, sourceLocation, functionName);
+        children = new EnumMap<>(ExpectedSchemaNodeType.class);
+    }
+
+    /**
+     * Not supported. The children of a UNION are its ARRAY and OBJECT alternatives, so a child cannot be replaced
+     * (e.g., by ANY). To fall back to ANY, the UNION node itself is replaced in its parent.
+     */
+    @Override
+    protected void replaceChild(IExpectedSchemaNode oldChildNode, IExpectedSchemaNode newChildNode) {
+        throw new UnsupportedOperationException("Cannot replace a child of UNION");
+    }
+
+    protected void addChild(AbstractComplexExpectedSchemaNode node) {
+        children.put(node.getType(), node);
+    }
+
+    public void createChild(ExpectedSchemaNodeType nodeType, SourceLocation sourceLocation, String functionName) {
+        children.computeIfAbsent(nodeType, type -> createNestedNode(type, this, sourceLocation, functionName));
+    }
+
+    public AbstractComplexExpectedSchemaNode getChild(ExpectedSchemaNodeType type) {
+        return children.get(type);
+    }
+
+    public Set<Map.Entry<ExpectedSchemaNodeType, AbstractComplexExpectedSchemaNode>> getChildren() {
+        return children.entrySet();
+    }
+
+    @Override
+    public ExpectedSchemaNodeType getType() {
+        return ExpectedSchemaNodeType.UNION;
+    }
+
+    @Override
+    public <R, T> R accept(IExpectedSchemaNodeVisitor<R, T> visitor, T arg) {
+        return visitor.visit(this, arg);
+    }
+
+    /**
+     * Handles heterogeneous value access when the UNION node already exists. No other UNION node is created and
+     * this node is returned as is, unless ANY is expected, where the super method is called to fall back to ANY.
+     *
+     * @param expectedNodeType the expected type
+     * @param sourceLocation   source location of the value access
+     * @param functionName     function name of the expression
+     * @return an ANY node if ANY is expected; otherwise, this node
+     */
+    @Override
+    public IExpectedSchemaNode replaceIfNeeded(ExpectedSchemaNodeType expectedNodeType, SourceLocation sourceLocation,
+            String functionName) {
+        //A UNION is only ever replaced by ANY. For any other expected type, the UNION itself is returned
+        final boolean fallBackToAny = expectedNodeType == ExpectedSchemaNodeType.ANY;
+        //The super method replaces this node with an ANY node in its parent
+        return fallBackToAny ? super.replaceIfNeeded(expectedNodeType, sourceLocation, functionName) : this;
+    }
+}