[ASTERIXDB-3540][COMP] Fixed calculation of expected schema for pushdown

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
if the getField expr consisted of a function which needs to be
evaluated at runtime, the pushdown computer was not evaluating
those expression leading to incorrect computation.
eg:
1. `field-access-by-name`(t.r.p, x.y.age_field)
2. `field-access-by-name`(t.r.p, substring(x.y.age_field, 0, 4))

Ext-ref: MB-64730
Change-Id: Iac55527af143c292557158ca8e47e92538e93970
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19288
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
Tested-by: Murtadha Hubail <mhubail@apache.org>
Integration-Tests: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/NOTICE b/asterixdb/NOTICE
index 06d538d..5118782 100644
--- a/asterixdb/NOTICE
+++ b/asterixdb/NOTICE
@@ -1,5 +1,5 @@
 Apache AsterixDB
-Copyright 2015-2024 The Apache Software Foundation
+Copyright 2015-2025 The Apache Software Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/ExpectedSchemaBuilder.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/ExpectedSchemaBuilder.java
index b7632db..a9937d1 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/ExpectedSchemaBuilder.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/pushdown/ExpectedSchemaBuilder.java
@@ -22,6 +22,7 @@
 import static org.apache.asterix.optimizer.rules.pushdown.ExpressionValueAccessPushdownVisitor.SUPPORTED_FUNCTIONS;
 
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.asterix.om.functions.BuiltinFunctions;
@@ -37,6 +38,7 @@
 import org.apache.asterix.optimizer.rules.pushdown.schema.UnionExpectedSchemaNode;
 import org.apache.asterix.runtime.projection.DataProjectionInfo;
 import org.apache.asterix.runtime.projection.FunctionCallInformation;
+import org.apache.commons.lang3.mutable.Mutable;
 import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
 import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
@@ -72,6 +74,10 @@
     }
 
     public boolean setSchemaFromExpression(AbstractFunctionCallExpression expr, LogicalVariable producedVar) {
+        return buildExpectedSchemaNodes(expr, producedVar);
+    }
+
+    public boolean setSchemaFromCalculatedExpression(AbstractFunctionCallExpression expr, LogicalVariable producedVar) {
         //Parent always nested
         AbstractComplexExpectedSchemaNode parent = (AbstractComplexExpectedSchemaNode) buildNestedNode(expr);
         if (parent != null) {
@@ -111,6 +117,67 @@
         return !varToNode.isEmpty();
     }
 
+    private boolean buildExpectedSchemaNodes(ILogicalExpression expr, LogicalVariable producedVar) {
+        return buildNestedNodes(expr, producedVar);
+    }
+
+    private boolean buildNestedNodes(ILogicalExpression expr, LogicalVariable producedVar) {
+        //The current node expression
+        boolean changed = false;
+        if (expr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+            return false;
+        }
+        AbstractFunctionCallExpression myExpr = (AbstractFunctionCallExpression) expr;
+        if (!SUPPORTED_FUNCTIONS.contains(myExpr.getFunctionIdentifier()) || noArgsOrFirstArgIsConstant(myExpr)) {
+            // Check if the function consists of the Supported Functions
+            for (Mutable<ILogicalExpression> arg : myExpr.getArguments()) {
+                changed |= buildNestedNodes(arg.getValue(), producedVar);
+            }
+            return changed;
+        }
+        // if the child is not a function expression, then just one node.
+        if (BuiltinFunctions.ARRAY_STAR.equals(myExpr.getFunctionIdentifier())
+                || BuiltinFunctions.SCAN_COLLECTION.equals(myExpr.getFunctionIdentifier())) {
+            // these supported function won't have second child
+            IExpectedSchemaNode expectedSchemaNode = buildNestedNode(expr);
+            if (expectedSchemaNode != null) {
+                changed |= setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) expr, producedVar);
+            }
+        } else {
+            ILogicalExpression childExpr = myExpr.getArguments().get(1).getValue();
+            if (childExpr.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
+                // must be a variable or constant
+                IExpectedSchemaNode expectedSchemaNode = buildNestedNode(expr);
+                if (expectedSchemaNode != null) {
+                    changed |= setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) expr, producedVar);
+                }
+            } else {
+                // as the childExpr is a function.
+                // if the function had been evaluated at compile time, it would have been
+                // evaluated at this stage of compilation.
+                // eg: field-access(t.r.p, substring("name",2,4))
+                // this will be evaluated to field-access(t.r.p, "me") at compile time itself.
+                // since the execution reached this branch, this means the childExpr
+                // need to be evaluated at runtime, hence the childExpr should also be checked
+                // for possible pushdown.
+                // eg: field-access(t.r.p, substring(x.y.age_field, 0, 4))
+                ILogicalExpression parentExpr = myExpr.getArguments().get(0).getValue();
+                IExpectedSchemaNode parentExpectedNode = buildNestedNode(parentExpr);
+                if (parentExpectedNode != null) {
+                    changed |=
+                            setSchemaFromCalculatedExpression((AbstractFunctionCallExpression) parentExpr, producedVar);
+                }
+                changed |= buildNestedNodes(childExpr, producedVar);
+            }
+        }
+        return changed;
+    }
+
+    private boolean noArgsOrFirstArgIsConstant(AbstractFunctionCallExpression myExpr) {
+        List<Mutable<ILogicalExpression>> args = myExpr.getArguments();
+        return args.isEmpty() || args.get(0).getValue().getExpressionTag() == LogicalExpressionTag.CONSTANT;
+    }
+
     private IExpectedSchemaNode buildNestedNode(ILogicalExpression expr) {
         //The current node expression
         AbstractFunctionCallExpression myExpr = (AbstractFunctionCallExpression) expr;
diff --git a/asterixdb/asterix-app/data/hdfs/parquet/friends.json b/asterixdb/asterix-app/data/hdfs/parquet/friends.json
new file mode 100644
index 0000000..d708ad9
--- /dev/null
+++ b/asterixdb/asterix-app/data/hdfs/parquet/friends.json
@@ -0,0 +1 @@
+{ "id": "1", "name": "Monica", "x": { "y": { "age_field": "age" } }, "t": { "r": { "p": { "age": "26" } } } }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index 316d261..7963132 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -272,6 +272,7 @@
         loadData(generatedDataBasePath, "", "heterogeneous_1.parquet", definition, definitionSegment, false, false);
         loadData(generatedDataBasePath, "", "heterogeneous_2.parquet", definition, definitionSegment, false, false);
         loadData(generatedDataBasePath, "", "parquetTypes.parquet", definition, definitionSegment, false, false);
+        loadData(generatedDataBasePath, "", "friends.parquet", definition, definitionSegment, false, false);
     }
 
     private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp
new file mode 100644
index 0000000..a601a8d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.01.ddl.sqlpp
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+* Description  : Field access pushdown
+* Expected Res : Success
+* Date         : June 22nd 2020
+*/
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+
+CREATE TYPE ParquetType as {
+};
+
+CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING %adapter%
+(
+  %template%,
+  ("container"="playground"),
+  ("definition"="parquet-data/reviews"),
+  ("include"="*friends.parquet"),
+  ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp
new file mode 100644
index 0000000..e72d412
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.query.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+EXPLAIN
+SELECT t.r.g, `field-access-by-name`(t.r.p, x.y.age_field)
+FROM ParquetDataset;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp
new file mode 100644
index 0000000..d15ba8d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SET `compiler.external.field.pushdown` "true";
+
+SELECT t.r.g, `field-access-by-name`(t.r.p, x.y.age_field)
+FROM ParquetDataset;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan
new file mode 100644
index 0000000..4806a28
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.02.plan
@@ -0,0 +1 @@
+"distribute result [$$24] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]\n-- DISTRIBUTE_RESULT  |PARTITIONED|\n  exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]\n  -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|\n    project ([$$24]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]\n    -- STREAM_PROJECT  |PARTITIONED|\n      assign [$$24] <- [{\"g\": $$25.getField(\"g\"), \"$1\": $$25.getField(\"p\").getField(\"$$ParquetDataset.getField(\"x\").getField(\"y\").getField(\"age_field\")\")}] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]\n      -- ASSIGN  |PARTITIONED|\n        assign [$$25] <- [$$ParquetDataset.getField(\"t\").getField(\"r\")] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]\n        -- ASSIGN  |PARTITIONED|\n          exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]\n          -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|\n            data-scan []<-[$$ParquetDataset] <- test.ParquetDataset project ({t:{r:{p:any,g:any}},x:{y:{age_field:any}}}) [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0]\n            -- DATASOURCE_SCAN  |PARTITIONED|\n              exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]\n              -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|\n                empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]\n                -- EMPTY_TUPLE_SOURCE  |PARTITIONED|\n"
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm
new file mode 100644
index 0000000..2246335
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/parquet/ASTERIXDB-3540/ASTERIXDB-3540.03.adm
@@ -0,0 +1 @@
+{ "$1": "26" }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 7242984..723c118 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -102,6 +102,12 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="external-dataset">
+      <compilation-unit name="common/parquet/ASTERIXDB-3540">
+        <placeholder name="adapter" value="S3" />
+        <output-dir compare="Clean-JSON">common/parquet/ASTERIXDB-3540</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
       <compilation-unit name="common/parquet/array-access-pushdown">
         <placeholder name="adapter" value="S3" />
         <output-dir compare="Text">common/parquet/array-access-pushdown</output-dir>
diff --git a/hyracks-fullstack/NOTICE b/hyracks-fullstack/NOTICE
index e9bb9a4..722db88 100644
--- a/hyracks-fullstack/NOTICE
+++ b/hyracks-fullstack/NOTICE
@@ -1,5 +1,5 @@
 Apache Hyracks and Algebricks
-Copyright 2015-2024 The Apache Software Foundation
+Copyright 2015-2025 The Apache Software Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).