[ASTERIXDB-3410][COMP] Stack overflow with heavily nested queries
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
With this change we avoid inlining an expression if it is referencing the
same variable more than a fixed number of times (128 by default). We add a
compiler property compiler.max.variable.occurrences.inlining to update this
value.
Ext-ref: MB-63183
Change-Id: Ie3fed69bf915535302664beb335d32ccc4988afe
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18685
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
Reviewed-by: Peeyush Gupta <peeyush.gupta@couchbase.com>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Peeyush Gupta <peeyush.gupta@couchbase.com>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
index 4469dbd..78b1e2b 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
@@ -95,11 +95,11 @@
CompilerProperties.COMPILER_FORCE_JOIN_ORDER_KEY, CompilerProperties.COMPILER_QUERY_PLAN_SHAPE_KEY,
CompilerProperties.COMPILER_MIN_MEMORY_ALLOCATION_KEY, CompilerProperties.COMPILER_COLUMN_FILTER_KEY,
CompilerProperties.COMPILER_BATCH_LOOKUP_KEY, FunctionUtil.IMPORT_PRIVATE_FUNCTIONS,
- FuzzyUtils.SIM_FUNCTION_PROP_NAME, FuzzyUtils.SIM_THRESHOLD_PROP_NAME,
- StartFeedStatement.WAIT_FOR_COMPLETION, FeedActivityDetails.FEED_POLICY_NAME,
- FeedActivityDetails.COLLECT_LOCATIONS, SqlppQueryRewriter.INLINE_WITH_OPTION,
- SqlppExpressionToPlanTranslator.REWRITE_IN_AS_OR_OPTION, "hash_merge", "output-record-type",
- DisjunctivePredicateToJoinRule.REWRITE_OR_AS_JOIN_OPTION,
+ CompilerProperties.COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING_KEY, FuzzyUtils.SIM_FUNCTION_PROP_NAME,
+ FuzzyUtils.SIM_THRESHOLD_PROP_NAME, StartFeedStatement.WAIT_FOR_COMPLETION,
+ FeedActivityDetails.FEED_POLICY_NAME, FeedActivityDetails.COLLECT_LOCATIONS,
+ SqlppQueryRewriter.INLINE_WITH_OPTION, SqlppExpressionToPlanTranslator.REWRITE_IN_AS_OR_OPTION,
+ "hash_merge", "output-record-type", DisjunctivePredicateToJoinRule.REWRITE_OR_AS_JOIN_OPTION,
SetAsterixPhysicalOperatorsRule.REWRITE_ATTEMPT_BATCH_ASSIGN,
EquivalenceClassUtils.REWRITE_INTERNAL_QUERYUID_PK, SqlppQueryRewriter.SQL_COMPAT_OPTION,
JoinEnum.CBO_FULL_ENUM_LEVEL_KEY, JoinEnum.CBO_CP_ENUM_KEY));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.1.ddl.sqlpp
new file mode 100644
index 0000000..4271f99
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.1.ddl.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description: This test case is to verify the fix for ASTERIXDB-3419
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+
+use test;
+
+create type dt1 as {itemid: int};
+
+create dataset collection1(dt1) PRIMARY KEY itemid;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.2.update.sqlpp
new file mode 100644
index 0000000..d805bd0
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.2.update.sqlpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+insert into collection1
+([
+ {
+ "categories": "Category 1, Category 2, Category 6, Category 9, Category 10, Category 12, Category 13, Category 14, Category 16, Category 17, Category 18, Category 19, Category 20, Category 21, Category 22",
+ "itemid":10,
+ "description":"ABC"
+ },
+ {
+ "categories": "Category 1, Category 3, Category 5, Category 7, Category 8, Category 9, Category 10, Category 19, Category 10, Category 20, Category 21, Category 22, Category 23, Category 31, Category 32",
+ "itemid":12,
+ "description":"XYZ"
+ }
+]);
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.3.query.sqlpp
new file mode 100644
index 0000000..9081a11
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.3.query.sqlpp
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description: This test case is to verify the fix for ASTERIXDB-3410
+ */
+
+use test;
+
+SELECT VALUE OBJECT_REMOVE(t, 'categories')
+FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 1")) AS `Category 1`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 2")) AS `Category 2`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 3")) AS `Category 3`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 4")) AS `Category 4`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 5")) AS `Category 5`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 6")) AS `Category 6`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 7")) AS `Category 7`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 8")) AS `Category 8`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 9")) AS `Category 9`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 10")) AS `Category 10`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 11")) AS `Category 11`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 12")) AS `Category 12`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 13")) AS `Category 13`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 14")) AS `Category 14`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 15")) AS `Category 15`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 16")) AS `Category 16`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 17")) AS `Category 17`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 18")) AS `Category 18`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 19")) AS `Category 19`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 20")) AS `Category 20`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 21")) AS `Category 21`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 22")) AS `Category 22`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 23")) AS `Category 23`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 24")) AS `Category 24`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 25")) AS `Category 25`
+ FROM (
+ SELECT t.*,
+ to_bigint(CONTAINS(categories, "Category 26")) AS `Category 26`
+ FROM collection1 t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ) AS t ORDER BY itemid;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
index 52981ae..c364a58 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
@@ -41,6 +41,7 @@
"compiler\.indexonly" : true,
"compiler\.internal\.sanitycheck" : true,
"compiler\.joinmemory" : 262144,
+ "compiler\.max\.variable\.occurrences\.inlining" : 128,
"compiler.min.groupmemory" : 524288,
"compiler.min.joinmemory" : 524288,
"compiler\.min\.memory\.allocation" : true,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
index 0d4967a..9e85426 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
@@ -41,6 +41,7 @@
"compiler\.indexonly" : true,
"compiler\.internal\.sanitycheck" : false,
"compiler\.joinmemory" : 262144,
+ "compiler\.max\.variable\.occurrences\.inlining" : 128,
"compiler.min.groupmemory" : 524288,
"compiler.min.joinmemory" : 524288,
"compiler\.min\.memory\.allocation" : true,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
index 0295ab2..ac00566 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
@@ -41,6 +41,7 @@
"compiler\.indexonly" : true,
"compiler\.internal\.sanitycheck" : false,
"compiler\.joinmemory" : 262144,
+ "compiler\.max\.variable\.occurrences\.inlining" : 128,
"compiler.min.groupmemory" : 524288,
"compiler.min.joinmemory" : 524288,
"compiler\.min\.memory\.allocation" : true,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.3.adm
new file mode 100644
index 0000000..2c44767
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/query-ASTERIXDB-3410/query-ASTERIXDB-3410.3.adm
@@ -0,0 +1,2 @@
+{ "Category 1": 1, "Category 2": 1, "Category 3": 0, "Category 4": 0, "Category 5": 0, "Category 6": 1, "Category 7": 0, "Category 8": 0, "Category 9": 1, "Category 10": 1, "Category 11": 0, "Category 12": 1, "Category 13": 1, "Category 14": 1, "Category 15": 0, "Category 16": 1, "Category 17": 1, "Category 18": 1, "Category 19": 1, "Category 20": 1, "Category 21": 1, "Category 22": 1, "Category 23": 0, "Category 24": 0, "Category 25": 0, "Category 26": 0, "itemid": 10, "description": "ABC" }
+{ "Category 1": 1, "Category 2": 1, "Category 3": 1, "Category 4": 0, "Category 5": 1, "Category 6": 0, "Category 7": 1, "Category 8": 1, "Category 9": 1, "Category 10": 1, "Category 11": 0, "Category 12": 0, "Category 13": 0, "Category 14": 0, "Category 15": 0, "Category 16": 0, "Category 17": 0, "Category 18": 0, "Category 19": 1, "Category 20": 1, "Category 21": 1, "Category 22": 1, "Category 23": 1, "Category 24": 0, "Category 25": 0, "Category 26": 0, "itemid": 12, "description": "XYZ" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
index 338cd8d..0bed6d8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
@@ -7501,6 +7501,11 @@
<output-dir compare="Text">query-ASTERIXDB-3481</output-dir>
</compilation-unit>
</test-case>
+ <test-case FilePath="misc">
+ <compilation-unit name="query-ASTERIXDB-3410">
+ <output-dir compare="Text">query-ASTERIXDB-3410</output-dir>
+ </compilation-unit>
+ </test-case>
</test-group>
<test-group name="multipart-dataverse">
<test-case FilePath="multipart-dataverse">
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
index 8ca5c94..957cb84 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
@@ -152,7 +152,11 @@
COMPILER_COPY_TO_WRITE_BUFFER_SIZE(
getRangedIntegerType(5, Integer.MAX_VALUE),
StorageUtil.getIntSizeInBytes(8, StorageUtil.StorageUnit.MEGABYTE),
- "The COPY TO write buffer size in bytes. (default: 8MB, min: 5MB)");
+ "The COPY TO write buffer size in bytes. (default: 8MB, min: 5MB)"),
+ COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING(
+ getRangedIntegerType(0, Integer.MAX_VALUE),
+ 128,
+ "Maximum occurrences of a variable allowed in an expression for inlining");
private final IOptionType type;
private final Object defaultValue;
@@ -234,6 +238,9 @@
public static final String COMPILER_COLUMN_FILTER_KEY = Option.COMPILER_COLUMN_FILTER.ini();
+ public static final String COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING_KEY =
+ Option.COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING.ini();
+
public static final int COMPILER_PARALLELISM_AS_STORAGE = 0;
public CompilerProperties(PropertiesAccessor accessor) {
@@ -369,4 +376,8 @@
public int getCopyToWriteBufferSize() {
return accessor.getInt(Option.COMPILER_COPY_TO_WRITE_BUFFER_SIZE);
}
+
+ public int getMaxVariableOccurrencesForInlining() {
+ return accessor.getInt(Option.COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING);
+ }
}
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
index 160c04d..28ab077 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
@@ -95,6 +95,8 @@
compilerProperties.getQueryPlanShapeMode());
boolean columnFilter = getBoolean(querySpecificConfig, CompilerProperties.COMPILER_COLUMN_FILTER_KEY,
compilerProperties.isColumnFilter());
+ int maxVariableOccurrencesForInlining =
+ getMaxVariableOccurrencesForInlining(compilerProperties, querySpecificConfig, sourceLoc);
PhysicalOptimizationConfig physOptConf = new PhysicalOptimizationConfig();
physOptConf.setFrameSize(frameSize);
@@ -123,6 +125,7 @@
physOptConf.setMinJoinFrames(compilerProperties.getMinJoinMemoryFrames());
physOptConf.setMinGroupFrames(compilerProperties.getMinGroupMemoryFrames());
physOptConf.setMinWindowFrames(compilerProperties.getMinWindowMemoryFrames());
+ physOptConf.setMaxVariableOccurrencesForInlining(maxVariableOccurrencesForInlining);
// We should have already validated the parameter names at this point...
Set<String> filteredParameterNames = new HashSet<>(parameterNames);
@@ -219,4 +222,16 @@
}
return defaultValue;
}
+
+ private static int getMaxVariableOccurrencesForInlining(CompilerProperties compilerProperties,
+ Map<String, Object> querySpecificConfig, SourceLocation sourceLoc) throws AsterixException {
+ String valueInQuery =
+ (String) querySpecificConfig.get(CompilerProperties.COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING_KEY);
+ try {
+ return valueInQuery == null ? compilerProperties.getMaxVariableOccurrencesForInlining()
+ : OptionTypes.NONNEGATIVE_INTEGER.parse(valueInQuery);
+ } catch (IllegalArgumentException e) {
+ throw AsterixException.create(ErrorCode.COMPILATION_ERROR, sourceLoc, e.getMessage());
+ }
+ }
}
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
index 98c4223..465cd29 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
@@ -46,4 +46,5 @@
StorageUtil.getIntSizeInBytes(8, StorageUtil.StorageUnit.KILOBYTE);
public static final boolean BATCH_LOOKUP_DEFAULT = true;
public static final boolean COLUMN_FILTER_DEFAULT = true;
+ public static final int MAX_VARIABLE_OCCURRENCES_INLINING_DEFAULT = 128;
}
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
index 11171a1..07e8bfc 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/rewriter/base/PhysicalOptimizationConfig.java
@@ -62,6 +62,7 @@
private static final String MIN_JOIN_FRAMES = "MIN_JOIN_FRAMES";
private static final String MIN_GROUP_FRAMES = "MIN_GROUP_FRAMES";
private static final String MIN_WINDOW_FRAMES = "MIN_WINDOW_FRAMES";
+ private static final String MAX_VARIABLE_OCCURRENCES_INLINING = "MAX_VARIABLE_OCCURRENCES_INLINING";
private final Properties properties = new Properties();
@@ -383,6 +384,14 @@
return properties.getProperty(property);
}
+ public int getMaxVariableOccurrencesForInlining() {
+ return getInt(MAX_VARIABLE_OCCURRENCES_INLINING, AlgebricksConfig.MAX_VARIABLE_OCCURRENCES_INLINING_DEFAULT);
+ }
+
+ public void setMaxVariableOccurrencesForInlining(int maxVariableOccurrencesForInlining) {
+ setInt(MAX_VARIABLE_OCCURRENCES_INLINING, maxVariableOccurrencesForInlining);
+ }
+
private void setInt(String property, int value) {
properties.setProperty(property, Integer.toString(value));
}
diff --git a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java
index 6ad50e6..5970f4f 100644
--- a/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java
+++ b/hyracks-fullstack/algebricks/algebricks-rewriter/src/main/java/org/apache/hyracks/algebricks/rewriter/rules/InlineVariablesRule.java
@@ -19,6 +19,7 @@
package org.apache.hyracks.algebricks.rewriter.rules;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -84,6 +85,7 @@
private final List<LogicalVariable> usedVars = new ArrayList<>();
// map of variables and the counts of how many times they were used
private final Map<LogicalVariable, MutableInt> usedVariableCounter = new HashMap<>();
+ private final Map<LogicalVariable, Map<LogicalVariable, Integer>> totalLeafVariableCounter = new HashMap<>();
@Override
public boolean rewritePost(Mutable<ILogicalOperator> opRef, IOptimizationContext context) {
@@ -113,6 +115,7 @@
inlineVisitor.setContext(context);
subTreesDone.clear();
usedVariableCounter.clear();
+ totalLeafVariableCounter.clear();
}
protected boolean performBottomUpAction(ILogicalOperator op) throws AlgebricksException {
@@ -168,6 +171,20 @@
}
}
+ if (op.getOperatorTag() == LogicalOperatorTag.ASSIGN) {
+ AssignOperator assignOp = (AssignOperator) op;
+ computeLeafVariablesCount(assignOp);
+ List<LogicalVariable> vars = assignOp.getVariables();
+ for (LogicalVariable variable : vars) {
+ // Don't inline variables that potentially reference a large number of the same leaf variable.
+ Map<LogicalVariable, Integer> varMap = totalLeafVariableCounter.get(variable);
+ if (varMap != null && !varMap.isEmpty() && Collections.max(varMap.values()) > context
+ .getPhysicalOptimizationConfig().getMaxVariableOccurrencesForInlining()) {
+ varAssignRhs.remove(variable);
+ }
+ }
+ }
+
// Descend into subplan
if (op.getOperatorTag() == LogicalOperatorTag.SUBPLAN || op.getOperatorTag() == LogicalOperatorTag.WINDOW) {
List<ILogicalPlan> nestedPlans = ((AbstractOperatorWithNestedPlans) op).getNestedPlans();
@@ -259,6 +276,28 @@
}
}
+ private void computeLeafVariablesCount(AssignOperator assignOp) {
+ List<LogicalVariable> vars = assignOp.getVariables();
+ List<Mutable<ILogicalExpression>> exprs = assignOp.getExpressions();
+ for (int i = 0; i < vars.size(); i++) {
+ LogicalVariable variable = vars.get(i);
+ ILogicalExpression expr = exprs.get(i).getValue();
+ usedVars.clear();
+ expr.getUsedVariables(usedVars);
+ Map<LogicalVariable, Integer> varMap =
+ totalLeafVariableCounter.computeIfAbsent(variable, k -> new HashMap<>());
+ for (LogicalVariable usedVar : usedVars) {
+ if (totalLeafVariableCounter.containsKey(usedVar)) {
+ for (Map.Entry<LogicalVariable, Integer> entry : totalLeafVariableCounter.get(usedVar).entrySet()) {
+ varMap.put(entry.getKey(), entry.getValue() + varMap.getOrDefault(entry.getKey(), 0));
+ }
+ } else {
+ varMap.put(usedVar, 1);
+ }
+ }
+ }
+ }
+
public static class InlineVariablesVisitor extends LogicalExpressionReferenceTransformVisitor
implements ILogicalExpressionReferenceTransform {