[ASTERIXDB-3502][STO] Incorrectly treating two expressions as commutative
- user model changes: no
- storage format changes: no
- interface changes: no
- Incorrectly considering two different expression as commutative
Ext-ref: MB-63525
Change-Id: Ia83d1559f5f45406075a384ce25251e50b362b5b
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18875
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/join/ASTERIXDB-3502/ASTERIXDB-3502.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/join/ASTERIXDB-3502/ASTERIXDB-3502.01.ddl.sqlpp
new file mode 100644
index 0000000..e4c59d2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/join/ASTERIXDB-3502/ASTERIXDB-3502.01.ddl.sqlpp
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+DROP DATAVERSE tpch IF EXISTS;
+CREATE DATAVERSE tpch;
+
+USE tpch;
+
+CREATE TYPE tpch.CustomerType AS
+ CLOSED {
+ c_custkey : integer,
+ c_name : string,
+ c_address : string,
+ c_nationkey : integer,
+ c_phone : string,
+ c_acctbal : double,
+ c_mktsegment : string,
+ c_comment : string
+};
+
+CREATE TYPE tpch.SupplierType AS
+ CLOSED {
+ s_suppkey : integer,
+ s_name : string,
+ s_address : string,
+ s_nationkey : integer,
+ s_phone : string,
+ s_acctbal : double,
+ s_comment : string
+};
+
+CREATE TYPE tpch.NationType AS
+ CLOSED {
+ n_nationkey : integer,
+ n_name : string,
+ n_regionkey : integer,
+ n_comment : string
+};
+
+CREATE DATASET Supplier(SupplierType) PRIMARY KEY s_suppkey;
+
+CREATE DATASET Nation(NationType) PRIMARY KEY n_nationkey;
+
+CREATE DATASET Customer(CustomerType) PRIMARY KEY c_custkey;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/join/ASTERIXDB-3502/ASTERIXDB-3502.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/join/ASTERIXDB-3502/ASTERIXDB-3502.02.update.sqlpp
new file mode 100644
index 0000000..b4ae2b6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/join/ASTERIXDB-3502/ASTERIXDB-3502.02.update.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE tpch;
+
+LOAD DATASET Supplier USING localfs ((`path`=`asterix_nc1://data/tpch0.001/supplier.tbl`),(`format`=`delimited-text`),(`delimiter`=`|`));
+
+LOAD DATASET Nation USING localfs ((`path`=`asterix_nc1://data/tpch0.001/nation.tbl`),(`format`=`delimited-text`),(`delimiter`=`|`));
+
+LOAD DATASET Customer USING localfs ((`path`=`asterix_nc1://data/tpch0.001/customer.tbl`),(`format`=`delimited-text`),(`delimiter`=`|`));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/join/ASTERIXDB-3502/ASTERIXDB-3502.03.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/join/ASTERIXDB-3502/ASTERIXDB-3502.03.query.sqlpp
new file mode 100644
index 0000000..76750f9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/join/ASTERIXDB-3502/ASTERIXDB-3502.03.query.sqlpp
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Similar to hash-join-with-redundant-variable.04.query.sqlpp
+ * But with Index NL. The plan of this test should has three
+ * hash-partition-exchange (as opposed to test 13 & 14). Because the parallelism
+ * is set to 3, then the last join requires both sides to be hash partitioned.
+ * Customer will need to duplicate its variable to join both with Nation and Supplier.
+ * This is the effect of using Index NL with parallelism != # of partitions
+ */
+
+USE tpch;
+
+-- this query should not give any results
+
+SELECT n.n_nationkey, s.s_nationkey, c.c_nationkey
+FROM Nation n, Supplier s, Customer c
+WHERE s.s_nationkey = n.n_nationkey
+AND c.c_nationkey = n.n_nationkey
+AND (s.s_nationkey = c.c_nationkey) = (s.s_nationkey = c.c_nationkey)
+-- before ASTERIXDB-3502, below expression was getting removed, hence evaluated to wrong result
+AND (s.s_nationkey != c.c_nationkey) = (s.s_nationkey = c.c_nationkey)
+ORDER BY n.n_nationkey, s.s_nationkey, c.c_nationkey;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/join/ASTERIXDB-3502/ASTERIXDB-3502.03.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/join/ASTERIXDB-3502/ASTERIXDB-3502.03.adm
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/join/ASTERIXDB-3502/ASTERIXDB-3502.03.adm
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
index c63de24..717700f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
@@ -6655,6 +6655,11 @@
</compilation-unit>
</test-case>
<test-case FilePath="join">
+ <compilation-unit name="ASTERIXDB-3502">
+ <output-dir compare="Text">ASTERIXDB-3502</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="join">
<compilation-unit name="hash_join_array">
<output-dir compare="Text">hash_join_array</output-dir>
</compilation-unit>
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
index cce0bd8..24b38ab 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/FunctionUtil.java
@@ -347,30 +347,17 @@
return expr1.equals(expr2);
}
- return commutativeEquals(expr1, expr2, new BitSet());
- }
-
- private static boolean commutativeEquals(ILogicalExpression expr1, ILogicalExpression expr2, BitSet matched) {
- if (expr1.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL
- || expr2.getExpressionTag() != LogicalExpressionTag.FUNCTION_CALL) {
- return expr1.equals(expr2);
- }
-
- AbstractFunctionCallExpression funcExpr1 = (AbstractFunctionCallExpression) expr1;
- AbstractFunctionCallExpression funcExpr2 = (AbstractFunctionCallExpression) expr2;
-
List<Mutable<ILogicalExpression>> args1 = funcExpr1.getArguments();
List<Mutable<ILogicalExpression>> args2 = funcExpr2.getArguments();
- BitSet childrenSet = new BitSet();
+ BitSet matched = new BitSet();
int numberOfMatches = 0;
for (Mutable<ILogicalExpression> arg1 : args1) {
int prevNumberOfMatches = numberOfMatches;
for (int i = 0; i < args2.size(); i++) {
Mutable<ILogicalExpression> arg2 = args2.get(i);
- childrenSet.clear();
- if (!matched.get(i) && commutativeEquals(arg1.getValue(), arg2.getValue(), childrenSet)) {
+ if (!matched.get(i) && commutativeEquals(arg1.getValue(), arg2.getValue())) {
matched.set(i);
numberOfMatches++;
break;
diff --git a/asterixdb/asterix-lang-sqlpp/src/test/java/org/apache/asterix/lang/expression/CommutativeEqualsTest.java b/asterixdb/asterix-lang-sqlpp/src/test/java/org/apache/asterix/lang/expression/CommutativeEqualsTest.java
index 06f5ba7..aba83db 100644
--- a/asterixdb/asterix-lang-sqlpp/src/test/java/org/apache/asterix/lang/expression/CommutativeEqualsTest.java
+++ b/asterixdb/asterix-lang-sqlpp/src/test/java/org/apache/asterix/lang/expression/CommutativeEqualsTest.java
@@ -24,11 +24,14 @@
import java.util.Map;
import org.apache.asterix.lang.common.util.FunctionUtil;
+import org.apache.asterix.om.base.AInt32;
+import org.apache.asterix.om.constants.AsterixConstantValue;
import org.apache.asterix.om.functions.BuiltinFunctions;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
import org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
@@ -46,7 +49,7 @@
private int varCounter;
@Test
- public void testTwoOperands() {
+ public void testExpressions() {
// EQ
reset();
ILogicalExpression expr1 = createExpression(BuiltinFunctions.EQ, 'x', 'y');
@@ -62,6 +65,69 @@
expr1 = createExpression(BuiltinFunctions.EQ, 'x', 'x');
expr2 = createExpression(BuiltinFunctions.EQ, 'x', 'y');
Assert.assertFalse(FunctionUtil.commutativeEquals(expr1, expr2));
+
+ // ( ( NOT ( bool_field1 ) ) ) AND ( ( bool_field1 ) )
+ // ( ( bool_field1 ) ) AND ( ( bool_field1 = true ) )
+ reset();
+ expr1 = createExpression(BuiltinFunctions.AND,
+ createExpression(BuiltinFunctions.NOT, getVariableExpression('x')), getVariableExpression('x'));
+
+ expr2 = createExpression(BuiltinFunctions.AND, getVariableExpression('x'),
+ createExpression(BuiltinFunctions.EQ, getVariableExpression('x'), ConstantExpression.TRUE));
+ Assert.assertFalse(FunctionUtil.commutativeEquals(expr1, expr2));
+
+ // ( UPPER(varchar_field1) = varchar_field1 AND int_field1 = 2 )
+ // ( LOWER(varchar_field1) = varchar_field1 AND int_field1 = 2 )
+ reset();
+ expr1 = createExpression(BuiltinFunctions.AND,
+ createExpression(BuiltinFunctions.EQ,
+ createExpression(BuiltinFunctions.STRING_UPPERCASE, getVariableExpression('x')),
+ ConstantExpression.TRUE),
+ createExpression(BuiltinFunctions.EQ, getVariableExpression('y'),
+ new ConstantExpression(new AsterixConstantValue(new AInt32(2)))));
+
+ expr2 = createExpression(BuiltinFunctions.AND,
+ createExpression(BuiltinFunctions.EQ,
+ createExpression(BuiltinFunctions.STRING_LOWERCASE, getVariableExpression('x')),
+ ConstantExpression.TRUE),
+ createExpression(BuiltinFunctions.EQ, getVariableExpression('y'),
+ new ConstantExpression(new AsterixConstantValue(new AInt32(2)))));
+
+ Assert.assertFalse(FunctionUtil.commutativeEquals(expr1, expr2));
+
+ expr2 = createExpression(BuiltinFunctions.EQ,
+ createExpression(BuiltinFunctions.EQ,
+ createExpression(BuiltinFunctions.STRING_LOWERCASE, getVariableExpression('x')),
+ ConstantExpression.TRUE),
+ createExpression(BuiltinFunctions.EQ, getVariableExpression('y'),
+ new ConstantExpression(new AsterixConstantValue(new AInt32(2)))));
+
+ // ( LOWER(varchar_field1) = varchar_field1 AND int_field1 = 2 )
+ // ( int_field1 = 2 AND LOWER(varchar_field1) = varchar_field1)
+ // should evaluate to true
+ ILogicalExpression expr3 = createExpression(BuiltinFunctions.EQ,
+ createExpression(BuiltinFunctions.EQ, getVariableExpression('y'),
+ new ConstantExpression(new AsterixConstantValue(new AInt32(2)))),
+ createExpression(BuiltinFunctions.EQ,
+ createExpression(BuiltinFunctions.STRING_LOWERCASE, getVariableExpression('x')),
+ ConstantExpression.TRUE));
+
+ Assert.assertTrue(FunctionUtil.commutativeEquals(expr2, expr3));
+
+ // ( 10/int_field1=6911432 ) AND ( bool_field1=true )
+ // ( int_field1/10=6911432 ) AND ( bool_field1 = true )
+ reset();
+ expr1 = createExpression(BuiltinFunctions.AND,
+ createExpression(BuiltinFunctions.NUMERIC_DIV,
+ new ConstantExpression(new AsterixConstantValue(new AInt32(10))), getVariableExpression('i')),
+ createExpression(BuiltinFunctions.EQ, getVariableExpression('b'), ConstantExpression.TRUE));
+
+ expr2 = createExpression(BuiltinFunctions.AND,
+ createExpression(BuiltinFunctions.NUMERIC_DIV, getVariableExpression('i'),
+ new ConstantExpression(new AsterixConstantValue(new AInt32(10)))),
+ createExpression(BuiltinFunctions.EQ, getVariableExpression('b'), ConstantExpression.TRUE));
+
+ Assert.assertFalse(FunctionUtil.commutativeEquals(expr1, expr2));
}
private void reset() {
@@ -69,19 +135,31 @@
varNameToVarMap.clear();
}
- private ILogicalExpression createExpression(FunctionIdentifier fid, char left, char right) {
+ private ILogicalExpression createExpression(FunctionIdentifier fid, ILogicalExpression... left) {
List<Mutable<ILogicalExpression>> args = new ArrayList<>();
- args.add(getVariableExpression(left));
- args.add(getVariableExpression(right));
+ for (ILogicalExpression expr : left) {
+ args.add(new MutableObject<>(expr));
+ }
IFunctionInfo funcInfo = BuiltinFunctions.getBuiltinFunctionInfo(fid);
return new ScalarFunctionCallExpression(funcInfo, args);
}
- private Mutable<ILogicalExpression> getVariableExpression(Character displayName) {
+ private ILogicalExpression createExpression(FunctionIdentifier fid, char... left) {
+ List<Mutable<ILogicalExpression>> args = new ArrayList<>();
+
+ for (int i = 0; i < left.length; i++) {
+ args.add(new MutableObject<>(getVariableExpression(left[i])));
+ }
+
+ IFunctionInfo funcInfo = BuiltinFunctions.getBuiltinFunctionInfo(fid);
+ return new ScalarFunctionCallExpression(funcInfo, args);
+ }
+
+ private ILogicalExpression getVariableExpression(Character displayName) {
LogicalVariable variable = varNameToVarMap.computeIfAbsent(displayName,
k -> new LogicalVariable(varCounter++, displayName.toString()));
- return new MutableObject<>(new VariableReferenceExpression(variable));
+ return new VariableReferenceExpression(variable);
}
}