[NO ISSUE][COMP] Fix isomorphism comparison of data-scan operators
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- When comparing data-scan operators IsomorphismOperatorVisitor
must consider their pushed conditions and limits
Change-Id: Id07b42579c7905b751de0f0ac7bdee5720cd4e7d
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/8063
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.1.ddl.sqlpp
index 917715e..319db54 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.1.ddl.sqlpp
@@ -42,6 +42,28 @@
tweetid : string
};
+create type onekType1 as
+ closed {
+ unique1 : bigint,
+ unique2 : bigint,
+ two : bigint,
+ four : bigint,
+ ten : bigint,
+ twenty : bigint,
+ onePercent : bigint,
+ tenPercent : bigint,
+ twentyPercent : bigint,
+ fiftyPercent : bigint,
+ unique3 : bigint,
+ evenOnePercent : bigint,
+ oddOnePercent : bigint,
+ stringu1 : string,
+ stringu2 : string,
+ string4 : string
+};
+
create dataset DBLP1(DBLPType) primary key id;
-create dataset TweetMessages(TweetMessageType) primary key tweetid;
\ No newline at end of file
+create dataset TweetMessages(TweetMessageType) primary key tweetid;
+
+create dataset onek1(onekType1) primary key unique2;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.query.sqlpp
new file mode 100644
index 0000000..a91cece
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.query.sqlpp
@@ -0,0 +1,46 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Check that data-scan operator isomorphism comparison
+ * considers pushed condition.
+ *
+ * The dataset contains 100 tuples, 50 with two=0 and 50 with two=1.
+ * The expected result is 50.
+ */
+
+use test;
+
+with T1 as (
+ select two from onek1
+ where two between 1 and 10
+ limit 1000
+),
+T2 as (
+ select two from onek1
+ where two between -10 and -1
+ limit 1000
+)
+
+select value count(*) from (
+ select two from T1
+ union all
+ select two from T2
+) t;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.query.sqlpp
new file mode 100644
index 0000000..34eb190
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.query.sqlpp
@@ -0,0 +1,45 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Check that data-scan operator isomorphism comparison
+ * considers pushed condition
+ */
+
+use test;
+
+explain
+
+with T1 as (
+ select two from onek1
+ where two between 1 and 10
+ limit 1000
+),
+T2 as (
+ select two from onek1
+ where two between -10 and -1
+ limit 1000
+)
+
+select value count(*) from (
+ select two from T1
+ union all
+ select two from T2
+) t;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.2.update.sqlpp
index a18f3e2..cd3d45f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.2.update.sqlpp
@@ -16,11 +16,11 @@
* specific language governing permissions and limitations
* under the License.
*/
-/* scan and print a delimited text file */
use test;
-
load dataset DBLP1 using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-id.txt`),(`format`=`delimited-text`),(`delimiter`=`:`));
-load dataset TweetMessages using localfs ((`path`=`asterix_nc1://data/tinysocial/twm.adm`),(`format`=`adm`));
\ No newline at end of file
+load dataset TweetMessages using localfs ((`path`=`asterix_nc1://data/tinysocial/twm.adm`),(`format`=`adm`));
+
+load dataset onek1 using localfs ((`path`=`asterix_nc1://data/wisc/onektup.adm`),(`format`=`delimited-text`),(`delimiter`=`|`)) pre-sorted;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.1.ddl.sqlpp
index 162cc35..36af900 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.1.ddl.sqlpp
@@ -38,5 +38,26 @@
misc : string
};
+create type onekType1 as
+ closed {
+ unique1 : bigint,
+ unique2 : bigint,
+ two : bigint,
+ four : bigint,
+ ten : bigint,
+ twenty : bigint,
+ onePercent : bigint,
+ tenPercent : bigint,
+ twentyPercent : bigint,
+ fiftyPercent : bigint,
+ unique3 : bigint,
+ evenOnePercent : bigint,
+ oddOnePercent : bigint,
+ stringu1 : string,
+ stringu2 : string,
+ string4 : string
+};
+
create dataset DBLP1(DBLPType) primary key id;
+create dataset onek1(onekType1) primary key unique2;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.2.update.sqlpp
index 2e0f056..4d4065d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.2.update.sqlpp
@@ -20,5 +20,6 @@
use test;
-
load dataset DBLP1 using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-id.txt`),(`format`=`delimited-text`),(`delimiter`=`:`));
+
+load dataset onek1 using localfs ((`path`=`asterix_nc1://data/wisc/onektup.adm`),(`format`=`delimited-text`),(`delimiter`=`|`)) pre-sorted;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.query.sqlpp
new file mode 100644
index 0000000..dd9eb15
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.query.sqlpp
@@ -0,0 +1,45 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Check that data-scan operator isomorphism comparison
+ * considers pushed limit.
+ *
+ * The dataset contains 100 tuples,
+ * so the expected result is 110.
+ */
+
+use test;
+
+with
+T1 as (
+ select two from onek1
+ limit 100
+),
+T2 as (
+ select two from onek1
+ limit 10
+)
+
+select value count(*) from (
+ select two from T1
+ union all
+ select two from T2
+) t;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.query.sqlpp
new file mode 100644
index 0000000..a592faa
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.query.sqlpp
@@ -0,0 +1,44 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Check that data-scan operator isomorphism comparison
+ * considers pushed limit.
+ */
+
+use test;
+
+explain
+
+with
+T1 as (
+ select two from onek1
+ limit 100
+),
+T2 as (
+ select two from onek1
+ limit 10
+)
+
+select value count(*) from (
+ select two from T1
+ union all
+ select two from T2
+) t;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.adm
new file mode 100644
index 0000000..c5b431b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.adm
@@ -0,0 +1 @@
+50
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.adm
new file mode 100644
index 0000000..a040253
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.adm
@@ -0,0 +1,52 @@
+distribute result [$$128]
+-- DISTRIBUTE_RESULT |LOCAL|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ aggregate [$$128] <- [agg-sql-sum($$129)]
+ -- AGGREGATE |LOCAL|
+ aggregate [$$129] <- [agg-sql-count(1)]
+ -- AGGREGATE |LOCAL|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ union
+ -- UNION_ALL |UNPARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ limit 1000
+ -- STREAM_LIMIT |UNPARTITIONED|
+ project ([])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$126(ASC) ] |PARTITIONED|
+ limit 1000
+ -- STREAM_LIMIT |PARTITIONED|
+ project ([$$126])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$126, $$onek1] <- test.onek1 condition (and(ge($$onek1.getField(2), 1), le($$onek1.getField(2), 10))) limit 1000
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ limit 1000
+ -- STREAM_LIMIT |UNPARTITIONED|
+ project ([])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$127(ASC) ] |PARTITIONED|
+ limit 1000
+ -- STREAM_LIMIT |PARTITIONED|
+ project ([$$127])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$127, $$onek1] <- test.onek1 condition (and(ge($$onek1.getField(2), -10), le($$onek1.getField(2), -1))) limit 1000
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.adm
new file mode 100644
index 0000000..97e3504
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.adm
@@ -0,0 +1 @@
+110
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.adm
new file mode 100644
index 0000000..f6de6e3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.adm
@@ -0,0 +1,52 @@
+distribute result [$$114]
+-- DISTRIBUTE_RESULT |LOCAL|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |LOCAL|
+ aggregate [$$114] <- [agg-sql-sum($$115)]
+ -- AGGREGATE |LOCAL|
+ aggregate [$$115] <- [agg-sql-count(1)]
+ -- AGGREGATE |LOCAL|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ union
+ -- UNION_ALL |UNPARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ limit 100
+ -- STREAM_LIMIT |UNPARTITIONED|
+ project ([])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$112(ASC) ] |PARTITIONED|
+ limit 100
+ -- STREAM_LIMIT |PARTITIONED|
+ project ([$$112])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$112, $$onek1] <- test.onek1 limit 100
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ limit 10
+ -- STREAM_LIMIT |UNPARTITIONED|
+ project ([])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- SORT_MERGE_EXCHANGE [$$113(ASC) ] |PARTITIONED|
+ limit 10
+ -- STREAM_LIMIT |PARTITIONED|
+ project ([$$113])
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$113, $$onek1] <- test.onek1 limit 10
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
index 7b3f53a..e50ea65 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
@@ -460,12 +460,17 @@
return Boolean.FALSE;
}
DataSourceScanOperator argScan = (DataSourceScanOperator) arg;
- if (!argScan.getDataSource().toString().equals(op.getDataSource().toString())) {
+ boolean isomorphic = op.getDataSource().getId().equals(argScan.getDataSource().getId())
+ && op.getOutputLimit() == argScan.getOutputLimit();
+ if (!isomorphic) {
return Boolean.FALSE;
}
DataSourceScanOperator scanOpArg = (DataSourceScanOperator) copyAndSubstituteVar(op, arg);
- boolean isomorphic = VariableUtilities.varListEqualUnordered(op.getVariables(), scanOpArg.getVariables())
- && op.getDataSource().toString().equals(scanOpArg.getDataSource().toString());
+ ILogicalExpression opCondition = op.getSelectCondition() != null ? op.getSelectCondition().getValue() : null;
+ ILogicalExpression argCondition =
+ scanOpArg.getSelectCondition() != null ? scanOpArg.getSelectCondition().getValue() : null;
+ isomorphic = VariableUtilities.varListEqualUnordered(op.getVariables(), scanOpArg.getVariables())
+ && Objects.equals(opCondition, argCondition);
return isomorphic;
}