[NO ISSUE][COMP] Fix isomorphism comparison of data-scan operators

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- When comparing data-scan operators IsomorphismOperatorVisitor
  must consider their pushed conditions and limits

Change-Id: Id07b42579c7905b751de0f0ac7bdee5720cd4e7d
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/8063
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.1.ddl.sqlpp
index 917715e..319db54 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.1.ddl.sqlpp
@@ -42,6 +42,28 @@
   tweetid : string
 };
 
+create type onekType1 as
+ closed {
+  unique1 : bigint,
+  unique2 : bigint,
+  two : bigint,
+  four : bigint,
+  ten : bigint,
+  twenty : bigint,
+  onePercent : bigint,
+  tenPercent : bigint,
+  twentyPercent : bigint,
+  fiftyPercent : bigint,
+  unique3 : bigint,
+  evenOnePercent : bigint,
+  oddOnePercent : bigint,
+  stringu1 : string,
+  stringu2 : string,
+  string4 : string
+};
+
 create dataset DBLP1(DBLPType) primary key id;
 
-create  dataset TweetMessages(TweetMessageType) primary key tweetid;
\ No newline at end of file
+create  dataset TweetMessages(TweetMessageType) primary key tweetid;
+
+create dataset onek1(onekType1) primary key unique2;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.query.sqlpp
new file mode 100644
index 0000000..a91cece
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.query.sqlpp
@@ -0,0 +1,46 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Check that data-scan operator isomorphism comparison
+ * considers pushed condition.
+ *
+ * The dataset contains 100 tuples, 50 with two=0 and 50 with two=1.
+ * The expected result is 50.
+ */
+
+use test;
+
+with T1 as (
+  select two from onek1
+  where two between 1 and 10
+  limit 1000
+),
+T2 as (
+  select two from onek1
+  where two between -10 and -1
+  limit 1000
+)
+
+select value count(*) from (
+  select two from T1
+  union all
+  select two from T2
+) t;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.query.sqlpp
new file mode 100644
index 0000000..34eb190
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.query.sqlpp
@@ -0,0 +1,45 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Check that data-scan operator isomorphism comparison
+ * considers pushed condition
+ */
+
+use test;
+
+explain
+
+with T1 as (
+  select two from onek1
+  where two between 1 and 10
+  limit 1000
+),
+T2 as (
+  select two from onek1
+  where two between -10 and -1
+  limit 1000
+)
+
+select value count(*) from (
+  select two from T1
+  union all
+  select two from T2
+) t;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.2.update.sqlpp
index a18f3e2..cd3d45f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.2.update.sqlpp
@@ -16,11 +16,11 @@
  * specific language governing permissions and limitations
  * under the License.
  */
-/* scan and print a delimited text file */
 
 use test;
 
-
 load  dataset DBLP1 using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-id.txt`),(`format`=`delimited-text`),(`delimiter`=`:`));
 
-load  dataset TweetMessages using localfs ((`path`=`asterix_nc1://data/tinysocial/twm.adm`),(`format`=`adm`));
\ No newline at end of file
+load  dataset TweetMessages using localfs ((`path`=`asterix_nc1://data/tinysocial/twm.adm`),(`format`=`adm`));
+
+load dataset onek1 using localfs ((`path`=`asterix_nc1://data/wisc/onektup.adm`),(`format`=`delimited-text`),(`delimiter`=`|`)) pre-sorted;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.1.ddl.sqlpp
index 162cc35..36af900 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.1.ddl.sqlpp
@@ -38,5 +38,26 @@
   misc : string
 };
 
+create type onekType1 as
+ closed {
+  unique1 : bigint,
+  unique2 : bigint,
+  two : bigint,
+  four : bigint,
+  ten : bigint,
+  twenty : bigint,
+  onePercent : bigint,
+  tenPercent : bigint,
+  twentyPercent : bigint,
+  fiftyPercent : bigint,
+  unique3 : bigint,
+  evenOnePercent : bigint,
+  oddOnePercent : bigint,
+  stringu1 : string,
+  stringu2 : string,
+  string4 : string
+};
+
 create  dataset DBLP1(DBLPType) primary key id;
 
+create dataset onek1(onekType1) primary key unique2;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.2.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.2.update.sqlpp
index 2e0f056..4d4065d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.2.update.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.2.update.sqlpp
@@ -20,5 +20,6 @@
 
 use test;
 
-
 load  dataset DBLP1 using localfs ((`path`=`asterix_nc1://data/dblp-small/dblp-small-id.txt`),(`format`=`delimited-text`),(`delimiter`=`:`));
+
+load dataset onek1 using localfs ((`path`=`asterix_nc1://data/wisc/onektup.adm`),(`format`=`delimited-text`),(`delimiter`=`|`)) pre-sorted;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.query.sqlpp
new file mode 100644
index 0000000..dd9eb15
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.query.sqlpp
@@ -0,0 +1,45 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Check that data-scan operator isomorphism comparison
+ * considers pushed limit.
+ *
+ * The dataset contains 100 tuples,
+ * so the expected result is 110.
+ */
+
+use test;
+
+with
+T1 as (
+  select two from onek1
+  limit 100
+),
+T2 as (
+  select two from onek1
+  limit 10
+)
+
+select value count(*) from (
+  select two from T1
+  union all
+  select two from T2
+) t;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.query.sqlpp
new file mode 100644
index 0000000..a592faa
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.query.sqlpp
@@ -0,0 +1,44 @@
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/**
+ * Check that data-scan operator isomorphism comparison
+ * considers pushed limit.
+ */
+
+use test;
+
+explain
+
+with
+T1 as (
+  select two from onek1
+  limit 100
+),
+T2 as (
+  select two from onek1
+  limit 10
+)
+
+select value count(*) from (
+  select two from T1
+  union all
+  select two from T2
+) t;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.adm
new file mode 100644
index 0000000..c5b431b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.10.adm
@@ -0,0 +1 @@
+50
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.adm
new file mode 100644
index 0000000..a040253
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan-select/push-limit-to-primary-scan-select.11.adm
@@ -0,0 +1,52 @@
+distribute result [$$128]
+-- DISTRIBUTE_RESULT  |LOCAL|
+  exchange
+  -- ONE_TO_ONE_EXCHANGE  |LOCAL|
+    aggregate [$$128] <- [agg-sql-sum($$129)]
+    -- AGGREGATE  |LOCAL|
+      aggregate [$$129] <- [agg-sql-count(1)]
+      -- AGGREGATE  |LOCAL|
+        exchange
+        -- ONE_TO_ONE_EXCHANGE  |UNPARTITIONED|
+          union
+          -- UNION_ALL  |UNPARTITIONED|
+            exchange
+            -- ONE_TO_ONE_EXCHANGE  |UNPARTITIONED|
+              limit 1000
+              -- STREAM_LIMIT  |UNPARTITIONED|
+                project ([])
+                -- STREAM_PROJECT  |PARTITIONED|
+                  exchange
+                  -- SORT_MERGE_EXCHANGE [$$126(ASC) ]  |PARTITIONED|
+                    limit 1000
+                    -- STREAM_LIMIT  |PARTITIONED|
+                      project ([$$126])
+                      -- STREAM_PROJECT  |PARTITIONED|
+                        exchange
+                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                          data-scan []<-[$$126, $$onek1] <- test.onek1 condition (and(ge($$onek1.getField(2), 1), le($$onek1.getField(2), 10))) limit 1000
+                          -- DATASOURCE_SCAN  |PARTITIONED|
+                            exchange
+                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                              empty-tuple-source
+                              -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+            exchange
+            -- ONE_TO_ONE_EXCHANGE  |UNPARTITIONED|
+              limit 1000
+              -- STREAM_LIMIT  |UNPARTITIONED|
+                project ([])
+                -- STREAM_PROJECT  |PARTITIONED|
+                  exchange
+                  -- SORT_MERGE_EXCHANGE [$$127(ASC) ]  |PARTITIONED|
+                    limit 1000
+                    -- STREAM_LIMIT  |PARTITIONED|
+                      project ([$$127])
+                      -- STREAM_PROJECT  |PARTITIONED|
+                        exchange
+                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                          data-scan []<-[$$127, $$onek1] <- test.onek1 condition (and(ge($$onek1.getField(2), -10), le($$onek1.getField(2), -1))) limit 1000
+                          -- DATASOURCE_SCAN  |PARTITIONED|
+                            exchange
+                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                              empty-tuple-source
+                              -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.adm
new file mode 100644
index 0000000..97e3504
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.6.adm
@@ -0,0 +1 @@
+110
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.adm
new file mode 100644
index 0000000..f6de6e3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/limit/push-limit-to-primary-scan/push-limit-to-primary-scan.7.adm
@@ -0,0 +1,52 @@
+distribute result [$$114]
+-- DISTRIBUTE_RESULT  |LOCAL|
+  exchange
+  -- ONE_TO_ONE_EXCHANGE  |LOCAL|
+    aggregate [$$114] <- [agg-sql-sum($$115)]
+    -- AGGREGATE  |LOCAL|
+      aggregate [$$115] <- [agg-sql-count(1)]
+      -- AGGREGATE  |LOCAL|
+        exchange
+        -- ONE_TO_ONE_EXCHANGE  |UNPARTITIONED|
+          union
+          -- UNION_ALL  |UNPARTITIONED|
+            exchange
+            -- ONE_TO_ONE_EXCHANGE  |UNPARTITIONED|
+              limit 100
+              -- STREAM_LIMIT  |UNPARTITIONED|
+                project ([])
+                -- STREAM_PROJECT  |PARTITIONED|
+                  exchange
+                  -- SORT_MERGE_EXCHANGE [$$112(ASC) ]  |PARTITIONED|
+                    limit 100
+                    -- STREAM_LIMIT  |PARTITIONED|
+                      project ([$$112])
+                      -- STREAM_PROJECT  |PARTITIONED|
+                        exchange
+                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                          data-scan []<-[$$112, $$onek1] <- test.onek1 limit 100
+                          -- DATASOURCE_SCAN  |PARTITIONED|
+                            exchange
+                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                              empty-tuple-source
+                              -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
+            exchange
+            -- ONE_TO_ONE_EXCHANGE  |UNPARTITIONED|
+              limit 10
+              -- STREAM_LIMIT  |UNPARTITIONED|
+                project ([])
+                -- STREAM_PROJECT  |PARTITIONED|
+                  exchange
+                  -- SORT_MERGE_EXCHANGE [$$113(ASC) ]  |PARTITIONED|
+                    limit 10
+                    -- STREAM_LIMIT  |PARTITIONED|
+                      project ([$$113])
+                      -- STREAM_PROJECT  |PARTITIONED|
+                        exchange
+                        -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                          data-scan []<-[$$113, $$onek1] <- test.onek1 limit 10
+                          -- DATASOURCE_SCAN  |PARTITIONED|
+                            exchange
+                            -- ONE_TO_ONE_EXCHANGE  |PARTITIONED|
+                              empty-tuple-source
+                              -- EMPTY_TUPLE_SOURCE  |PARTITIONED|
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
index 7b3f53a..e50ea65 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
@@ -460,12 +460,17 @@
             return Boolean.FALSE;
         }
         DataSourceScanOperator argScan = (DataSourceScanOperator) arg;
-        if (!argScan.getDataSource().toString().equals(op.getDataSource().toString())) {
+        boolean isomorphic = op.getDataSource().getId().equals(argScan.getDataSource().getId())
+                && op.getOutputLimit() == argScan.getOutputLimit();
+        if (!isomorphic) {
             return Boolean.FALSE;
         }
         DataSourceScanOperator scanOpArg = (DataSourceScanOperator) copyAndSubstituteVar(op, arg);
-        boolean isomorphic = VariableUtilities.varListEqualUnordered(op.getVariables(), scanOpArg.getVariables())
-                && op.getDataSource().toString().equals(scanOpArg.getDataSource().toString());
+        ILogicalExpression opCondition = op.getSelectCondition() != null ? op.getSelectCondition().getValue() : null;
+        ILogicalExpression argCondition =
+                scanOpArg.getSelectCondition() != null ? scanOpArg.getSelectCondition().getValue() : null;
+        isomorphic = VariableUtilities.varListEqualUnordered(op.getVariables(), scanOpArg.getVariables())
+                && Objects.equals(opCondition, argCondition);
         return isomorphic;
     }