[NO ISSUE][IDX] Disable index only plan when index includes UNKNOWNs

- user model changes: no
- storage format changes: no
- interface changes: no

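Details:
- AccessMethodUtils no longer selects an index-only plan when
  IndexUtil.includesUnknowns(chosenIndex) is true.
- SecondaryIndexOperationsHelper.excludeUnknownKeys now delegates to
  IndexUtil.excludesUnknowns instead of its own private helper.
- The affected index-only tests now declare their secondary indexes with
  "exclude unknown key".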

Change-Id: I330aaaa1792ad7fe1b264f6e10577c3defe8eeec
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/14405
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java
index 3e5d5ae..e532187 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AccessMethodUtils.java
@@ -47,6 +47,7 @@
 import org.apache.asterix.metadata.entities.ExternalDatasetDetails;
 import org.apache.asterix.metadata.entities.Index;
 import org.apache.asterix.metadata.utils.ArrayIndexUtil;
+import org.apache.asterix.metadata.utils.IndexUtil;
 import org.apache.asterix.metadata.utils.KeyFieldTypeUtil;
 import org.apache.asterix.om.base.ABoolean;
 import org.apache.asterix.om.base.AInt32;
@@ -2103,7 +2104,8 @@
         // an inverted index contains a part of a field value, not all of it.
         if (noIndexOnlyPlanOption || dataset.getDatasetType() == DatasetType.EXTERNAL || chosenIndex.isPrimaryIndex()
                 || chosenIndex.getIndexDetails().isOverridingKeyFieldTypes() || chosenIndex.isEnforced()
-                || isInvertedIndex(chosenIndex) || chosenIndex.getIndexType() == IndexType.ARRAY) {
+                || isInvertedIndex(chosenIndex) || chosenIndex.getIndexType() == IndexType.ARRAY
+                || IndexUtil.includesUnknowns(chosenIndex)) {
             indexOnlyPlanInfo.setFirst(false);
             return;
         }
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_01_ps.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_01_ps.sqlpp
index 16cb966..7eb8623 100644
--- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_01_ps.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_01_ps.sqlpp
@@ -55,9 +55,9 @@
 
 create  index twmSndLocIx  on TweetMessages (`sender-location`) type rtree;
 
-create  index msgCountAIx  on TweetMessages (countA) type btree;
+create  index msgCountAIx  on TweetMessages (countA) type btree exclude unknown key;
 
-create  index msgCountBIx  on TweetMessages (countB) type btree;
+create  index msgCountBIx  on TweetMessages (countB) type btree exclude unknown key;
 
 create  index msgTextIx  on TweetMessages (`message-text`) type keyword;
 
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03-index-only.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03-index-only.sqlpp
index 9da4825..4c937e3 100644
--- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03-index-only.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-join/leftouterjoin-probe-pidx-with-join-btree-sidx_03-index-only.sqlpp
@@ -55,9 +55,9 @@
 
 create  index twmSndLocIx  on TweetMessages (`sender-location`) type rtree;
 
-create  index msgCountAIx  on TweetMessages (countA) type btree;
+create  index msgCountAIx  on TweetMessages (countA) type btree exclude unknown key;
 
-create  index msgCountBIx  on TweetMessages (countB) type btree;
+create  index msgCountBIx  on TweetMessages (countB) type btree exclude unknown key;
 
 create  index msgTextIx  on TweetMessages (`message-text`) type keyword;
 
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-selection/btree-index-composite-key-04.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-selection/btree-index-composite-key-04.sqlpp
index e9afad8..917d2fd 100644
--- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-selection/btree-index-composite-key-04.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index-selection/btree-index-composite-key-04.sqlpp
@@ -38,7 +38,7 @@
 
 create dataset employee(Emp) primary key id;
 
-create index idx_employee_f_l_name on employee (fname,lname) type btree;
+create index idx_employee_f_l_name on employee (fname,lname) type btree exclude unknown key;
 
 select id, fname, lname
 from employee
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-01.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-01.sqlpp
index af1a099..d181857 100644
--- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-01.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-01.sqlpp
@@ -51,8 +51,8 @@
 
 create dataset MyData(MyRecord) primary key id;
 
-create index btree_index_docid on MyData(docid) type btree;
-create index btree_index_val1 on MyData(val1) type btree;
+create index btree_index_docid on MyData(docid) type btree exclude unknown key;
+create index btree_index_val1 on MyData(val1) type btree exclude unknown key;
 create index rtree_index_point on MyData(point) type rtree;
 create index rtree_index_rec on MyData(rec) type rtree;
 create index ngram_index_title on MyData(title) type ngram(3);
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-01_ps.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-01_ps.sqlpp
index e5c4e0c..a34ce04 100644
--- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-01_ps.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-01_ps.sqlpp
@@ -51,8 +51,8 @@
 
 create dataset MyData(MyRecord) primary key id;
 
-create index btree_index_docid on MyData(docid) type btree;
-create index btree_index_val1 on MyData(val1) type btree;
+create index btree_index_docid on MyData(docid) type btree exclude unknown key;
+create index btree_index_val1 on MyData(val1) type btree exclude unknown key;
 create index rtree_index_point on MyData(point) type rtree;
 create index rtree_index_rec on MyData(rec) type rtree;
 create index ngram_index_title on MyData(title) type ngram(3);
diff --git a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-10.sqlpp b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-10.sqlpp
index 3259a28..c7ee31c 100644
--- a/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-10.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/optimizerts/queries/btree-index/btree-sidx-idxonly-10.sqlpp
@@ -87,7 +87,7 @@
 
 create dataset ds_tweet(typeTweet) if not exists primary key id;
 
-create index create_at_status_count_idx on ds_tweet(user.create_at, user.status_count);
+create index create_at_status_count_idx on ds_tweet(user.create_at, user.status_count) exclude unknown key;
 
 select value count(first.create_at) from (
 select t.user.create_at, t.user.status_count, t.id from ds_tweet t
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/secondary-index-index-only/secondary-index-index-only.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/secondary-index-index-only/secondary-index-index-only.1.ddl.sqlpp
index c4d77ca..219f66b 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/secondary-index-index-only/secondary-index-index-only.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/metrics/secondary-index-index-only/secondary-index-index-only.1.ddl.sqlpp
@@ -48,4 +48,4 @@
 };
 
 create dataset Customers(CustomerType) primary key cid;
-create index customer_name_idx on Customers(name);
\ No newline at end of file
+create index customer_name_idx on Customers(name) exclude unknown key;
\ No newline at end of file
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/IndexUtil.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/IndexUtil.java
index ab5b362..ad92803 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/IndexUtil.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/IndexUtil.java
@@ -45,6 +45,7 @@
 import org.apache.hyracks.api.exceptions.SourceLocation;
 import org.apache.hyracks.api.job.IJobletEventListenerFactory;
 import org.apache.hyracks.api.job.JobSpecification;
+import org.apache.hyracks.util.OptionalBoolean;
 
 public class IndexUtil {
 
@@ -216,4 +217,27 @@
             return null;
         }
     }
+
+    public static boolean includesUnknowns(Index index) {
+        return !index.isPrimaryKeyIndex() && secondaryIndexIncludesUnknowns(index); // PK index => false
+    }
+
+    private static boolean secondaryIndexIncludesUnknowns(Index index) {
+        if (Index.IndexCategory.of(index.getIndexType()) != Index.IndexCategory.VALUE) {
+            // only VALUE-category indexes are examined; every other category is reported as not including unknowns
+            return false;
+        }
+        OptionalBoolean excludeUnknownKey = ((Index.ValueIndexDetails) index.getIndexDetails()).getExcludeUnknownKey();
+        if (index.getIndexType() == DatasetConfig.IndexType.BTREE) {
+            // BTREE: includes unknowns unless excludeUnknownKey is present and true (absent => includes)
+            return excludeUnknownKey.isEmpty() || !excludeUnknownKey.get();
+        } else {
+            // other VALUE index types: include unknowns only if excludeUnknownKey is present and false
+            return !excludeUnknownKey.isEmpty() && !excludeUnknownKey.get();
+        }
+    }
+
+    public static boolean excludesUnknowns(Index index) {
+        return !includesUnknowns(index); // complement of includesUnknowns; true for primary key indexes
+    }
 }
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SecondaryIndexOperationsHelper.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SecondaryIndexOperationsHelper.java
index 8be248b..9e0d650 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SecondaryIndexOperationsHelper.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SecondaryIndexOperationsHelper.java
@@ -436,11 +436,7 @@
 
     protected static boolean excludeUnknownKeys(Index index, Index.ValueIndexDetails details,
             boolean anySecKeyIsNullable) {
-        return excludeUnknowns(index, details) && (anySecKeyIsNullable || details.isOverridingKeyFieldTypes());
-    }
-
-    private static boolean excludeUnknowns(Index index, Index.ValueIndexDetails details) {
-        return index.isPrimaryKeyIndex() || details.getExcludeUnknownKey().getOrElse(false);
+        return IndexUtil.excludesUnknowns(index) && (anySecKeyIsNullable || details.isOverridingKeyFieldTypes());
     }
 
     protected LSMIndexBulkLoadOperatorDescriptor createTreeIndexBulkLoadOp(JobSpecification spec,