[ASTERIXDB-2791][IDX] Add option to exclude unknowns in secondary indexes

- user model changes: yes
- storage format changes: no
- interface changes: no

Details:
- add EXCLUDE UNKNOWN KEY and INCLUDE UNKNOWN KEY to CREATE INDEX statement
  to allow the user to exclude/include unknown keys.
- bring back the filter when creating the job spec for BTrees.
  Use the filter when unknown keys should be excluded.
- For UPSERT, pass the filter for previous tuple, as well, to determine
  if the previous tuple's index entry should be processed.

Change-Id: Id40de17d1392510f6ebfea5bd81037a9305895af
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/12624
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/SqlppStatementUtil.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/SqlppStatementUtil.java
index 5281dd9..7e1783b 100644
--- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/SqlppStatementUtil.java
+++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/util/SqlppStatementUtil.java
@@ -32,6 +32,8 @@
     public static final String CREATE_INDEX = "CREATE INDEX ";
     public static final String CREATE_PRIMARY_INDEX = "CREATE PRIMARY INDEX ";
     public static final String DROP_INDEX = "DROP INDEX ";
+    public static final String INCLUDE_UNKNOWN_KEY = " INCLUDE UNKNOWN KEY ";
+    public static final String EXCLUDE_UNKNOWN_KEY = " EXCLUDE UNKNOWN KEY ";
     public static final String ON = " ON ";
     public static final String WHERE = " WHERE ";
     public static final String AND = " AND ";
@@ -69,10 +71,18 @@
 
     @SuppressWarnings("squid:S1172") // unused variable
     public static StringBuilder getCreateIndexStatement(StringBuilder stringBuilder, DataverseName dataverseName,
-            String datasetName, String indexName, String fields, int version) {
+            String datasetName, String indexName, String fields, Boolean excludeUnknown, int version) {
         stringBuilder.append(CREATE_INDEX);
         enclose(stringBuilder, indexName).append(ON);
-        return enclose(stringBuilder, dataverseName, datasetName).append(fields).append(SEMI_COLON);
+        StringBuilder appender = enclose(stringBuilder, dataverseName, datasetName).append(fields);
+        if (excludeUnknown != null) {
+            if (excludeUnknown) {
+                appender.append(EXCLUDE_UNKNOWN_KEY);
+            } else {
+                appender.append(INCLUDE_UNKNOWN_KEY);
+            }
+        }
+        return appender.append(SEMI_COLON);
     }
 
     @SuppressWarnings("squid:S1172") // unused variable
diff --git a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
index de9ea7a..b240779 100644
--- a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
+++ b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
@@ -227,6 +227,7 @@
     private static final String CURRENT = "CURRENT";
     private static final String DEFAULT = "DEFAULT";
     private static final String EXCLUDE = "EXCLUDE";
+    private static final String INCLUDE = "INCLUDE";
     private static final String FIRST = "FIRST";
     private static final String FOLLOWING = "FOLLOWING";
     private static final String GROUPING = "GROUPING";
@@ -1131,6 +1132,7 @@
   boolean hasUnnest = false;
   String fullTextConfigName = null;
   Token startElementToken = null;
+  Boolean excludeUnknown = null;
 }
 {
   (
@@ -1149,6 +1151,17 @@
       )*
     <RIGHTPAREN>
     ( <TYPE> indexParams = IndexType() )? ( <ENFORCED> { enforced = true; } )?
+    ( <IDENTIFIER>
+      {
+        if (isToken(EXCLUDE)) {
+          excludeUnknown = true;
+        } else if (isToken(INCLUDE)) {
+          excludeUnknown = false;
+        } else {
+          throw createUnexpectedTokenError();
+        }
+      } <UNKNOWN> <KEY>
+    )?
   )
   {
     IndexType indexType;
@@ -1163,7 +1176,8 @@
       fullTextConfigName = null;
     }
     CreateIndexStatement stmt = new CreateIndexStatement(nameComponents.first, nameComponents.second,
-      new Identifier(indexName), indexType, indexedElementList, enforced, gramLength, fullTextConfigName, ifNotExists);
+      new Identifier(indexName), indexType, indexedElementList, enforced, gramLength, fullTextConfigName, ifNotExists,
+      excludeUnknown);
     return addSourceLocation(stmt, startStmtToken);
   }
 }
@@ -1292,7 +1306,7 @@
       indexName = "primary_idx_" + nameComponents.second;
     }
     CreateIndexStatement stmt = new CreateIndexStatement(nameComponents.first, nameComponents.second,
-      new Identifier(indexName), IndexType.BTREE, Collections.emptyList(), false, -1, null, ifNotExists);
+      new Identifier(indexName), IndexType.BTREE, Collections.emptyList(), false, -1, null, ifNotExists, null);
     return addSourceLocation(stmt, startStmtToken);
   }
 }