ASTERIXDB-1233: Fixed bulk-loading of a dataset that has an inverted index on an open-type field

Change-Id: If58f594c0a7b6f4bca45b13ceaef07b605d2fe22
Reviewed-on: https://asterix-gerrit.ics.uci.edu/740
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ildar Absalyamov <ildar.absalyamov@gmail.com>
diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql
new file mode 100644
index 0000000..f736ebc
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse OpenTinySocial if exists;
+create dataverse OpenTinySocial;
+use dataverse OpenTinySocial;
+create type FacebookMessageType as
+{ message-id: int64 }
+
+create dataset FacebookMessages(FacebookMessageType)
+primary key message-id;
+create index fbAuthorIdx on FacebookMessages(author-id: int64) type btree enforced;
+create index fbSenderLocIndex on FacebookMessages(sender-location: point) type rtree enforced;
+create index fbMessageIdx on FacebookMessages(message: string) type keyword enforced;
diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql
new file mode 100644
index 0000000..f7740dc
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse OpenTinySocial;
+
+load dataset FacebookMessages
+using localfs
+(("path"="asterix_nc1://data/tinysocial/fbm.adm"),("format"="adm")) pre-sorted;
diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql
new file mode 100644
index 0000000..d97df77
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse OpenTinySocial;
+
+count(
+for $c in dataset('FacebookMessages')
+return $c
+);
diff --git a/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm b/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm
new file mode 100644
index 0000000..60d3b2f
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm
@@ -0,0 +1 @@
+15
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index 81480fd..115cf92 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -1764,6 +1764,11 @@
             </compilation-unit>
         </test-case>
         <test-case FilePath="dml">
+            <compilation-unit name="load-with-index-open_02">
+                <output-dir compare="Text">load-with-index-open_02</output-dir>
+            </compilation-unit>
+        </test-case>
+        <test-case FilePath="dml">
             <compilation-unit name="load-with-ngram-index-open">
                 <output-dir compare="Text">load-with-ngram-index-open</output-dir>
             </compilation-unit>
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
index 90bec64..5eba66c 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
@@ -589,7 +589,7 @@
 
     public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildExternalDatasetDataScannerRuntime(
             JobSpecification jobSpec, IAType itemType, IAdapterFactory adapterFactory, IDataFormat format)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         if (itemType.getTypeTag() != ATypeTag.RECORD) {
             throw new AlgebricksException("Can only scan datasets of records.");
         }
@@ -676,7 +676,8 @@
                 }
                 Pair<IBinaryComparatorFactory[], ITypeTraits[]> comparatorFactoriesAndTypeTraits = getComparatorFactoriesAndTypeTraitsOfSecondaryBTreeIndex(
                         secondaryIndex.getIndexType(), secondaryIndex.getKeyFieldNames(),
-                        secondaryIndex.getKeyFieldTypes(), DatasetUtils.getPartitioningKeys(dataset), itemType, dataset.getDatasetType());
+                        secondaryIndex.getKeyFieldTypes(), DatasetUtils.getPartitioningKeys(dataset), itemType,
+                        dataset.getDatasetType());
                 comparatorFactories = comparatorFactoriesAndTypeTraits.first;
                 typeTraits = comparatorFactoriesAndTypeTraits.second;
                 if (filterTypeTraits != null) {
@@ -799,21 +800,21 @@
         }
 
         for (int j = 0; j < pidxKeyFieldCount; ++j, ++i) {
-           IAType keyType = null;
-           try {
-               switch (dsType) {
-                   case INTERNAL:
-                   keyType = recType.getSubFieldType(pidxKeyFieldNames.get(j));
-                   break;
-               case EXTERNAL:
-                   keyType = IndexingConstants.getFieldType(j);
-                   break;
-               default:
-                   throw new AlgebricksException("Unknown Dataset Type");
-               }
-           } catch (AsterixException e) {
-               throw new AlgebricksException(e);
-           }
+            IAType keyType = null;
+            try {
+                switch (dsType) {
+                    case INTERNAL:
+                        keyType = recType.getSubFieldType(pidxKeyFieldNames.get(j));
+                        break;
+                    case EXTERNAL:
+                        keyType = IndexingConstants.getFieldType(j);
+                        break;
+                    default:
+                        throw new AlgebricksException("Unknown Dataset Type");
+                }
+            } catch (AsterixException e) {
+                throw new AlgebricksException(e);
+            }
             comparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(keyType,
                     true);
             typeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(keyType);
@@ -1363,7 +1364,7 @@
             IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys,
             AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context,
             JobSpecification spec, IndexOperation indexOp, IndexType indexType, boolean bulkload)
-                    throws AlgebricksException {
+            throws AlgebricksException {
 
         // Sanity checks.
         if (primaryKeys.size() > 1) {
@@ -1467,6 +1468,7 @@
                     dataset.getDatasetName(), indexName);
 
             List<List<String>> secondaryKeyExprs = secondaryIndex.getKeyFieldNames();
+            List<IAType> secondaryKeyTypeEntries = secondaryIndex.getKeyFieldTypes();
 
             int numTokenFields = (!isPartitioned) ? secondaryKeys.size() : secondaryKeys.size() + 1;
             ITypeTraits[] tokenTypeTraits = new ITypeTraits[numTokenFields];
@@ -1476,7 +1478,8 @@
             // return the derived type.
             // e.g. UNORDERED LIST -> return UNORDERED LIST type
             IAType secondaryKeyType = null;
-            Pair<IAType, Boolean> keyPairType = Index.getNonNullableKeyFieldType(secondaryKeyExprs.get(0), recType);
+            Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(secondaryKeyTypeEntries.get(0),
+                    secondaryKeyExprs.get(0), recType);
             secondaryKeyType = keyPairType.first;
             List<List<String>> partitioningKeys = DatasetUtils.getPartitioningKeys(dataset);
             i = 0;
@@ -1552,7 +1555,7 @@
             IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys,
             List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields,
             ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         return getIndexInsertOrDeleteRuntime(IndexOperation.DELETE, dataSourceIndex, propagatedSchema, inputSchemas,
                 typeEnv, primaryKeys, secondaryKeys, additionalNonKeyFields, filterExpr, recordDesc, context, spec,
                 false);
@@ -1560,7 +1563,7 @@
 
     private AsterixTupleFilterFactory createTupleFilterFactory(IOperatorSchema[] inputSchemas,
             IVariableTypeEnvironment typeEnv, ILogicalExpression filterExpr, JobGenContext context)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         // No filtering condition.
         if (filterExpr == null) {
             return null;
@@ -2233,7 +2236,7 @@
             JobSpecification jobSpec, Dataset dataset, Index secondaryIndex, int[] ridIndexes, boolean retainInput,
             IVariableTypeEnvironment typeEnv, List<LogicalVariable> outputVars, IOperatorSchema opSchema,
             JobGenContext context, AqlMetadataProvider metadataProvider, boolean retainNull)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         try {
             // Get data type
             IAType itemType = null;
@@ -2485,7 +2488,7 @@
             List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory filterFactory,
             RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, IndexType indexType,
             List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         // Check the index is length-partitioned or not.
         boolean isPartitioned;
         if (indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX
@@ -2710,7 +2713,7 @@
             List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory filterFactory,
             RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec,
             List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         try {
             Dataset dataset = MetadataManager.INSTANCE.getDataset(mdTxnCtx, dataverseName, datasetName);
 
@@ -2864,7 +2867,7 @@
             List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory filterFactory,
             RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec,
             List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys)
-                    throws AlgebricksException {
+            throws AlgebricksException {
         // we start with the btree
         Dataset dataset = findDataset(dataverseName, datasetName);
         if (dataset == null) {