ASTERIXDB-1233: Fixed bulk loading with an inverted index on an open-type field
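
The root cause is visible in the AqlMetadataProvider hunk below: the
inverted-index bulk-load path resolved the secondary key type with
Index.getNonNullableKeyFieldType(...), which consults only the dataset's
declared record type and therefore has nothing to return for an enforced
index on an open (undeclared) field. The fix switches to
Index.getNonNullableOpenFieldType(...) and additionally passes the key
type recorded in the index definition. A minimal, self-contained sketch
of that fallback idea, using hypothetical stand-in names
(OpenFieldTypeResolution, RecordType, resolveKeyType -- not the actual
AsterixDB classes):

    import java.util.List;
    import java.util.Map;

    // Sketch only: illustrates the open-field fallback, not the real API.
    public final class OpenFieldTypeResolution {

        // Stand-in for the dataset's declared (closed part of the) record type.
        record RecordType(Map<String, String> declaredFields) {
            String subFieldType(List<String> fieldName) {
                // Only top-level fields are modeled in this sketch.
                return declaredFields.get(fieldName.get(0));
            }
        }

        // Key-type resolution for a secondary index: use the declared field type
        // when the field is part of the record type, otherwise fall back to the
        // key type stored with the index definition (the open-field case).
        static String resolveKeyType(String indexKeyType, List<String> fieldName, RecordType recType) {
            String declared = recType.subFieldType(fieldName);
            return declared != null ? declared : indexKeyType;
        }

        public static void main(String[] args) {
            RecordType fbMessage = new RecordType(Map.of("message-id", "int64"));
            // "message" is not declared on the type, so the keyword index's own
            // key type (string) has to drive the bulk-load job.
            System.out.println(resolveKeyType("string", List.of("message"), fbMessage));   // -> string
            System.out.println(resolveKeyType("int64", List.of("message-id"), fbMessage)); // -> int64
        }
    }

In the AQL test added below, the enforced btree, rtree, and keyword
indexes are all on fields absent from FacebookMessageType, which is
exactly the case this path has to handle during load.
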
Change-Id: If58f594c0a7b6f4bca45b13ceaef07b605d2fe22
Reviewed-on: https://asterix-gerrit.ics.uci.edu/740
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ildar Absalyamov <ildar.absalyamov@gmail.com>
diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql
new file mode 100644
index 0000000..f736ebc
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.1.ddl.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse OpenTinySocial if exists;
+create dataverse OpenTinySocial;
+use dataverse OpenTinySocial;
+create type FacebookMessageType as
+{ message-id: int64 }
+
+create dataset FacebookMessages(FacebookMessageType)
+primary key message-id;
+create index fbAuthorIdx on FacebookMessages(author-id: int64) type btree enforced;
+create index fbSenderLocIndex on FacebookMessages(sender-location: point) type rtree enforced;
+create index fbMessageIdx on FacebookMessages(message: string) type keyword enforced;
diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql
new file mode 100644
index 0000000..f7740dc
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.2.update.aql
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse OpenTinySocial;
+
+load dataset FacebookMessages
+using localfs
+(("path"="asterix_nc1://data/tinysocial/fbm.adm"),("format"="adm")) pre-sorted;
diff --git a/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql
new file mode 100644
index 0000000..d97df77
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/queries/dml/load-with-index-open_02/load-with-index-open_02.3.query.aql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse OpenTinySocial;
+
+count(
+for $c in dataset('FacebookMessages')
+return $c
+);
diff --git a/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm b/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm
new file mode 100644
index 0000000..60d3b2f
--- /dev/null
+++ b/asterix-app/src/test/resources/runtimets/results/dml/load-with-index-open_02/load-with-index-open_02.1.adm
@@ -0,0 +1 @@
+15
diff --git a/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterix-app/src/test/resources/runtimets/testsuite.xml
index 81480fd..115cf92 100644
--- a/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -1764,6 +1764,11 @@
</compilation-unit>
</test-case>
<test-case FilePath="dml">
+ <compilation-unit name="load-with-index-open_02">
+ <output-dir compare="Text">load-with-index-open_02</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="dml">
<compilation-unit name="load-with-ngram-index-open">
<output-dir compare="Text">load-with-ngram-index-open</output-dir>
</compilation-unit>
diff --git a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
index 90bec64..5eba66c 100644
--- a/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
+++ b/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/AqlMetadataProvider.java
@@ -589,7 +589,7 @@
public Pair<IOperatorDescriptor, AlgebricksPartitionConstraint> buildExternalDatasetDataScannerRuntime(
JobSpecification jobSpec, IAType itemType, IAdapterFactory adapterFactory, IDataFormat format)
- throws AlgebricksException {
+ throws AlgebricksException {
if (itemType.getTypeTag() != ATypeTag.RECORD) {
throw new AlgebricksException("Can only scan datasets of records.");
}
@@ -676,7 +676,8 @@
}
Pair<IBinaryComparatorFactory[], ITypeTraits[]> comparatorFactoriesAndTypeTraits = getComparatorFactoriesAndTypeTraitsOfSecondaryBTreeIndex(
secondaryIndex.getIndexType(), secondaryIndex.getKeyFieldNames(),
- secondaryIndex.getKeyFieldTypes(), DatasetUtils.getPartitioningKeys(dataset), itemType, dataset.getDatasetType());
+ secondaryIndex.getKeyFieldTypes(), DatasetUtils.getPartitioningKeys(dataset), itemType,
+ dataset.getDatasetType());
comparatorFactories = comparatorFactoriesAndTypeTraits.first;
typeTraits = comparatorFactoriesAndTypeTraits.second;
if (filterTypeTraits != null) {
@@ -799,21 +800,21 @@
}
for (int j = 0; j < pidxKeyFieldCount; ++j, ++i) {
- IAType keyType = null;
- try {
- switch (dsType) {
- case INTERNAL:
- keyType = recType.getSubFieldType(pidxKeyFieldNames.get(j));
- break;
- case EXTERNAL:
- keyType = IndexingConstants.getFieldType(j);
- break;
- default:
- throw new AlgebricksException("Unknown Dataset Type");
- }
- } catch (AsterixException e) {
- throw new AlgebricksException(e);
- }
+ IAType keyType = null;
+ try {
+ switch (dsType) {
+ case INTERNAL:
+ keyType = recType.getSubFieldType(pidxKeyFieldNames.get(j));
+ break;
+ case EXTERNAL:
+ keyType = IndexingConstants.getFieldType(j);
+ break;
+ default:
+ throw new AlgebricksException("Unknown Dataset Type");
+ }
+ } catch (AsterixException e) {
+ throw new AlgebricksException(e);
+ }
comparatorFactories[i] = AqlBinaryComparatorFactoryProvider.INSTANCE.getBinaryComparatorFactory(keyType,
true);
typeTraits[i] = AqlTypeTraitProvider.INSTANCE.getTypeTrait(keyType);
@@ -1363,7 +1364,7 @@
IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys, List<LogicalVariable> secondaryKeys,
AsterixTupleFilterFactory filterFactory, RecordDescriptor recordDesc, JobGenContext context,
JobSpecification spec, IndexOperation indexOp, IndexType indexType, boolean bulkload)
- throws AlgebricksException {
+ throws AlgebricksException {
// Sanity checks.
if (primaryKeys.size() > 1) {
@@ -1467,6 +1468,7 @@
dataset.getDatasetName(), indexName);
List<List<String>> secondaryKeyExprs = secondaryIndex.getKeyFieldNames();
+ List<IAType> secondaryKeyTypeEntries = secondaryIndex.getKeyFieldTypes();
int numTokenFields = (!isPartitioned) ? secondaryKeys.size() : secondaryKeys.size() + 1;
ITypeTraits[] tokenTypeTraits = new ITypeTraits[numTokenFields];
@@ -1476,7 +1478,8 @@
// return the derived type.
// e.g. UNORDERED LIST -> return UNORDERED LIST type
IAType secondaryKeyType = null;
- Pair<IAType, Boolean> keyPairType = Index.getNonNullableKeyFieldType(secondaryKeyExprs.get(0), recType);
+ Pair<IAType, Boolean> keyPairType = Index.getNonNullableOpenFieldType(secondaryKeyTypeEntries.get(0),
+ secondaryKeyExprs.get(0), recType);
secondaryKeyType = keyPairType.first;
List<List<String>> partitioningKeys = DatasetUtils.getPartitioningKeys(dataset);
i = 0;
@@ -1552,7 +1555,7 @@
IOperatorSchema[] inputSchemas, IVariableTypeEnvironment typeEnv, List<LogicalVariable> primaryKeys,
List<LogicalVariable> secondaryKeys, List<LogicalVariable> additionalNonKeyFields,
ILogicalExpression filterExpr, RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec)
- throws AlgebricksException {
+ throws AlgebricksException {
return getIndexInsertOrDeleteRuntime(IndexOperation.DELETE, dataSourceIndex, propagatedSchema, inputSchemas,
typeEnv, primaryKeys, secondaryKeys, additionalNonKeyFields, filterExpr, recordDesc, context, spec,
false);
@@ -1560,7 +1563,7 @@
private AsterixTupleFilterFactory createTupleFilterFactory(IOperatorSchema[] inputSchemas,
IVariableTypeEnvironment typeEnv, ILogicalExpression filterExpr, JobGenContext context)
- throws AlgebricksException {
+ throws AlgebricksException {
// No filtering condition.
if (filterExpr == null) {
return null;
@@ -2233,7 +2236,7 @@
JobSpecification jobSpec, Dataset dataset, Index secondaryIndex, int[] ridIndexes, boolean retainInput,
IVariableTypeEnvironment typeEnv, List<LogicalVariable> outputVars, IOperatorSchema opSchema,
JobGenContext context, AqlMetadataProvider metadataProvider, boolean retainNull)
- throws AlgebricksException {
+ throws AlgebricksException {
try {
// Get data type
IAType itemType = null;
@@ -2485,7 +2488,7 @@
List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory filterFactory,
RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec, IndexType indexType,
List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys)
- throws AlgebricksException {
+ throws AlgebricksException {
// Check the index is length-partitioned or not.
boolean isPartitioned;
if (indexType == IndexType.LENGTH_PARTITIONED_WORD_INVIX
@@ -2710,7 +2713,7 @@
List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory filterFactory,
RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec,
List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys)
- throws AlgebricksException {
+ throws AlgebricksException {
try {
Dataset dataset = MetadataManager.INSTANCE.getDataset(mdTxnCtx, dataverseName, datasetName);
@@ -2864,7 +2867,7 @@
List<LogicalVariable> additionalFilteringKeys, AsterixTupleFilterFactory filterFactory,
RecordDescriptor recordDesc, JobGenContext context, JobSpecification spec,
List<LogicalVariable> prevSecondaryKeys, List<LogicalVariable> prevAdditionalFilteringKeys)
- throws AlgebricksException {
+ throws AlgebricksException {
// we start with the btree
Dataset dataset = findDataset(dataverseName, datasetName);
if (dataset == null) {