Merge branch 'gerrit/ionic' into 'master'
Change-Id: I7295ae44db8341e6f38c4a73a04bc9412cfa3136
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java
index 6536c86..755d4f6 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java
@@ -63,6 +63,7 @@
import org.apache.asterix.lang.common.statement.IndexDropStatement;
import org.apache.asterix.lang.common.statement.InsertStatement;
import org.apache.asterix.lang.common.statement.LoadStatement;
+import org.apache.asterix.lang.common.statement.TruncateDatasetStatement;
import org.apache.asterix.lang.common.statement.TypeDecl;
import org.apache.asterix.lang.common.statement.TypeDropStatement;
import org.apache.asterix.lang.common.statement.UpsertStatement;
@@ -266,6 +267,14 @@
}
break;
+ case TRUNCATE:
+ namespace = getStatementNamespace(((TruncateDatasetStatement) stmt).getNamespace(), activeNamespace);
+ invalidOperation = isSystemNamespace(namespace);
+ if (invalidOperation) {
+ message = formatObjectDdlMessage("truncate", dataset(), namespace, usingDb);
+ }
+ break;
+
case DATASET_DROP:
namespace = getStatementNamespace(((DropDatasetStatement) stmt).getNamespace(), activeNamespace);
invalidOperation = isSystemNamespace(namespace);
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/CompiledStatements.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/CompiledStatements.java
index a39bc06..6813e84 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/CompiledStatements.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/CompiledStatements.java
@@ -607,6 +607,7 @@
private final boolean autogenerated;
private final ARecordType itemType;
+ private final ARecordType parquetSchema;
public CompiledCopyToStatement(CopyToStatement copyToStatement) {
this.query = copyToStatement.getQuery();
@@ -623,6 +624,7 @@
this.keyExpressions = copyToStatement.getKeyExpressions();
this.autogenerated = copyToStatement.isAutogenerated();
this.itemType = eddDecl.getItemType();
+ this.parquetSchema = eddDecl.getParquetSchema();
}
@Override
@@ -650,6 +652,10 @@
return itemType;
}
+ public ARecordType getParquetSchema() {
+ return parquetSchema;
+ }
+
public List<Expression> getPathExpressions() {
return pathExpressions;
}
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/LangExpressionToPlanTranslator.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/LangExpressionToPlanTranslator.java
index 978997c..f8d0ff6 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/LangExpressionToPlanTranslator.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/LangExpressionToPlanTranslator.java
@@ -467,7 +467,7 @@
// Write adapter configuration
WriteDataSink writeDataSink = new WriteDataSink(copyTo.getAdapter(), copyTo.getProperties(),
- copyTo.getItemType(), expr.getSourceLocation());
+ copyTo.getItemType(), copyTo.getParquetSchema(), expr.getSourceLocation());
// writeOperator
WriteOperator writeOperator = new WriteOperator(sourceExprRef, new MutableObject<>(fullPathExpr),
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index 8ba9a02..e1b4bb0 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -166,6 +166,7 @@
import org.apache.asterix.lang.common.statement.StartFeedStatement;
import org.apache.asterix.lang.common.statement.StopFeedStatement;
import org.apache.asterix.lang.common.statement.SynonymDropStatement;
+import org.apache.asterix.lang.common.statement.TruncateDatasetStatement;
import org.apache.asterix.lang.common.statement.TypeDecl;
import org.apache.asterix.lang.common.statement.TypeDropStatement;
import org.apache.asterix.lang.common.statement.UpsertStatement;
@@ -422,6 +423,9 @@
case DATAVERSE_DROP:
handleDataverseDropStatement(metadataProvider, stmt, hcc, requestParameters);
break;
+ case TRUNCATE:
+ handleDatasetTruncateStatement(metadataProvider, stmt, requestParameters);
+ break;
case DATASET_DROP:
handleDatasetDropStatement(metadataProvider, stmt, hcc, requestParameters);
break;
@@ -2379,6 +2383,28 @@
// may be overridden by product extensions for additional checks after dropping a database/dataverse
}
+ public void handleDatasetTruncateStatement(MetadataProvider metadataProvider, Statement stmt,
+ IRequestParameters requestParameters) throws Exception {
+ TruncateDatasetStatement truncateStmt = (TruncateDatasetStatement) stmt;
+ SourceLocation sourceLoc = truncateStmt.getSourceLocation();
+ String datasetName = truncateStmt.getDatasetName().getValue();
+ metadataProvider.validateDatabaseObjectName(truncateStmt.getNamespace(), datasetName, sourceLoc);
+ Namespace stmtActiveNamespace = getActiveNamespace(truncateStmt.getNamespace());
+ DataverseName dataverseName = stmtActiveNamespace.getDataverseName();
+ String databaseName = stmtActiveNamespace.getDatabaseName();
+ if (isCompileOnly()) {
+ return;
+ }
+ lockUtil.truncateDatasetBegin(lockManager, metadataProvider.getLocks(), databaseName, dataverseName,
+ datasetName);
+ try {
+ doTruncateDataset(databaseName, dataverseName, datasetName, metadataProvider, truncateStmt.getIfExists(),
+ sourceLoc);
+ } finally {
+ metadataProvider.getLocks().unlock();
+ }
+ }
+
public void handleDatasetDropStatement(MetadataProvider metadataProvider, Statement stmt,
IHyracksClientConnection hcc, IRequestParameters requestParameters) throws Exception {
DropDatasetStatement stmtDelete = (DropDatasetStatement) stmt;
@@ -2400,6 +2426,53 @@
}
}
+ protected void doTruncateDataset(String databaseName, DataverseName dataverseName, String datasetName,
+ MetadataProvider metadataProvider, boolean ifExists, SourceLocation sourceLoc) throws Exception {
+ MetadataTransactionContext mdTxnCtx = MetadataManager.INSTANCE.beginTransaction();
+ metadataProvider.setMetadataTxnContext(mdTxnCtx);
+ Dataset ds = null;
+ try {
+ //TODO(DB): also check for database existence?
+
+ // Check if the dataverse exists
+ Dataverse dv = MetadataManager.INSTANCE.getDataverse(mdTxnCtx, databaseName, dataverseName);
+ if (dv == null) {
+ if (ifExists) {
+ if (warningCollector.shouldWarn()) {
+ warningCollector.warn(Warning.of(sourceLoc, ErrorCode.UNKNOWN_DATAVERSE, MetadataUtil
+ .dataverseName(databaseName, dataverseName, metadataProvider.isUsingDatabase())));
+ }
+ MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
+ return;
+ } else {
+ throw new CompilationException(ErrorCode.UNKNOWN_DATAVERSE, sourceLoc, MetadataUtil
+ .dataverseName(databaseName, dataverseName, metadataProvider.isUsingDatabase()));
+ }
+ }
+ ds = metadataProvider.findDataset(databaseName, dataverseName, datasetName, true);
+ if (ds == null) {
+ if (ifExists) {
+ MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
+ return;
+ } else {
+ throw new CompilationException(ErrorCode.UNKNOWN_DATASET_IN_DATAVERSE, sourceLoc, datasetName,
+ MetadataUtil.dataverseName(databaseName, dataverseName,
+ metadataProvider.isUsingDatabase()));
+ }
+ }
+ validateDatasetState(metadataProvider, ds, sourceLoc);
+
+ DatasetUtil.truncate(metadataProvider, ds);
+
+ MetadataManager.INSTANCE.commitTransaction(mdTxnCtx);
+ } catch (Exception e) {
+ LOGGER.error("failed to truncate collection {}",
+ new DatasetFullyQualifiedName(databaseName, dataverseName, datasetName), e);
+ abort(e, e, mdTxnCtx);
+ throw e;
+ }
+ }
+
protected boolean doDropDataset(String databaseName, DataverseName dataverseName, String datasetName,
MetadataProvider metadataProvider, boolean ifExists, IHyracksClientConnection hcc,
IRequestParameters requestParameters, boolean dropCorrespondingNodeGroup, SourceLocation sourceLoc)
@@ -4139,8 +4212,8 @@
DataverseName.createFromCanonicalForm(ExternalDataConstants.DUMMY_DATAVERSE_NAME);
IAType iaType = translateType(ExternalDataConstants.DUMMY_DATABASE_NAME, dummyDataverse,
ExternalDataConstants.DUMMY_TYPE_NAME, copyTo.getType(), mdTxnCtx);
- edd.getProperties().put(ExternalDataConstants.PARQUET_SCHEMA_KEY,
- SchemaConverterVisitor.convertToParquetSchemaString((ARecordType) iaType));
+ edd.setParquetSchema((ARecordType) iaType);
+ SchemaConverterVisitor.convertToParquetSchema((ARecordType) iaType);
}
}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.01.ddl.sqlpp
new file mode 100644
index 0000000..8f9bb53
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.01.ddl.sqlpp
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test if exists;
+CREATE DATAVERSE test;
+USE test;
+
+
+CREATE TYPE ColumnType1 AS {
+ id: integer
+ };
+
+CREATE COLLECTION TestCollection(ColumnType1) PRIMARY KEY id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.02.update.sqlpp
new file mode 100644
index 0000000..9022632
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.02.update.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description : insert a record whose field names contain special
+ *               characters (equals sign, dot, spaces) to exercise
+ *               parquet field-name handling in COPY TO
+ * Expected Res : Success
+ */
+
+use test;
+/*
+insert into TestCollection({"id":`year-month-duration`("P16Y"), "name": "John"});
+insert into TestCollection({"id":`day-time-duration`("-P3829H849.392S"), "name": "Alex"});
+*/
+
+insert into TestCollection({"id":18, "Director=name": "SS Rajamouli", "Director.Age" : 51 ,"Films Made" : ["RRR", "Eega", "Baahubali"] });
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.03.update.sqlpp
new file mode 100644
index 0000000..3da960b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.03.update.sqlpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+COPY (
+ select c.* from TestCollection c
+) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-field-names1")
+TYPE ( { id:int, `Director=name` : string, `Director.Age` : int ,`Films Made` : [string] } )
+WITH {
+ %template_colons%,
+ %additionalProperties%
+ "format":"parquet"
+};
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.04.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.04.ddl.sqlpp
new file mode 100644
index 0000000..38acb88
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.04.ddl.sqlpp
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+CREATE TYPE ColumnType2 AS {
+};
+
+
+
+CREATE EXTERNAL DATASET TestDataset1(ColumnType2) USING %adapter%
+(
+ %template%,
+ %additional_Properties%,
+ ("definition"="%path_prefix%copy-to-result/parquet-field-names1/"),
+ ("include"="*.parquet"),
+ ("requireVersionChangeDetection"="false"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.05.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.05.query.sqlpp
new file mode 100644
index 0000000..86d344c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.05.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+SELECT c.*
+FROM TestDataset1 c
+ORDER BY c.id;
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.06.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.06.update.sqlpp
new file mode 100644
index 0000000..f72a1f5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.06.update.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+COPY (
+select c.* from TestCollection c
+ ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet-field-names2")
+WITH {
+ %template_colons%,
+ %additionalProperties%
+ "format":"parquet"
+ };
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.07.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.07.ddl.sqlpp
new file mode 100644
index 0000000..17003c5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.07.ddl.sqlpp
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+CREATE EXTERNAL DATASET TestDataset2(ColumnType2) USING %adapter%
+(
+ %template%,
+ %additional_Properties%,
+ ("definition"="%path_prefix%copy-to-result/parquet-field-names2/"),
+ ("include"="*.parquet"),
+ ("requireVersionChangeDetection"="false"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.08.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.08.query.sqlpp
new file mode 100644
index 0000000..ce09a4a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-field-names/parquet-field-names.08.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+SELECT c.*
+FROM TestDataset2 c
+ORDER BY c.id;
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.01.ddl.sqlpp
new file mode 100644
index 0000000..18178ed
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.01.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+CREATE COLLECTION orders if not exists primary key (my_id: string);
+
+CREATE TYPE type1 as {my_id: int};
+CREATE COLLECTION users(type1) primary key my_id;
+CREATE INDEX users_first_name ON users(name.first: string) TYPE BTREE;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.02.update.sqlpp
new file mode 100644
index 0000000..54bd063
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.02.update.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+INSERT INTO test.orders([
+{"my_id": "a", "f": null },
+{"my_id": "b"},
+{"my_id": "c", "f": {"inner_f": "foo", "inner_f2": {"f3": "bar"} } }
+]);
+
+INSERT INTO test.users([
+{"my_id": 1, "address":{"city": "C1"}, "name":{"first": "F1", "last": "L1"}},
+{"my_id": 2, "address":{"city": "C2"}, "name":{"first": "F2", "last": "L1"}},
+{"my_id": 3, "address":{"city": "C2"}, "name":{"first": "F1", "last": "L2"}}
+]);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.03.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.03.query.sqlpp
new file mode 100644
index 0000000..6af3c4d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.03.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+select *
+from test.orders
+order by my_id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.04.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.04.query.sqlpp
new file mode 100644
index 0000000..cb96fc6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.04.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+select *
+from test.users
+order by my_id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.05.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.05.query.sqlpp
new file mode 100644
index 0000000..c96ae04
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.05.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+SET `import-private-functions` `true`;
+FROM DUMP_INDEX("test", "users", "users_first_name") AS v
+SELECT COUNT(*);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.06.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.06.ddl.sqlpp
new file mode 100644
index 0000000..96970d3
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.06.ddl.sqlpp
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+TRUNCATE DATASET test.users;
+TRUNCATE DATASET test.orders;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.07.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.07.query.sqlpp
new file mode 100644
index 0000000..625ed3f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.07.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+ SET `import-private-functions` `true`;
+ FROM DUMP_INDEX("test", "users", "users_first_name") AS v
+ SELECT COUNT(*);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.08.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.08.query.sqlpp
new file mode 100644
index 0000000..6af3c4d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.08.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+select *
+from test.orders
+order by my_id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.09.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.09.query.sqlpp
new file mode 100644
index 0000000..cb96fc6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.09.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+select *
+from test.users
+order by my_id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.10.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.10.ddl.sqlpp
new file mode 100644
index 0000000..3b957ba
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/dml/truncate-dataset-1/truncate-dataset.10.ddl.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+drop dataset orders;
+drop dataset users;
+
+drop dataverse test;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-field-names/parquet-field-names.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-field-names/parquet-field-names.05.adm
new file mode 100644
index 0000000..4566d53
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-field-names/parquet-field-names.05.adm
@@ -0,0 +1 @@
+{ "id": 18, "Director=name": "SS Rajamouli", "Director.Age": 51, "Films Made": [ "RRR", "Eega", "Baahubali" ] }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-field-names/parquet-field-names.08.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-field-names/parquet-field-names.08.adm
new file mode 100644
index 0000000..fa88a74
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-field-names/parquet-field-names.08.adm
@@ -0,0 +1 @@
+{ "Director.Age": 51, "Films Made": [ "RRR", "Eega", "Baahubali" ], "Director=name": "SS Rajamouli", "id": 18 }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.03.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.03.adm
new file mode 100644
index 0000000..84966a8
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.03.adm
@@ -0,0 +1,3 @@
+{ "orders": { "my_id": "a", "f": null } }
+{ "orders": { "my_id": "b" } }
+{ "orders": { "my_id": "c", "f": { "inner_f": "foo", "inner_f2": { "f3": "bar" } } } }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.04.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.04.adm
new file mode 100644
index 0000000..92ad35d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.04.adm
@@ -0,0 +1,3 @@
+{ "users": { "my_id": 1, "address": { "city": "C1" }, "name": { "first": "F1", "last": "L1" } } }
+{ "users": { "my_id": 2, "address": { "city": "C2" }, "name": { "first": "F2", "last": "L1" } } }
+{ "users": { "my_id": 3, "address": { "city": "C2" }, "name": { "first": "F1", "last": "L2" } } }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.05.adm
new file mode 100644
index 0000000..2b5dd69
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.05.adm
@@ -0,0 +1 @@
+{ "$1": 3 }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.07.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.07.adm
new file mode 100644
index 0000000..3ff59f6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.07.adm
@@ -0,0 +1 @@
+{ "$1": 0 }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.08.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.08.adm
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.08.adm
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.09.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.09.adm
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/dml/truncate-dataset-1/truncate-dataset-1.09.adm
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
index 9480064..87baa0d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
@@ -4533,6 +4533,11 @@
</test-group>
<test-group name="dml">
<test-case FilePath="dml">
+ <compilation-unit name="truncate-dataset-1">
+ <output-dir compare="Clean-JSON">truncate-dataset-1</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="dml">
<compilation-unit name="insert-with-autogenerated-pk_adm-with-sec-primary-index">
<output-dir compare="Text">insert-with-autogenerated-pk_adm-with-sec-primary-index</output-dir>
</compilation-unit>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index e8e89de..95291ad 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -115,6 +115,16 @@
</compilation-unit>
</test-case>
<test-case FilePath="copy-to">
+ <compilation-unit name="parquet-field-names">
+ <placeholder name="adapter" value="S3" />
+ <placeholder name="pathprefix" value="" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additionalProperties" value='"container":"playground",' />
+ <placeholder name="additional_Properties" value='("container"="playground")' />
+ <output-dir compare="Text">parquet-field-names</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="copy-to">
<compilation-unit name="parquet-empty-array">
<placeholder name="adapter" value="S3" />
<placeholder name="pathprefix" value="" />
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageCloudClient.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageCloudClient.java
index 84d68e3..0a67534 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageCloudClient.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/azure/blobstorage/AzBlobStorageCloudClient.java
@@ -50,6 +50,7 @@
import org.apache.asterix.cloud.clients.profiler.RequestLimiterNoOpProfiler;
import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.common.exceptions.RuntimeDataException;
+import org.apache.asterix.external.util.azure.blob_storage.AzureConstants;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
import org.apache.hyracks.control.nc.io.IOManager;
@@ -393,6 +394,7 @@
private static BlobContainerClient buildClient(AzBlobStorageClientConfig config) {
BlobContainerClientBuilder blobContainerClientBuilder =
new BlobContainerClientBuilder().containerName(config.getBucket()).endpoint(getEndpoint(config));
+ blobContainerClientBuilder.httpLogOptions(AzureConstants.HTTP_LOG_OPTIONS);
configCredentialsToAzClient(blobContainerClientBuilder, config);
BlobContainerClient blobContainerClient = blobContainerClientBuilder.buildClient();
blobContainerClient.createIfNotExists();
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetExternalWriterFactory.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetExternalWriterFactory.java
index d754068..5d9ab7f 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetExternalWriterFactory.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetExternalWriterFactory.java
@@ -53,7 +53,7 @@
public IExternalWriter createWriter(ParquetSchemaTree.SchemaNode schemaNode) throws HyracksDataException {
MessageType schema = generateSchema(schemaNode);
- printerFactory.setParquetSchemaString(schema.toString());
+ printerFactory.setParquetSchema(schema);
IExternalFileWriter writer = writerFactory.createWriter(ctx, printerFactory);
return new ExternalFileWriter(resolver, writer, maxResult);
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java
index b500cbe..25fbdc8 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSchemaInferPoolWriter.java
@@ -30,6 +30,7 @@
import org.apache.hyracks.data.std.api.IValueReference;
import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+// Maintains a pool of Parquet writers, each holding a file with its own schema, and routes each value to the appropriate writer based on its schema.
public class ParquetSchemaInferPoolWriter {
private final ParquetExternalWriterFactory writerFactory;
@@ -57,6 +58,7 @@
if (schemaComparisonType.equals(ISchemaChecker.SchemaComparisonType.EQUIVALENT)) {
return;
} else if (schemaComparisonType.equals(ISchemaChecker.SchemaComparisonType.GROWING)) {
+ // If the schema is growing, close the existing writer and create a new one with the new schema.
schemaNodes.set(i, schemaLazyVisitor.inferSchema(value));
closeWriter(i);
return;
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSinkExternalWriterRuntime.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSinkExternalWriterRuntime.java
index 3dbd4d3..7c1c03b 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSinkExternalWriterRuntime.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/parquet/ParquetSinkExternalWriterRuntime.java
@@ -76,6 +76,7 @@
}
+ // Schema inference is done frame-wise, i.e., we infer the schema for all the records in the frame and write the values with the schema inferred so far.
@Override
public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
tupleAccessor.reset(buffer);
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/IMetadataLockUtil.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/IMetadataLockUtil.java
index 4cf938e..6ba4839 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/IMetadataLockUtil.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/metadata/IMetadataLockUtil.java
@@ -57,6 +57,9 @@
void dropDatasetBegin(IMetadataLockManager lockManager, LockList locks, String database,
DataverseName dataverseName, String datasetName) throws AlgebricksException;
+ void truncateDatasetBegin(IMetadataLockManager lockManager, LockList locks, String database,
+ DataverseName dataverseName, String datasetName) throws AlgebricksException;
+
void modifyDatasetBegin(IMetadataLockManager lockManager, LockList locks, String database,
DataverseName dataverseName, String datasetName) throws AlgebricksException;
diff --git a/asterixdb/asterix-doc/src/main/grammar/sqlpp.ebnf b/asterixdb/asterix-doc/src/main/grammar/sqlpp.ebnf
index cbfa92e..be1bb15 100644
--- a/asterixdb/asterix-doc/src/main/grammar/sqlpp.ebnf
+++ b/asterixdb/asterix-doc/src/main/grammar/sqlpp.ebnf
@@ -139,6 +139,7 @@
|InsertStmnt
|UpsertStmnt
|DeleteStmnt
+ |TruncateStmnt
UseStmnt ::= "USE" DataverseName
@@ -250,6 +251,8 @@
| "INDEX" DoubleQualifiedName
| "FUNCTION" FunctionSignature ) ("IF" "EXISTS")?
+TruncateStmnt ::= "TRUNCATE" "DATASET" QualifiedName ("IF" "EXISTS")?
+
FunctionSignature ::= QualifiedName ( ( "(" ( FunctionParameters? | IntegerLiteral ) ")" ) | ("@" IntegerLiteral) )
LoadStmnt ::= "LOAD" "DATASET" QualifiedName "USING" AdapterName Configuration ("PRE-SORTED")?
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob_storage/AzureConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob_storage/AzureConstants.java
index 9ade27b..01ee148 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob_storage/AzureConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob_storage/AzureConstants.java
@@ -18,6 +18,9 @@
*/
package org.apache.asterix.external.util.azure.blob_storage;
+import com.azure.core.http.policy.HttpLogDetailLevel;
+import com.azure.core.http.policy.HttpLogOptions;
+
/*
* Note: Azure Blob and Azure Datalake use identical authentication, so they are using the same properties.
* If they end up diverging, then properties for AzureBlob and AzureDataLake need to be created.
@@ -27,6 +30,13 @@
throw new AssertionError("do not instantiate");
}
+ public static final HttpLogOptions HTTP_LOG_OPTIONS = new HttpLogOptions();
+ static {
+ HTTP_LOG_OPTIONS.setLogLevel(HttpLogDetailLevel.BASIC);
+ HTTP_LOG_OPTIONS.addAllowedQueryParamName("restype");
+ HTTP_LOG_OPTIONS.addAllowedQueryParamName("comp");
+ HTTP_LOG_OPTIONS.addAllowedQueryParamName("prefix");
+ }
/*
* Asterix Configuration Keys
*/
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob_storage/AzureUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob_storage/AzureUtils.java
index fb594b9..ac407f4 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob_storage/AzureUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/azure/blob_storage/AzureUtils.java
@@ -115,6 +115,8 @@
// Client builder
BlobServiceClientBuilder builder = new BlobServiceClientBuilder();
+ builder.httpLogOptions(AzureConstants.HTTP_LOG_OPTIONS);
+
int timeout = appCtx.getExternalProperties().getAzureRequestTimeout();
RequestRetryOptions requestRetryOptions = new RequestRetryOptions(null, null, timeout, null, null, null);
builder.retryOptions(requestRetryOptions);
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
index ba7a1ee..2f95b3e 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
@@ -34,23 +34,21 @@
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.MessageTypeParser;
public class ParquetExternalFilePrinter implements IExternalPrinter {
private final IAType typeInfo;
private final CompressionCodecName compressionCodecName;
- private MessageType schema;
+ private final MessageType schema;
private ParquetOutputFile parquetOutputFile;
- private String parquetSchemaString;
private ParquetWriter<IValueReference> writer;
private final long rowGroupSize;
private final int pageSize;
private final ParquetProperties.WriterVersion writerVersion;
- public ParquetExternalFilePrinter(CompressionCodecName compressionCodecName, String parquetSchemaString,
- IAType typeInfo, long rowGroupSize, int pageSize, ParquetProperties.WriterVersion writerVersion) {
+ public ParquetExternalFilePrinter(CompressionCodecName compressionCodecName, MessageType schema, IAType typeInfo,
+ long rowGroupSize, int pageSize, ParquetProperties.WriterVersion writerVersion) {
this.compressionCodecName = compressionCodecName;
- this.parquetSchemaString = parquetSchemaString;
+ this.schema = schema;
this.typeInfo = typeInfo;
this.rowGroupSize = rowGroupSize;
this.pageSize = pageSize;
@@ -59,7 +57,6 @@
@Override
public void open() throws HyracksDataException {
- schema = MessageTypeParser.parseMessageType(parquetSchemaString);
}
@Override
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinterFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinterFactory.java
index b6ad34e..d93decd 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinterFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinterFactory.java
@@ -18,25 +18,34 @@
*/
package org.apache.asterix.external.writer.printer;
+import org.apache.asterix.common.exceptions.CompilationException;
+import org.apache.asterix.external.writer.printer.parquet.SchemaConverterVisitor;
+import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.IAType;
import org.apache.asterix.runtime.writer.IExternalPrinter;
import org.apache.asterix.runtime.writer.IExternalPrinterFactory;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+import org.apache.parquet.schema.MessageType;
public class ParquetExternalFilePrinterFactory implements IExternalPrinterFactory {
private static final long serialVersionUID = 8971234908711235L;
- private String parquetSchemaString;
+ // parquetInferredSchema is for the case when the schema is inferred from the data, not provided by the user
+ // set at runtime
+ private transient MessageType parquetInferredSchema;
+ // parquetProvidedSchema is for the case when the schema is provided by the user
+ private ARecordType parquetProvidedSchema;
private final IAType typeInfo;
private final CompressionCodecName compressionCodecName;
private final long rowGroupSize;
private final int pageSize;
private final ParquetProperties.WriterVersion writerVersion;
- public ParquetExternalFilePrinterFactory(CompressionCodecName compressionCodecName, String parquetSchemaString,
- IAType typeInfo, long rowGroupSize, int pageSize, ParquetProperties.WriterVersion writerVersion) {
+ public ParquetExternalFilePrinterFactory(CompressionCodecName compressionCodecName,
+ ARecordType parquetprovidedSchema, IAType typeInfo, long rowGroupSize, int pageSize,
+ ParquetProperties.WriterVersion writerVersion) {
this.compressionCodecName = compressionCodecName;
- this.parquetSchemaString = parquetSchemaString;
+ this.parquetProvidedSchema = parquetprovidedSchema;
this.typeInfo = typeInfo;
this.rowGroupSize = rowGroupSize;
this.pageSize = pageSize;
@@ -52,13 +61,25 @@
this.writerVersion = writerVersion;
}
- public void setParquetSchemaString(String parquetSchemaString) {
- this.parquetSchemaString = parquetSchemaString;
+ public void setParquetSchema(MessageType parquetInferredSchema) {
+ this.parquetInferredSchema = parquetInferredSchema;
}
@Override
public IExternalPrinter createPrinter() {
- return new ParquetExternalFilePrinter(compressionCodecName, parquetSchemaString, typeInfo, rowGroupSize,
- pageSize, writerVersion);
+ if (parquetInferredSchema != null) {
+ return new ParquetExternalFilePrinter(compressionCodecName, parquetInferredSchema, typeInfo, rowGroupSize,
+ pageSize, writerVersion);
+ }
+
+ MessageType schema;
+ try {
+ schema = SchemaConverterVisitor.convertToParquetSchema(parquetProvidedSchema);
+ } catch (CompilationException e) {
+ // This should not happen; a CompilationException should have been caught at query compile time
+ throw new RuntimeException(e);
+ }
+ return new ParquetExternalFilePrinter(compressionCodecName, schema, typeInfo, rowGroupSize, pageSize,
+ writerVersion);
}
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
index 7058bf6..cdf24c6 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
@@ -26,6 +26,8 @@
import org.apache.hyracks.data.std.api.IValueReference;
import org.apache.hyracks.util.string.UTF8StringUtil;
+// The Field Names Dictionary will cache the mapping between field name bytes and their corresponding string representations,
+// minimizing the creation of new string objects during field name deserialization while writing to parquet files.
public class FieldNamesDictionary {
private final FieldNamesTrieDictionary trie;
private final List<String> fieldNames;
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ISchemaChecker.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ISchemaChecker.java
index 99b9736..dfa6e4f 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ISchemaChecker.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ISchemaChecker.java
@@ -22,6 +22,10 @@
import org.apache.hyracks.data.std.api.IValueReference;
public interface ISchemaChecker {
+
+ // EQUIVALENT: Example: { name: string, age: int } -> { name: string, age: int }
+ // GROWING: equivalent types but with extra fields. Example: { name: string, age: int } -> { name: string, age: int, address: string }
+ // CONFLICTING: a conflict in types. Example: { name: string, age: int } -> { name: {first:string, last:string}, age: int }
enum SchemaComparisonType {
EQUIVALENT,
GROWING,
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
index 373bfe4..2a03bfd 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
@@ -44,6 +44,8 @@
private final MessageType schema;
private final RecordLazyVisitablePointable rec;
+ // The Record Consumer is responsible for traversing the record tree,
+ // using recordConsumer.startField() to navigate into a child node and endField() to move back to the parent node.
private RecordConsumer recordConsumer;
private final FieldNamesDictionary fieldNamesDictionary;
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
index b591175..70872bb 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetSchemaLazyVisitor.java
@@ -39,6 +39,7 @@
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Types;
+// This class is used to infer the schema of a record into SchemaNode, which is an internal tree representation of the schema.
public class ParquetSchemaLazyVisitor implements ILazyVisitablePointableVisitor<Void, ParquetSchemaTree.SchemaNode> {
private final RecordLazyVisitablePointable rec;
private final FieldNamesDictionary fieldNamesDictionary;
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
index 04e11f7..38d12f9 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetValueWriter.java
@@ -44,6 +44,7 @@
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.PrimitiveType;
+// This class reduces the number of Java objects created each time a column is written to a Parquet file by reusing the same VoidPointable for all columns within the file.
public class ParquetValueWriter {
public static final String LIST_FIELD = "list";
public static final String ELEMENT_FIELD = "element";
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java
index 44cd5b2..fc43c89 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaCheckerLazyVisitor.java
@@ -30,6 +30,8 @@
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.api.IValueReference;
+// This class is used to check the schema of a record against a schema that has been inferred so far.
+// By checking, we can determine if the record is equivalent to the schema, if the record is growing, or if there is a conflict.
public class SchemaCheckerLazyVisitor implements ISchemaChecker,
ILazyVisitablePointableVisitor<ISchemaChecker.SchemaComparisonType, ParquetSchemaTree.SchemaNode> {
private final FieldNamesDictionary fieldNamesDictionary;
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaConverterVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaConverterVisitor.java
index a6ea115..9f5d02f 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaConverterVisitor.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/SchemaConverterVisitor.java
@@ -36,6 +36,7 @@
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Types;
+// Traverses the RecordType tree and converts it to a Parquet schema.
public class SchemaConverterVisitor implements IATypeVisitor<Void, Pair<Types.Builder, String>> {
public static String MESSAGE_NAME = "asterix_schema";
private final ARecordType schemaType;
@@ -46,9 +47,9 @@
this.unsupportedType = null;
}
- public static String convertToParquetSchemaString(ARecordType schemaType) throws CompilationException {
+ public static MessageType convertToParquetSchema(ARecordType schemaType) throws CompilationException {
SchemaConverterVisitor schemaConverterVisitor = new SchemaConverterVisitor(schemaType);
- return schemaConverterVisitor.getParquetSchema().toString();
+ return schemaConverterVisitor.getParquetSchema();
}
private MessageType getParquetSchema() throws CompilationException {
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/base/Statement.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/base/Statement.java
index 42c7eb6..8e845b8 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/base/Statement.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/base/Statement.java
@@ -72,6 +72,7 @@
}
enum Kind {
+ TRUNCATE,
DATASET_DECL,
DATAVERSE_DECL,
DATABASE_DROP,
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/ExternalDetailsDecl.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/ExternalDetailsDecl.java
index e1c978a..126f3ba 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/ExternalDetailsDecl.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/ExternalDetailsDecl.java
@@ -26,6 +26,7 @@
private Map<String, String> properties;
private String adapter;
private ARecordType itemType;
+ private ARecordType parquetSchema;
public void setAdapter(String adapter) {
this.adapter = adapter;
@@ -43,6 +44,14 @@
return itemType;
}
+ public void setParquetSchema(ARecordType parquetSchema) {
+ this.parquetSchema = parquetSchema;
+ }
+
+ public ARecordType getParquetSchema() {
+ return parquetSchema;
+ }
+
public String getAdapter() {
return adapter;
}
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/TruncateDatasetStatement.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/TruncateDatasetStatement.java
new file mode 100644
index 0000000..7367f1d
--- /dev/null
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/TruncateDatasetStatement.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.lang.common.statement;
+
+import org.apache.asterix.common.exceptions.CompilationException;
+import org.apache.asterix.common.metadata.DataverseName;
+import org.apache.asterix.common.metadata.Namespace;
+import org.apache.asterix.lang.common.base.AbstractStatement;
+import org.apache.asterix.lang.common.base.Statement;
+import org.apache.asterix.lang.common.struct.Identifier;
+import org.apache.asterix.lang.common.visitor.base.ILangVisitor;
+
+public class TruncateDatasetStatement extends AbstractStatement {
+ private final Namespace namespace;
+ private final Identifier datasetName;
+ private final boolean ifExists;
+
+ public TruncateDatasetStatement(Namespace namespace, Identifier datasetName, boolean ifExists) {
+ this.namespace = namespace;
+ this.datasetName = datasetName;
+ this.ifExists = ifExists;
+ }
+
+ @Override
+ public Statement.Kind getKind() {
+ return Kind.TRUNCATE;
+ }
+
+ public Namespace getNamespace() {
+ return namespace;
+ }
+
+ public DataverseName getDataverseName() {
+ return namespace == null ? null : namespace.getDataverseName();
+ }
+
+ public Identifier getDatasetName() {
+ return datasetName;
+ }
+
+ public boolean getIfExists() {
+ return ifExists;
+ }
+
+ @Override
+ public <R, T> R accept(ILangVisitor<R, T> visitor, T arg) throws CompilationException {
+ return visitor.visit(this, arg);
+ }
+
+ @Override
+ public byte getCategory() {
+ return Category.UPDATE;
+ }
+
+}
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/FormatPrintVisitor.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/FormatPrintVisitor.java
index 6151b02..6ff28e7 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/FormatPrintVisitor.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/FormatPrintVisitor.java
@@ -110,6 +110,7 @@
import org.apache.asterix.lang.common.statement.StartFeedStatement;
import org.apache.asterix.lang.common.statement.StopFeedStatement;
import org.apache.asterix.lang.common.statement.SynonymDropStatement;
+import org.apache.asterix.lang.common.statement.TruncateDatasetStatement;
import org.apache.asterix.lang.common.statement.TypeDecl;
import org.apache.asterix.lang.common.statement.TypeDropStatement;
import org.apache.asterix.lang.common.statement.UpdateStatement;
@@ -639,6 +640,14 @@
}
@Override
+ public Void visit(TruncateDatasetStatement del, Integer step) throws CompilationException {
+ out.println(skip(step) + "truncate " + datasetSymbol
+ + generateFullName(del.getDataverseName(), del.getDatasetName()) + generateIfExists(del.getIfExists())
+ + SEMICOLON);
+ return null;
+ }
+
+ @Override
public Void visit(InsertStatement insert, Integer step) throws CompilationException {
out.print(skip(step) + "insert into " + datasetSymbol
+ generateFullName(insert.getDataverseName(), insert.getDatasetName()));
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/base/AbstractQueryExpressionVisitor.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/base/AbstractQueryExpressionVisitor.java
index 0118f4c..9c20286 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/base/AbstractQueryExpressionVisitor.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/base/AbstractQueryExpressionVisitor.java
@@ -67,6 +67,7 @@
import org.apache.asterix.lang.common.statement.StartFeedStatement;
import org.apache.asterix.lang.common.statement.StopFeedStatement;
import org.apache.asterix.lang.common.statement.SynonymDropStatement;
+import org.apache.asterix.lang.common.statement.TruncateDatasetStatement;
import org.apache.asterix.lang.common.statement.TypeDecl;
import org.apache.asterix.lang.common.statement.TypeDropStatement;
import org.apache.asterix.lang.common.statement.UpdateStatement;
@@ -106,6 +107,11 @@
}
@Override
+ public R visit(TruncateDatasetStatement del, T arg) throws CompilationException {
+ return null;
+ }
+
+ @Override
public R visit(DatasetDecl dd, T arg) throws CompilationException {
return null;
}
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/base/ILangVisitor.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/base/ILangVisitor.java
index 1ed9b3c..c749118 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/base/ILangVisitor.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/visitor/base/ILangVisitor.java
@@ -85,6 +85,7 @@
import org.apache.asterix.lang.common.statement.StartFeedStatement;
import org.apache.asterix.lang.common.statement.StopFeedStatement;
import org.apache.asterix.lang.common.statement.SynonymDropStatement;
+import org.apache.asterix.lang.common.statement.TruncateDatasetStatement;
import org.apache.asterix.lang.common.statement.TypeDecl;
import org.apache.asterix.lang.common.statement.TypeDropStatement;
import org.apache.asterix.lang.common.statement.UpdateStatement;
@@ -111,6 +112,8 @@
R visit(DropDatasetStatement del, T arg) throws CompilationException;
+ R visit(TruncateDatasetStatement del, T arg) throws CompilationException;
+
R visit(InsertStatement insert, T arg) throws CompilationException;
R visit(DeleteStatement del, T arg) throws CompilationException;
diff --git a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
index 6b3aa54..31641a7 100644
--- a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
+++ b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
@@ -155,6 +155,7 @@
import org.apache.asterix.lang.common.statement.DeleteStatement;
import org.apache.asterix.lang.common.statement.DisconnectFeedStatement;
import org.apache.asterix.lang.common.statement.DropDatasetStatement;
+import org.apache.asterix.lang.common.statement.TruncateDatasetStatement;
import org.apache.asterix.lang.common.statement.ExternalDetailsDecl;
import org.apache.asterix.lang.common.statement.FeedDropStatement;
import org.apache.asterix.lang.common.statement.FeedPolicyDropStatement;
@@ -1015,6 +1016,7 @@
| stmt = LoadStatement()
| stmt = CopyStatement()
| stmt = DropStatement()
+ | stmt = TruncateStatement()
| stmt = SetStatement()
| stmt = InsertStatement()
| stmt = DeleteStatement()
@@ -2367,6 +2369,43 @@
return new Pair<List<Integer>, List<List<String>>> (keyFieldSourceIndicators, primaryKeyFields);
}
}
+Statement TruncateStatement() throws ParseException:
+{
+ Token startToken = null;
+ Statement stmt = null;
+}
+{
+ <TRUNCATE> { startToken = token; }
+ (
+ stmt = TruncateDatasetStatement(startToken)
+ )
+ {
+ return stmt;
+ }
+}
+TruncateDatasetStatement TruncateDatasetStatement(Token startStmtToken) throws ParseException:
+{
+ TruncateDatasetStatement stmt = null;
+}
+{
+ Dataset() stmt = TruncateDatasetSpecification(startStmtToken)
+ {
+ return stmt;
+ }
+}
+
+TruncateDatasetStatement TruncateDatasetSpecification(Token startStmtToken) throws ParseException:
+{
+ Pair<Namespace,Identifier> pairId = null;
+ boolean ifExists = false;
+}
+{
+ pairId = QualifiedName() ifExists = IfExists()
+ {
+ TruncateDatasetStatement stmt = new TruncateDatasetStatement(pairId.first, pairId.second, ifExists);
+ return addSourceLocation(stmt, startStmtToken);
+ }
+}
Statement DropStatement() throws ParseException:
{
@@ -5969,6 +6008,7 @@
| <DISTINCT : "distinct">
| <DIV : "div">
| <DROP : "drop">
+ | <TRUNCATE : "truncate">
| <ELEMENT : "element">
| <EXPLAIN : "explain">
| <ELSE : "else">
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/IExternalWriteDataSink.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/IExternalWriteDataSink.java
index 64f8d6d..1168ba1 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/IExternalWriteDataSink.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/IExternalWriteDataSink.java
@@ -26,5 +26,7 @@
public interface IExternalWriteDataSink extends IWriteDataSink {
ARecordType getItemType();
+ ARecordType getParquetSchema();
+
SourceLocation getSourceLoc();
}
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/WriteDataSink.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/WriteDataSink.java
index d1667bf..4a10f7f 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/WriteDataSink.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/WriteDataSink.java
@@ -28,14 +28,16 @@
public class WriteDataSink implements IExternalWriteDataSink {
private final String adapterName;
private final Map<String, String> configuration;
- private ARecordType itemType;
- private SourceLocation sourceLoc;
+ private final ARecordType itemType;
+ private final ARecordType parquetSchema;
+ private final SourceLocation sourceLoc;
public WriteDataSink(String adapterName, Map<String, String> configuration, ARecordType itemType,
- SourceLocation sourceLoc) {
+ ARecordType parquetSchema, SourceLocation sourceLoc) {
this.adapterName = adapterName;
this.configuration = configuration;
this.itemType = itemType;
+ this.parquetSchema = parquetSchema;
this.sourceLoc = sourceLoc;
}
@@ -43,6 +45,7 @@
this.adapterName = writeDataSink.getAdapterName();
this.configuration = new HashMap<>(writeDataSink.configuration);
this.itemType = writeDataSink.itemType;
+ this.parquetSchema = writeDataSink.parquetSchema;
this.sourceLoc = writeDataSink.sourceLoc;
}
@@ -52,6 +55,11 @@
}
@Override
+ public ARecordType getParquetSchema() {
+ return parquetSchema;
+ }
+
+ @Override
public SourceLocation getSourceLoc() {
return sourceLoc;
}
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/provider/ExternalWriterProvider.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/provider/ExternalWriterProvider.java
index e6716df..e88b1de 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/provider/ExternalWriterProvider.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/provider/ExternalWriterProvider.java
@@ -205,7 +205,7 @@
case ExternalDataConstants.FORMAT_PARQUET:
CompressionCodecName compressionCodecName;
- if (compression == null || compression.equals("") || compression.equals("none")) {
+ if (compression == null || compression.isEmpty() || compression.equals("none")) {
compressionCodecName = CompressionCodecName.UNCOMPRESSED;
} else {
compressionCodecName = CompressionCodecName.valueOf(compression.toUpperCase());
@@ -218,10 +218,11 @@
int pageSize = (int) StorageUtil.getByteValue(pageSizeString);
ParquetProperties.WriterVersion writerVersion = getParquetWriterVersion(configuration);
- if (configuration.get(ExternalDataConstants.PARQUET_SCHEMA_KEY) != null) {
- String parquetSchemaString = configuration.get(ExternalDataConstants.PARQUET_SCHEMA_KEY);
+ ARecordType parquetSchema = ((IExternalWriteDataSink) sink).getParquetSchema();
+
+ if (parquetSchema != null) {
ParquetExternalFilePrinterFactory parquetPrinterFactory =
- new ParquetExternalFilePrinterFactory(compressionCodecName, parquetSchemaString,
+ new ParquetExternalFilePrinterFactory(compressionCodecName, parquetSchema,
(IAType) sourceType, rowGroupSize, pageSize, writerVersion);
ExternalFileWriterFactory parquetWriterFactory = new ExternalFileWriterFactory(fileWriterFactory,
@@ -230,6 +231,7 @@
partitionComparatorFactories, inputDesc, parquetWriterFactory);
}
+ // No user-provided Parquet schema: fall back to writing with schema inference.
int maxSchemas = ExternalWriterProvider.getMaxParquetSchema(configuration);
ParquetExternalFilePrinterFactoryProvider printerFactoryProvider =
new ParquetExternalFilePrinterFactoryProvider(compressionCodecName, (IAType) sourceType,
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/DatasetPartitions.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/DatasetPartitions.java
new file mode 100644
index 0000000..09e0730
--- /dev/null
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/DatasetPartitions.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.metadata.utils;
+
+import java.io.Serializable;
+import java.util.List;
+
+import org.apache.asterix.metadata.entities.Dataset;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
+
+public class DatasetPartitions implements Serializable {
+ private static final long serialVersionUID = 1L;
+ private final Dataset dataset;
+ private final List<Integer> partitions;
+ private final IIndexDataflowHelperFactory primaryIndexDataflowHelperFactory;
+ private final List<IIndexDataflowHelperFactory> secondaryIndexDataflowHelperFactories;
+
+ public DatasetPartitions(Dataset dataset, List<Integer> partitions,
+ IIndexDataflowHelperFactory primaryIndexDataflowHelperFactory,
+ List<IIndexDataflowHelperFactory> secondaryIndexDataflowHelperFactories) {
+ this.dataset = dataset;
+ this.partitions = partitions;
+ this.primaryIndexDataflowHelperFactory = primaryIndexDataflowHelperFactory;
+ this.secondaryIndexDataflowHelperFactories = secondaryIndexDataflowHelperFactories;
+ }
+
+ public Dataset getDataset() {
+ return dataset;
+ }
+
+ public List<Integer> getPartitions() {
+ return partitions;
+ }
+
+ public IIndexDataflowHelperFactory getPrimaryIndexDataflowHelperFactory() {
+ return primaryIndexDataflowHelperFactory;
+ }
+
+ public List<IIndexDataflowHelperFactory> getSecondaryIndexDataflowHelperFactories() {
+ return secondaryIndexDataflowHelperFactories;
+ }
+
+ @Override
+ public String toString() {
+ return "{ \"dataset\" : " + dataset + ", \"partitions\" : " + partitions + " }";
+ }
+}
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/DatasetUtil.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/DatasetUtil.java
index 7df9b47..8bcf7a2 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/DatasetUtil.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/DatasetUtil.java
@@ -24,10 +24,12 @@
import java.io.DataOutput;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
+import java.util.stream.Collectors;
import org.apache.asterix.builders.IARecordBuilder;
import org.apache.asterix.builders.RecordBuilder;
@@ -35,9 +37,12 @@
import org.apache.asterix.common.config.DatasetConfig;
import org.apache.asterix.common.config.DatasetConfig.DatasetType;
import org.apache.asterix.common.context.CorrelatedPrefixMergePolicyFactory;
+import org.apache.asterix.common.context.DatasetLifecycleManager;
+import org.apache.asterix.common.context.DatasetResource;
import org.apache.asterix.common.context.IStorageComponentProvider;
import org.apache.asterix.common.context.ITransactionSubsystemProvider;
import org.apache.asterix.common.context.TransactionSubsystemProvider;
+import org.apache.asterix.common.dataflow.DatasetLocalResource;
import org.apache.asterix.common.dataflow.ICcApplicationContext;
import org.apache.asterix.common.exceptions.ACIDException;
import org.apache.asterix.common.exceptions.AsterixException;
@@ -46,6 +51,7 @@
import org.apache.asterix.common.metadata.MetadataConstants;
import org.apache.asterix.common.metadata.MetadataUtil;
import org.apache.asterix.common.transactions.IRecoveryManager;
+import org.apache.asterix.common.utils.JobUtils;
import org.apache.asterix.external.util.ExternalDataUtils;
import org.apache.asterix.formats.base.IDataFormat;
import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
@@ -71,11 +77,13 @@
import org.apache.asterix.runtime.operators.LSMPrimaryUpsertOperatorDescriptor;
import org.apache.asterix.runtime.utils.RuntimeUtils;
import org.apache.asterix.transaction.management.opcallbacks.PrimaryIndexInstantSearchOperationCallbackFactory;
+import org.apache.hyracks.algebricks.common.constraints.AlgebricksAbsolutePartitionConstraint;
import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraint;
import org.apache.hyracks.algebricks.common.constraints.AlgebricksPartitionConstraintHelper;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.data.IBinaryComparatorFactoryProvider;
+import org.apache.hyracks.api.client.IHyracksClientConnection;
import org.apache.hyracks.api.dataflow.IOperatorDescriptor;
import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import org.apache.hyracks.api.dataflow.value.IBinaryHashFunctionFactory;
@@ -99,6 +107,7 @@
import org.apache.hyracks.storage.am.common.build.IndexBuilderFactory;
import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
import org.apache.hyracks.storage.am.common.dataflow.IndexCreateOperatorDescriptor;
+import org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelper;
import org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelperFactory;
import org.apache.hyracks.storage.am.common.dataflow.IndexDropOperatorDescriptor;
import org.apache.hyracks.storage.am.common.impls.DefaultTupleProjectorFactory;
@@ -107,6 +116,8 @@
import org.apache.hyracks.storage.am.lsm.common.api.ILSMTupleFilterCallbackFactory;
import org.apache.hyracks.storage.am.lsm.common.dataflow.LSMTreeIndexCompactOperatorDescriptor;
import org.apache.hyracks.storage.common.IResourceFactory;
+import org.apache.hyracks.storage.common.IStorageManager;
+import org.apache.hyracks.storage.common.LocalResource;
import org.apache.hyracks.storage.common.projection.ITupleProjectorFactory;
import org.apache.hyracks.util.LogRedactionUtil;
import org.apache.logging.log4j.LogManager;
@@ -745,6 +756,98 @@
&& dataset.getDatasetFormatInfo().getFormat() == DatasetConfig.DatasetFormat.COLUMN;
}
+ public static void truncate(MetadataProvider metadataProvider, Dataset ds) throws Exception {
+ if (ds.getDatasetType() == DatasetType.INTERNAL) {
+ IHyracksClientConnection hcc;
+ Map<String, List<DatasetPartitions>> nc2Resources = getNodeResources(metadataProvider, ds);
+ AlgebricksAbsolutePartitionConstraint nodeSet =
+ new AlgebricksAbsolutePartitionConstraint(nc2Resources.keySet().toArray(new String[0]));
+ JobSpecification job = new JobSpecification();
+ IOperatorDescriptor truncateOp = new TruncateOperatorDescriptor(job, nc2Resources);
+ AlgebricksPartitionConstraintHelper.setPartitionConstraintInJobSpec(job, truncateOp, nodeSet);
+ hcc = metadataProvider.getApplicationContext().getHcc();
+ JobUtils.runJobIfActive(hcc, job, true);
+ } else {
+ throw new IllegalArgumentException("Cannot truncate a non-internal dataset.");
+ }
+ }
+
+ public static DatasetPartitions getPartitionsAndDataflowHelperFactory(Dataset dataset,
+ List<DatasetPartitions> ncResources, IIndexDataflowHelperFactory primary,
+ List<IIndexDataflowHelperFactory> secondaries) {
+ DatasetPartitions partitionsAndDataflowHelperFactory;
+ if (ncResources.isEmpty()) {
+ partitionsAndDataflowHelperFactory =
+ new DatasetPartitions(dataset, new ArrayList<>(), primary, secondaries);
+ ncResources.add(partitionsAndDataflowHelperFactory);
+ } else {
+ DatasetPartitions last = ncResources.get(ncResources.size() - 1);
+ if (last.getPrimaryIndexDataflowHelperFactory() == primary) {
+ partitionsAndDataflowHelperFactory = last;
+ } else {
+ partitionsAndDataflowHelperFactory =
+ new DatasetPartitions(dataset, new ArrayList<>(), primary, secondaries);
+ ncResources.add(partitionsAndDataflowHelperFactory);
+ }
+ }
+ return partitionsAndDataflowHelperFactory;
+ }
+
+ public static Map<String, List<DatasetPartitions>> getNodeResources(MetadataProvider metadataProvider,
+ Dataset dataset) throws AlgebricksException {
+ Map<String, List<DatasetPartitions>> nc2Resources = new HashMap<>();
+ IStorageManager storageManager = metadataProvider.getStorageComponentProvider().getStorageManager();
+
+ // get secondary indexes
+ List<Index> secondaryIndexes =
+ metadataProvider
+ .getDatasetIndexes(dataset.getDatabaseName(), dataset.getDataverseName(),
+ dataset.getDatasetName())
+ .stream().filter(Index::isSecondaryIndex).collect(Collectors.toList());
+ PartitioningProperties partitioningProperties = metadataProvider.getPartitioningProperties(dataset);
+ IIndexDataflowHelperFactory primary =
+ new IndexDataflowHelperFactory(storageManager, partitioningProperties.getSplitsProvider());
+ List<IIndexDataflowHelperFactory> secondaries =
+ secondaryIndexes.isEmpty() ? Collections.emptyList() : new ArrayList<>();
+ for (Index index : secondaryIndexes) {
+ PartitioningProperties idxPartitioningProperties =
+ metadataProvider.getPartitioningProperties(dataset, index.getIndexName());
+ secondaries
+ .add(new IndexDataflowHelperFactory(storageManager, idxPartitioningProperties.getSplitsProvider()));
+ }
+ AlgebricksAbsolutePartitionConstraint computeLocations =
+ (AlgebricksAbsolutePartitionConstraint) partitioningProperties.getConstraints();
+ int[][] computeStorageMap = partitioningProperties.getComputeStorageMap();
+ for (int j = 0; j < computeLocations.getLocations().length; j++) {
+ String loc = computeLocations.getLocations()[j];
+ DatasetPartitions partitionsAndDataflowHelperFactories = getPartitionsAndDataflowHelperFactory(dataset,
+ nc2Resources.computeIfAbsent(loc, key -> new ArrayList<>()), primary, secondaries);
+ List<Integer> dsPartitions = partitionsAndDataflowHelperFactories.getPartitions();
+ int[] computeStoragePartitions = computeStorageMap[j];
+ for (int storagePartition : computeStoragePartitions) {
+ dsPartitions.add(storagePartition);
+ }
+ }
+ return nc2Resources;
+ }
+
+ public static DatasetResource getDatasetResource(DatasetLifecycleManager dslMgr, Integer partition,
+ IndexDataflowHelper indexHelper) throws HyracksDataException {
+ LocalResource lr = indexHelper.getResource();
+ DatasetLocalResource dslr = (DatasetLocalResource) lr.getResource();
+ int datasetId = dslr.getDatasetId();
+ DatasetResource dsr = dslMgr.getDatasetLifecycle(datasetId);
+ // Ensure that no active operations exist on this partition before truncating
+ int numActiveOperations =
+ dslMgr.getOperationTracker(datasetId, partition, lr.getPath()).getNumActiveOperations();
+ if (numActiveOperations > 0) {
+ throw new IllegalStateException("Can't truncate the collection " + dsr.getDatasetInfo().getDatasetID()
+ + " because the number of active operations = " + numActiveOperations + " in the partition "
+ + partition);
+ }
+ return dsr;
+ }
+
public static boolean isDeltaTable(Dataset dataset) {
return dataset.getDatasetType() == DatasetType.EXTERNAL && ExternalDataUtils
.isDeltaTable(((ExternalDatasetDetails) dataset.getDatasetDetails()).getProperties());
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/MetadataLockUtil.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/MetadataLockUtil.java
index 0b649c4..130cc0f 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/MetadataLockUtil.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/MetadataLockUtil.java
@@ -152,6 +152,14 @@
}
@Override
+ public void truncateDatasetBegin(IMetadataLockManager lockMgr, LockList locks, String database,
+ DataverseName dataverseName, String datasetName) throws AlgebricksException {
+ lockMgr.acquireDatabaseReadLock(locks, database);
+ lockMgr.acquireDataverseReadLock(locks, database, dataverseName);
+ lockMgr.acquireDatasetWriteLock(locks, database, dataverseName, datasetName);
+ }
+
+ @Override
public void dropTypeBegin(IMetadataLockManager lockMgr, LockList locks, String database,
DataverseName dataverseName, String typeName) throws AlgebricksException {
lockMgr.acquireDatabaseReadLock(locks, database);
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/TruncateOperatorDescriptor.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/TruncateOperatorDescriptor.java
new file mode 100644
index 0000000..24ed7b2
--- /dev/null
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/TruncateOperatorDescriptor.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.metadata.utils;
+
+import static org.apache.asterix.metadata.utils.DatasetUtil.getDatasetResource;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Predicate;
+
+import org.apache.asterix.common.api.INcApplicationContext;
+import org.apache.asterix.common.context.DatasetLifecycleManager;
+import org.apache.asterix.common.context.DatasetResource;
+import org.apache.asterix.common.ioopcallbacks.LSMIOOperationCallback;
+import org.apache.asterix.common.storage.DatasetResourceReference;
+import org.apache.hyracks.api.application.INCServiceContext;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.IOperatorNodePushable;
+import org.apache.hyracks.api.dataflow.value.IRecordDescriptorProvider;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.job.IOperatorDescriptorRegistry;
+import org.apache.hyracks.dataflow.std.base.AbstractSingleActivityOperatorDescriptor;
+import org.apache.hyracks.dataflow.std.base.AbstractUnaryOutputSourceOperatorNodePushable;
+import org.apache.hyracks.storage.am.common.api.IIndexDataflowHelper;
+import org.apache.hyracks.storage.am.common.dataflow.IIndexDataflowHelperFactory;
+import org.apache.hyracks.storage.am.common.dataflow.IndexDataflowHelper;
+import org.apache.hyracks.storage.am.common.impls.NoOpIndexAccessParameters;
+import org.apache.hyracks.storage.am.common.util.ResourceReleaseUtils;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMComponent;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMComponentId;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMComponentIdGenerator;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMIndex;
+import org.apache.hyracks.storage.am.lsm.common.api.ILSMIndexAccessor;
+import org.apache.hyracks.storage.common.LocalResource;
+import org.apache.logging.log4j.Level;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+@SuppressWarnings("squid:S1181")
+public class TruncateOperatorDescriptor extends AbstractSingleActivityOperatorDescriptor {
+
+ private static final Logger LOGGER = LogManager.getLogger();
+ private static final long serialVersionUID = 1L;
+ private static final long TIMEOUT = 5;
+ private static final TimeUnit TIMEOUT_UNIT = TimeUnit.MINUTES;
+ private final Map<String, List<DatasetPartitions>> allDatasets;
+
+ public TruncateOperatorDescriptor(IOperatorDescriptorRegistry spec,
+ Map<String, List<DatasetPartitions>> allDatasets) {
+ super(spec, 0, 0);
+ this.allDatasets = allDatasets;
+ }
+
+ @Override
+ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
+ IRecordDescriptorProvider recordDescProvider, int part, int nPartitions) throws HyracksDataException {
+ return new AbstractUnaryOutputSourceOperatorNodePushable() {
+ @Override
+ public void initialize() throws HyracksDataException {
+ INcApplicationContext appCtx =
+ (INcApplicationContext) ctx.getJobletContext().getServiceContext().getApplicationContext();
+ INCServiceContext ctx = appCtx.getServiceContext();
+ DatasetLifecycleManager dslMgr = (DatasetLifecycleManager) appCtx.getDatasetLifecycleManager();
+ List<DatasetPartitions> nodeDatasets = new ArrayList<>();
+ try {
+ List<DatasetPartitions> datasets = allDatasets.get(ctx.getNodeId());
+ for (DatasetPartitions dataset : datasets) {
+ nodeDatasets.add(dataset);
+ for (Integer partition : dataset.getPartitions()) {
+ IIndexDataflowHelper indexDataflowHelper =
+ dataset.getPrimaryIndexDataflowHelperFactory().create(ctx, partition);
+ indexDataflowHelper.open();
+ try {
+ ILSMIndex index = (ILSMIndex) indexDataflowHelper.getIndexInstance();
+ // Partial Rollback
+ final LocalResource resource = indexDataflowHelper.getResource();
+ final int indexStoragePartition =
+ DatasetResourceReference.of(resource).getPartitionNum();
+ DatasetResource dsr = getDatasetResource(dslMgr, indexStoragePartition,
+ (IndexDataflowHelper) indexDataflowHelper);
+ dsr.getDatasetInfo().waitForIO();
+ ILSMComponentIdGenerator idGenerator = dslMgr.getComponentIdGenerator(
+ dsr.getDatasetID(), indexStoragePartition, resource.getPath());
+ idGenerator.refresh();
+ truncate(ctx, index, dataset.getSecondaryIndexDataflowHelperFactories(), partition,
+ idGenerator.getId());
+ } finally {
+ indexDataflowHelper.close();
+ }
+ }
+ LOGGER.info("Truncated collection {} partitions {}", dataset.getDataset(),
+ dataset.getPartitions());
+ }
+ } catch (Throwable e) {
+ LOGGER.log(Level.ERROR, "Exception while truncating {}", nodeDatasets, e);
+ throw HyracksDataException.create(e);
+ }
+ }
+ };
+ }
+
+ private static void truncate(INCServiceContext ctx, ILSMIndex primaryIndex,
+ List<IIndexDataflowHelperFactory> secondaries, Integer partition, ILSMComponentId nextComponentId)
+ throws HyracksDataException {
+ Future<Void> truncateFuture = ctx.getControllerService().getExecutor().submit(() -> {
+ INcApplicationContext appCtx = (INcApplicationContext) ctx.getApplicationContext();
+ long flushLsn = appCtx.getTransactionSubsystem().getLogManager().getAppendLSN();
+ Map<String, Object> flushMap = new HashMap<>();
+ flushMap.put(LSMIOOperationCallback.KEY_FLUSH_LOG_LSN, flushLsn);
+ flushMap.put(LSMIOOperationCallback.KEY_NEXT_COMPONENT_ID, nextComponentId);
+ ILSMIndexAccessor lsmAccessor = primaryIndex.createAccessor(NoOpIndexAccessParameters.INSTANCE);
+ lsmAccessor.getOpContext().setParameters(flushMap);
+ Predicate<ILSMComponent> predicate = c -> true;
+ List<IIndexDataflowHelper> openedSecondaries = new ArrayList<>();
+ Throwable hde = null;
+ try {
+ for (int j = 0; j < secondaries.size(); j++) {
+ IIndexDataflowHelper sIndexDataflowHelper = secondaries.get(j).create(ctx, partition);
+ sIndexDataflowHelper.open();
+ openedSecondaries.add(sIndexDataflowHelper);
+ }
+ // truncate primary
+ lsmAccessor.deleteComponents(predicate);
+ // truncate secondaries
+ for (int j = 0; j < openedSecondaries.size(); j++) {
+ ILSMIndex sIndex = (ILSMIndex) openedSecondaries.get(j).getIndexInstance();
+ ILSMIndexAccessor sLsmAccessor = sIndex.createAccessor(NoOpIndexAccessParameters.INSTANCE);
+ sLsmAccessor.getOpContext().setParameters(flushMap);
+ sLsmAccessor.deleteComponents(predicate);
+ }
+ } catch (Throwable th) {
+ hde = HyracksDataException.create(th);
+ } finally {
+ hde = ResourceReleaseUtils.close(openedSecondaries, hde);
+ }
+ if (hde != null) {
+ throw HyracksDataException.create(hde);
+ }
+ return null;
+ });
+ try {
+ truncateFuture.get(TIMEOUT, TIMEOUT_UNIT);
+ } catch (Exception e) {
+ LOGGER.fatal("halting due to a failure to truncate", e);
+ }
+ }
+
+}