[ASTERIXDB-3392] Add parquet format for COPY TO
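
Adds "parquet" as a value for the COPY TO "format" option. The new
runtime tests cover flat, nested, data-type, and UTF-8 output to S3,
plus negative checks for malformed schemas, schema/data mismatches,
an unsupported compression codec, and non-numeric "row-group-size"
and "page-size" values.

For illustration, a minimal COPY TO statement of the shape exercised
by the new tests (credentials, endpoint, and bucket are the test
suite's dummy values):

    COPY (
       SELECT "123" AS id
    ) toWriter
    TO S3
    PATH ("copy-to-result", "parquet-simple")
    WITH {
        "accessKeyId":"dummyAccessKey",
        "secretAccessKey":"dummySecretKey",
        "region":"us-west-2",
        "serviceEndpoint":"http://127.0.0.1:8001",
        "container":"playground",
        "format":"parquet",
        "schema":"message schema {
                      optional binary id (UTF8);
                  }"
    };
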
Change-Id: I40dc16969e66af09cde04b460f441af666b39d51
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18209
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: <preethampoluparthi@gmail.com>
Reviewed-by: Wail Alkowaileet <wael.y.k@gmail.com>
Tested-by: Wail Alkowaileet <wael.y.k@gmail.com>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.01.ddl.sqlpp
new file mode 100644
index 0000000..36d00be
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.01.ddl.sqlpp
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test if exists;
+CREATE DATAVERSE test;
+USE test;
+
+
+CREATE TYPE ColumnType1 AS {
+ id: integer
+};
+
+CREATE COLLECTION TestCollection(ColumnType1) PRIMARY KEY id;
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.02.update.sqlpp
new file mode 100644
index 0000000..23d9316
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.02.update.sqlpp
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
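+-- Negative case: the "schema" value is not a valid Parquet message type (the group is never closed)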
+COPY (
+ select c.* from TestCollection c
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-error-checks2")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "schema":"message schema{"
+}
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.03.update.sqlpp
new file mode 100644
index 0000000..2e811f2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.03.update.sqlpp
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+insert into TestCollection({"id":1, "name": "John", "nested" : { "first" : "john" , "second":"JOHN" } });
+
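+-- Negative case: "id" is an integer in the inserted record but is declared as binary (UTF8) in the schema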
+COPY (
+ select c.* from TestCollection c
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-error-checks3")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "schema":"message schema {
+ optional binary id (UTF8);
+ optional binary name (UTF8);
+ optional group nested {
+ optional binary first (UTF8);
+ optional binary second (UTF8);
+ }
+ }
+"
+}
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.04.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.04.update.sqlpp
new file mode 100644
index 0000000..9933fe2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.04.update.sqlpp
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
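+-- Negative case: the schema's "nested" group omits the "second" field that is present in the stored record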
+COPY (
+ select c.* from TestCollection c
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-error-checks4")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "schema":"message schema {
+ optional int64 id;
+ optional binary name (UTF8);
+ optional group nested {
+ optional binary first (UTF8);
+ }
+ }
+"
+}
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.05.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.05.update.sqlpp
new file mode 100644
index 0000000..cf3a450
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.05.update.sqlpp
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
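+-- Negative case: "nested" is a record in the data but is declared as a binary (UTF8) primitive in the schema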
+COPY (
+ select c.* from TestCollection c
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-error-checks5")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "schema":"message schema {
+ optional int64 id;
+ optional binary name (UTF8);
+ optional binary nested (UTF8);
+ }"
+}
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.06.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.06.update.sqlpp
new file mode 100644
index 0000000..2d53754
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.06.update.sqlpp
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
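+-- Negative case: "name" is a plain string in the data but is declared as a group in the schema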
+COPY (
+ select c.* from TestCollection c
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-error-checks6")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "schema":"message spark_schema {
+ optional int64 id;
+ optional group name {
+ optional binary first (UTF8);
+ }
+ optional group nested {
+ optional binary first (UTF8);
+ optional binary second (UTF8);
+ }
+ }"
+}
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.07.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.07.update.sqlpp
new file mode 100644
index 0000000..dcb0dcd
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.07.update.sqlpp
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
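+-- Negative case: "row-group-size" is given a non-numeric value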
+COPY (
+ select c.* from TestCollection c
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-error-checks7")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "schema":"message schema{}",
+ "row-group-size":"random"
+}
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.08.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.08.update.sqlpp
new file mode 100644
index 0000000..9856c4a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.08.update.sqlpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
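+-- Negative case: "page-size" is given a non-numeric value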
+COPY (
+ select c.* from TestCollection c
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-error-checks8")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "schema":"message schema{}",
+ "page-size":"random"
+}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.09.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.09.update.sqlpp
new file mode 100644
index 0000000..b8579a5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.09.update.sqlpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
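+-- Negative case: "rar" is not a supported Parquet compression codec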
+COPY (
+ select c.* from TestCollection c
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-error-checks9")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "compression":"rar",
+ "schema":""
+}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.01.ddl.sqlpp
new file mode 100644
index 0000000..56e79c8
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.01.ddl.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test if exists;
+CREATE DATAVERSE test;
+USE test;
+
+
+CREATE TYPE ColumnType1 AS {
+ id: integer,
+ name : string
+};
+
+CREATE COLLECTION TestCollection(ColumnType1) PRIMARY KEY id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
new file mode 100644
index 0000000..ec1ac0c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.02.update.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description  : insert a record covering integer, string, date, time,
+ *                datetime, boolean, and double values for the
+ *                COPY TO Parquet data-type coverage test
+ * Expected Res : Success
+ */
+
+use test;
+/*
+insert into TestCollection({"id":`year-month-duration`("P16Y"), "name": "John"});
+insert into TestCollection({"id":`day-time-duration`("-P3829H849.392S"), "name": "Alex"});
+*/
+
+insert into TestCollection({"id":18, "name": "Virat" , "dateType":date("1988-11-05"), "timeType": time("03:10:00.493Z") , "boolType" : false , "doubleType" : 0.75, "datetimeType" : datetime("1900-02-01T00:00:00") });
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
new file mode 100644
index 0000000..c2606b7
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.03.update.sqlpp
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
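+-- Write the record using Parquet logical types (DATE, TIME_MILLIS, TIMESTAMP_MILLIS) for the temporal fields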
+COPY (
+ select c.* from TestCollection c
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-cover-data-types")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "schema":"message schema {
+ optional binary name (UTF8);
+ optional int32 id;
+ optional int32 dateType (DATE);
+ optional int32 timeType (TIME_MILLIS);
+ optional boolean boolType ;
+ optional double doubleType ;
+ optional int64 datetimeType (TIMESTAMP_MILLIS);
+ }"
+};
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.04.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.04.ddl.sqlpp
new file mode 100644
index 0000000..310198e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.04.ddl.sqlpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+CREATE TYPE ColumnType2 AS {
+};
+
+
+
+CREATE EXTERNAL DATASET TestDataset(ColumnType2) USING S3
+(
+ ("serviceEndpoint"="http://127.0.0.1:8001"),
+ ("region"="us-west-2"),
+ ("container"="playground"),
+ ("definition"="copy-to-result/parquet-cover-data-types/"),
+ ("include"="*.parquet"),
+ ("requireVersionChangeDetection"="false"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.query.sqlpp
new file mode 100644
index 0000000..b03fc5e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+SELECT c.*
+FROM TestDataset c
+ORDER BY c.id;
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp
new file mode 100644
index 0000000..76970a5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.01.ddl.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test if exists;
+CREATE DATAVERSE test;
+USE test;
+
+CREATE TYPE ColumnType2 AS {
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp
new file mode 100644
index 0000000..745ac87
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
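+-- Smoke test: write a single constant record as Parquet using a one-column schema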
+COPY (
+ select "123" as id
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-simple")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "schema":"message schema {
+ optional binary id (UTF8);
+ }"
+
+};
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.03.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.03.ddl.sqlpp
new file mode 100644
index 0000000..a5d7789
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.03.ddl.sqlpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+CREATE EXTERNAL DATASET DatasetCopy(ColumnType2) USING S3
+(
+("accessKeyId"="dummyAccessKey"),
+("secretAccessKey"="dummySecretKey"),
+("sessionToken"="dummySessionToken"),
+("region"="us-west-2"),
+ ("serviceEndpoint"="http://127.0.0.1:8001"),
+ ("container"="playground"),
+ ("definition"="copy-to-result/parquet-simple"),
+ ("format" = "parquet"),
+ ("requireVersionChangeDetection"="false"),
+ ("include"="*.parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.04.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.04.query.sqlpp
new file mode 100644
index 0000000..5aeedb8
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.04.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+SELECT id
+FROM DatasetCopy c
+ORDER BY c.id;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.01.ddl.sqlpp
new file mode 100644
index 0000000..f890e0d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.01.ddl.sqlpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test if exists;
+CREATE DATAVERSE test;
+USE test;
+
+CREATE TYPE ColumnType1 AS {
+ id: string
+};
+
+CREATE DATASET DummyTweetDataset(ColumnType1)
+PRIMARY KEY id WITH {
+ "storage-format": {"format" : "column"}
+};
+
+
+CREATE TYPE ColumnType2 AS {
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.02.update.sqlpp
new file mode 100644
index 0000000..83a1140
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.02.update.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+LOAD DATASET DummyTweetDataset USING localfs
+(
+ ("path" = "asterix_nc1://data/hdfs/parquet/dummy_tweet.json"),
+ ("format" = "json")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.03.update.sqlpp
new file mode 100644
index 0000000..179d25c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.03.update.sqlpp
@@ -0,0 +1,240 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
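+-- Write the loaded tweets with a schema that mirrors their nested structure (LIST groups for arrays, nested groups for records)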
+COPY (
+ SELECT c.* FROM DummyTweetDataset c
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-tweet")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "schema":"message schema {
+ optional group coordinates {
+ optional group coordinates (LIST) {
+ repeated group list {
+ optional double element;
+ }
+ }
+ optional binary type (UTF8);
+ }
+ optional binary created_at (UTF8);
+ optional group entities {
+ optional group urls (LIST) {
+ repeated group list {
+ optional group element {
+ optional binary display_url (UTF8);
+ optional binary expanded_url (UTF8);
+ optional group indices (LIST) {
+ repeated group list {
+ optional int64 element;
+ }
+ }
+ optional binary url (UTF8);
+ }
+ }
+ }
+ optional group user_mentions (LIST) {
+ repeated group list {
+ optional group element {
+ optional int64 id;
+ optional binary id_str (UTF8);
+ optional group indices (LIST) {
+ repeated group list {
+ optional int64 element;
+ }
+ }
+ optional binary name (UTF8);
+ optional binary screen_name (UTF8);
+ }
+ }
+ }
+ }
+ optional int64 favorite_count;
+ optional boolean favorited;
+ optional binary filter_level (UTF8);
+ optional group geo {
+ optional group coordinates (LIST) {
+ repeated group list {
+ optional double element;
+ }
+ }
+ optional binary type (UTF8);
+ }
+ optional binary id (UTF8);
+ optional binary id_str (UTF8);
+ optional binary in_reply_to_screen_name (UTF8);
+ optional int64 in_reply_to_status_id;
+ optional binary in_reply_to_status_id_str (UTF8);
+ optional int64 in_reply_to_user_id;
+ optional binary in_reply_to_user_id_str (UTF8);
+ optional boolean is_quote_status;
+ optional binary lang (UTF8);
+ optional group place {
+ optional group bounding_box {
+ optional group coordinates (LIST) {
+ repeated group list {
+ optional group element (LIST) {
+ repeated group list {
+ optional group element (LIST) {
+ repeated group list {
+ optional double element;
+ }
+ }
+ }
+ }
+ }
+ }
+ optional binary type (UTF8);
+ }
+ optional binary country (UTF8);
+ optional binary country_code (UTF8);
+ optional binary full_name (UTF8);
+ optional binary id (UTF8);
+ optional binary name (UTF8);
+ optional binary place_type (UTF8);
+ optional binary url (UTF8);
+ }
+ optional boolean possibly_sensitive;
+ optional group quoted_status {
+ optional binary created_at (UTF8);
+ optional group entities {
+ optional group user_mentions (LIST) {
+ repeated group list {
+ optional group element {
+ optional int64 id;
+ optional binary id_str (UTF8);
+ optional group indices (LIST) {
+ repeated group list {
+ optional int64 element;
+ }
+ }
+ optional binary name (UTF8);
+ optional binary screen_name (UTF8);
+ }
+ }
+ }
+ }
+ optional int64 favorite_count;
+ optional boolean favorited;
+ optional binary filter_level (UTF8);
+ optional int64 id;
+ optional binary id_str (UTF8);
+ optional binary in_reply_to_screen_name (UTF8);
+ optional int64 in_reply_to_status_id;
+ optional binary in_reply_to_status_id_str (UTF8);
+ optional int64 in_reply_to_user_id;
+ optional binary in_reply_to_user_id_str (UTF8);
+ optional boolean is_quote_status;
+ optional binary lang (UTF8);
+ optional int64 retweet_count;
+ optional boolean retweeted;
+ optional binary source (UTF8);
+ optional binary text (UTF8);
+ optional boolean truncated;
+ optional group user {
+ optional boolean contributors_enabled;
+ optional binary created_at (UTF8);
+ optional boolean default_profile;
+ optional boolean default_profile_image;
+ optional binary description (UTF8);
+ optional int64 favourites_count;
+ optional int64 followers_count;
+ optional int64 friends_count;
+ optional boolean geo_enabled;
+ optional int64 id;
+ optional binary id_str (UTF8);
+ optional boolean is_translator;
+ optional binary lang (UTF8);
+ optional int64 listed_count;
+ optional binary name (UTF8);
+ optional binary profile_background_color (UTF8);
+ optional binary profile_background_image_url (UTF8);
+ optional binary profile_background_image_url_https (UTF8);
+ optional boolean profile_background_tile;
+ optional binary profile_banner_url (UTF8);
+ optional binary profile_image_url (UTF8);
+ optional binary profile_image_url_https (UTF8);
+ optional binary profile_link_color (UTF8);
+ optional binary profile_sidebar_border_color (UTF8);
+ optional binary profile_sidebar_fill_color (UTF8);
+ optional binary profile_text_color (UTF8);
+ optional boolean profile_use_background_image;
+ optional boolean protected;
+ optional binary screen_name (UTF8);
+ optional int64 statuses_count;
+ optional boolean verified;
+ }
+ }
+ optional int64 quoted_status_id;
+ optional binary quoted_status_id_str (UTF8);
+ optional int64 retweet_count;
+ optional boolean retweeted;
+ optional binary source (UTF8);
+ optional binary text (UTF8);
+ optional binary timestamp_ms (UTF8);
+ optional boolean truncated;
+ optional group user {
+ optional boolean contributors_enabled;
+ optional binary created_at (UTF8);
+ optional boolean default_profile;
+ optional boolean default_profile_image;
+ optional binary description (UTF8);
+ optional int64 favourites_count;
+ optional int64 followers_count;
+ optional int64 friends_count;
+ optional boolean geo_enabled;
+ optional int64 id;
+ optional binary id_str (UTF8);
+ optional boolean is_translator;
+ optional binary lang (UTF8);
+ optional int64 listed_count;
+ optional binary location (UTF8);
+ optional binary name (UTF8);
+ optional binary profile_background_color (UTF8);
+ optional binary profile_background_image_url (UTF8);
+ optional binary profile_background_image_url_https (UTF8);
+ optional boolean profile_background_tile;
+ optional binary profile_banner_url (UTF8);
+ optional binary profile_image_url (UTF8);
+ optional binary profile_image_url_https (UTF8);
+ optional binary profile_link_color (UTF8);
+ optional binary profile_sidebar_border_color (UTF8);
+ optional binary profile_sidebar_fill_color (UTF8);
+ optional binary profile_text_color (UTF8);
+ optional boolean profile_use_background_image;
+ optional boolean protected;
+ optional binary screen_name (UTF8);
+ optional int64 statuses_count;
+ optional binary time_zone (UTF8);
+ optional binary url (UTF8);
+ optional int64 utc_offset;
+ optional boolean verified;
+ }
+ }"
+};
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.04.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.04.ddl.sqlpp
new file mode 100644
index 0000000..1cf0c78
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.04.ddl.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+CREATE EXTERNAL DATASET DummyTweetDatasetCopy(ColumnType2) USING S3
+(
+ ("serviceEndpoint"="http://127.0.0.1:8001"),
+ ("region"="us-west-2"),
+ ("container"="playground"),
+ ("definition"="copy-to-result/parquet-tweet/"),
+ ("include"="*.parquet"),
+ ("requireVersionChangeDetection"="false"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.05.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.05.query.sqlpp
new file mode 100644
index 0000000..13587f6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-tweet/parquet-tweet.05.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+SELECT c.*
+FROM DummyTweetDatasetCopy c
+ORDER BY c.id;
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.01.ddl.sqlpp
new file mode 100644
index 0000000..dfc64ce
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.01.ddl.sqlpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test if exists;
+CREATE DATAVERSE test;
+USE test;
+
+CREATE TYPE ColumnType1 AS {
+ id: int
+};
+
+CREATE DATASET NameCommentDataset(ColumnType1)
+PRIMARY KEY id WITH {
+ "storage-format": {"format" : "column"}
+};
+
+
+CREATE TYPE ColumnType2 AS {
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.02.update.sqlpp
new file mode 100644
index 0000000..8591369
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.02.update.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+LOAD DATASET NameCommentDataset USING localfs
+(
+ ("path" = "asterix_nc1://data/hdfs/parquet/id_name_comment.json"),
+ ("format" = "json")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
new file mode 100644
index 0000000..ef76ad1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.03.update.sqlpp
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
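+-- Round-trip the id/name/comment records through Parquet using UTF8-annotated binary fields for the strings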
+COPY (
+ SELECT c.* FROM NameCommentDataset c
+) toWriter
+TO S3
+PATH ("copy-to-result", "parquet-utf8")
+WITH {
+ "accessKeyId":"dummyAccessKey",
+ "secretAccessKey":"dummySecretKey",
+ "region":"us-west-2",
+ "serviceEndpoint":"http://127.0.0.1:8001",
+ "container":"playground",
+ "format":"parquet",
+ "schema":"message spark_schema {
+ optional binary comment (UTF8);
+ optional int64 id;
+ optional binary name (UTF8);
+ }"
+};
+
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.04.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.04.ddl.sqlpp
new file mode 100644
index 0000000..4fd41f6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.04.ddl.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+CREATE EXTERNAL DATASET NameCommentDatasetCopy(ColumnType2) USING S3
+(
+ ("serviceEndpoint"="http://127.0.0.1:8001"),
+ ("region"="us-west-2"),
+ ("container"="playground"),
+ ("definition"="copy-to-result/parquet-utf8/"),
+ ("include"="*.parquet"),
+ ("requireVersionChangeDetection"="false"),
+ ("format" = "parquet")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.05.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.05.query.sqlpp
new file mode 100644
index 0000000..17cd027
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-utf8/parquet-utf8.05.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+SELECT c.*
+FROM NameCommentDatasetCopy c
+ORDER BY c.id;
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm
new file mode 100644
index 0000000..8fc863e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-cover-data-types/parquet-cover-data-types.05.adm
@@ -0,0 +1 @@
+{ "name": "Virat", "id": 18, "dateType": date("1988-11-05"), "timeType": time("03:10:00.493"), "boolType": false, "doubleType": 0.75, "datetimeType": datetime("1900-02-01T00:00:00.000") }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-simple/parquet-simple.04.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-simple/parquet-simple.04.adm
new file mode 100644
index 0000000..bf567b2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-simple/parquet-simple.04.adm
@@ -0,0 +1 @@
+{ "id": "123" }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-tweet/parquet-tweet.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-tweet/parquet-tweet.05.adm
new file mode 100644
index 0000000..5e0df96
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-tweet/parquet-tweet.05.adm
@@ -0,0 +1,2 @@
+{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "entities": { "urls": [ { "display_url": "string", "expanded_url": "string", "indices": [ 1 ], "url": "string" } ], "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "0000000", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": { "coordinates": [ [ [ 1.1 ] ] ], "type": "string" }, "country": "string", "country_code": "string", "full_name": "string", "id": "string", "name": "string", "place_type": "string", "url": "string" }, "possibly_sensitive": true, "quoted_status": { "created_at": "string", "entities": { "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "id": 1, "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "verified": true } }, "quoted_status_id": 1, "quoted_status_id_str": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "timestamp_ms": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "location": "string", "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "time_zone": "string", "url": "string", "utc_offset": 1, "verified": true } }
+{ "coordinates": { "coordinates": [ 1.1 ], "type": "string" }, "created_at": "string", "favorite_count": 1, "favorited": true, "filter_level": "string", "geo": { "coordinates": [ 1.1 ], "type": "string" }, "id": "11111111111111111111", "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "place": { "bounding_box": { "coordinates": [ [ [ 1.1 ] ] ], "type": "string" }, "country": "string", "country_code": "string", "full_name": "string", "id": "string", "name": "string", "place_type": "string", "url": "string" }, "possibly_sensitive": true, "quoted_status": { "created_at": "string", "entities": { "user_mentions": [ { "id": 1, "id_str": "string", "indices": [ 1 ], "name": "string", "screen_name": "string" } ] }, "favorite_count": 1, "favorited": true, "filter_level": "string", "id": 1, "id_str": "string", "in_reply_to_screen_name": "string", "in_reply_to_status_id": 1, "in_reply_to_status_id_str": "string", "in_reply_to_user_id": 1, "in_reply_to_user_id_str": "string", "is_quote_status": true, "lang": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "verified": true } }, "quoted_status_id": 1, "quoted_status_id_str": "string", "retweet_count": 1, "retweeted": true, "source": "string", "text": "string", "timestamp_ms": "string", "truncated": true, "user": { "contributors_enabled": true, "created_at": "string", "default_profile": true, "default_profile_image": true, "description": "string", "favourites_count": 1, "followers_count": 1, "friends_count": 1, "geo_enabled": true, "id": 1, "id_str": "string", "is_translator": true, "lang": "string", "listed_count": 1, "location": "string", "name": "string", "profile_background_color": "string", "profile_background_image_url": "string", "profile_background_image_url_https": "string", "profile_background_tile": true, "profile_banner_url": "string", "profile_image_url": "string", "profile_image_url_https": "string", "profile_link_color": "string", "profile_sidebar_border_color": "string", "profile_sidebar_fill_color": "string", "profile_text_color": "string", "profile_use_background_image": true, "protected": true, "screen_name": "string", "statuses_count": 1, "time_zone": "string", "url": "string", "utc_offset": 1, "verified": true } }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-utf8/parquet-utf8.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-utf8/parquet-utf8.05.adm
new file mode 100644
index 0000000..c60145d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-utf8/parquet-utf8.05.adm
@@ -0,0 +1,8 @@
+{ "id": 1, "name": "John" }
+{ "id": 2, "name": "Abel" }
+{ "id": 3, "name": "Sandy" }
+{ "id": 4, "name": "Alex" }
+{ "id": 5, "name": "Mike" }
+{ "id": 6, "name": "Tom" }
+{ "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا", "id": 7, "name": "Jerry" }
+{ "comment": "😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. 
Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. 
Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا😢😢💉💉 = 𩸽 😢😢💉💉. Coffee ☕‼️😃. حسنا", "id": 8, "name": "William" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index db61282..8e31aa4 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -40,6 +40,26 @@
</compilation-unit>
</test-case>
<test-case FilePath="copy-to">
+ <compilation-unit name="parquet-simple">
+ <output-dir compare="Text">parquet-simple</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="copy-to">
+ <compilation-unit name="parquet-tweet">
+ <output-dir compare="Text">parquet-tweet</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="copy-to">
+ <compilation-unit name="parquet-utf8">
+ <output-dir compare="Text">parquet-utf8</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="copy-to">
+ <compilation-unit name="parquet-cover-data-types">
+ <output-dir compare="Text">parquet-cover-data-types</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="copy-to">
<compilation-unit name="empty-path">
<output-dir compare="Text">empty-path</output-dir>
</compilation-unit>
@@ -80,8 +100,21 @@
<compilation-unit name="supported-adapter-format-compression">
<output-dir compare="Text">supported-adapter-format-compression</output-dir>
<expected-error>ASX1188: Unsupported writing adapter 'AZUREBLOB'. Supported adapters: [gcs, localfs, s3]</expected-error>
- <expected-error>ASX1189: Unsupported writing format 'csv'. Supported formats: [json]</expected-error>
- <expected-error>ASX1096: Unknown compression scheme rar. Supported schemes are [gzip]</expected-error>
+ <expected-error>ASX1189: Unsupported writing format 'csv'. Supported formats: [json, parquet]</expected-error>
+ <expected-error>ASX1202: Unsupported compression scheme rar. Supported schemes for json are [gzip]</expected-error>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="copy-to/negative">
+ <compilation-unit name="parquet-error-checks">
+ <output-dir compare="Text">parquet-error-checks</output-dir>
+ <expected-error>HYR0131: Invalid parquet schema provided</expected-error>
+ <expected-error>ASX0037: Type mismatch: expected value of type integer, but got the value of type BINARY</expected-error>
+ <expected-error>HYR0133: Extra field in the result, field 'second' does not exist at 'nested' in the schema</expected-error>
+ <expected-error>HYR0132: Result does not follow the schema, group type expected but found primitive type at 'nested'</expected-error>
+ <expected-error>HYR0132: Result does not follow the schema, primitive type expected but found group type at 'name'</expected-error>
+ <expected-error>ASX1201: Storage units expected for the field 'row-group-size' (e.g., 0.1KB, 100kb, 1mb, 3MB, 8.5GB ...). Provided 'random'</expected-error>
+ <expected-error>ASX1201: Storage units expected for the field 'page-size' (e.g., 0.1KB, 100kb, 1mb, 3MB, 8.5GB ...). Provided 'random'</expected-error>
+ <expected-error>ASX1202: Unsupported compression scheme rar. Supported schemes for parquet are [gzip, snappy, zstd]</expected-error>
</compilation-unit>
</test-case>
<test-case FilePath="copy-to/negative">
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index 3364600..5e59b97 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -304,6 +304,8 @@
DUPLICATE_FIELD_IN_PRIMARY_KEY(1198),
INCOMPATIBLE_FIELDS_IN_PRIMARY_KEY(1199),
PREFIX_SHOULD_NOT_START_WITH_SLASH(1200),
+ ILLEGAL_SIZE_PROVIDED(1201),
+ UNSUPPORTED_WRITER_COMPRESSION_SCHEME(1202),
// Feed errors
DATAFLOW_ILLEGAL_STATE(3001),
diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index 4b7da0c..a07dddf 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -305,6 +305,8 @@
1198 = Duplicate field '%1$s' in primary key
1199 = Fields '%1$s' and '%2$s' are incompatible for primary key
1200 = Prefix should not start with "/". Prefix: '%1$s'
+1201 = Storage units expected for the field '%1$s' (e.g., 0.1KB, 100kb, 1mb, 3MB, 8.5GB ...). Provided '%2$s'
+1202 = Unsupported compression scheme %1$s. Supported schemes for %2$s are %3$s
# Feed Errors
3001 = Illegal state.
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 3139be7..6a4b336 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -82,6 +82,11 @@
public static final String KEY_EXPRESSION = "expression";
public static final String KEY_LOCAL_SOCKET_PATH = "local-socket-path";
public static final String KEY_FORMAT = "format";
+ public static final String KEY_SCHEMA = "schema";
+ public static final String KEY_PARQUET_ROW_GROUP_SIZE = "row-group-size";
+ public static final String PARQUET_DEFAULT_ROW_GROUP_SIZE = "10MB";
+ public static final String KEY_PARQUET_PAGE_SIZE = "page-size";
+ public static final String PARQUET_DEFAULT_PAGE_SIZE = "8KB";
public static final String KEY_INCLUDE = "include";
public static final String KEY_EXCLUDE = "exclude";
public static final String KEY_QUOTE = "quote";
@@ -161,6 +166,8 @@
public static final String CLASS_NAME_PARQUET_INPUT_FORMAT =
"org.apache.asterix.external.input.record.reader.hdfs.parquet.MapredParquetInputFormat";
public static final String CLASS_NAME_HDFS_FILESYSTEM = "org.apache.hadoop.hdfs.DistributedFileSystem";
+ public static final String S3A_CHANGE_DETECTION_REQUIRED = "requireVersionChangeDetection";
+ public static final String S3A_CHANGE_DETECTION_REQUIRED_CONFIG_KEY = "fs.s3a.change.detection.version.required";
/**
* input formats aliases
*/
@@ -306,6 +313,8 @@
* Compression constants
*/
public static final String KEY_COMPRESSION_GZIP = "gzip";
+ public static final String KEY_COMPRESSION_SNAPPY = "snappy";
+ public static final String KEY_COMPRESSION_ZSTD = "zstd";
public static final String KEY_COMPRESSION_GZIP_COMPRESSION_LEVEL = "gzipCompressionLevel";
/**
@@ -318,13 +327,17 @@
public static final int WRITER_MAX_RESULT_MINIMUM = 1000;
public static final Set<String> WRITER_SUPPORTED_FORMATS;
public static final Set<String> WRITER_SUPPORTED_ADAPTERS;
- public static final Set<String> WRITER_SUPPORTED_COMPRESSION;
+ public static final Set<String> TEXTUAL_WRITER_SUPPORTED_COMPRESSION;
+ public static final Set<String> PARQUET_WRITER_SUPPORTED_COMPRESSION;
+ public static final int PARQUET_DICTIONARY_PAGE_SIZE = 1048576;
static {
- WRITER_SUPPORTED_FORMATS = Set.of(FORMAT_JSON_LOWER_CASE);
+ WRITER_SUPPORTED_FORMATS = Set.of(FORMAT_JSON_LOWER_CASE, FORMAT_PARQUET);
WRITER_SUPPORTED_ADAPTERS = Set.of(ALIAS_LOCALFS_ADAPTER.toLowerCase(), KEY_ADAPTER_NAME_AWS_S3.toLowerCase(),
KEY_ADAPTER_NAME_GCS.toLowerCase());
- WRITER_SUPPORTED_COMPRESSION = Set.of(KEY_COMPRESSION_GZIP);
+ TEXTUAL_WRITER_SUPPORTED_COMPRESSION = Set.of(KEY_COMPRESSION_GZIP);
+ PARQUET_WRITER_SUPPORTED_COMPRESSION =
+ Set.of(KEY_COMPRESSION_GZIP, KEY_COMPRESSION_SNAPPY, KEY_COMPRESSION_ZSTD);
}
public static class ParquetOptions {
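The split of the old WRITER_SUPPORTED_COMPRESSION constant into textual and Parquet variants lets validation pick the allowed codecs per output format. A minimal Java sketch of that selection (illustrative only; the real checks live in WriterValidationUtil further below, and the format/compression values here are made up):

    // Sketch: consulting the per-format compression sets (not part of the patch).
    String format = ExternalDataConstants.FORMAT_PARQUET;    // illustrative value
    String compression = "snappy";                            // illustrative value
    Set<String> supported = ExternalDataConstants.FORMAT_PARQUET.equalsIgnoreCase(format)
            ? ExternalDataConstants.PARQUET_WRITER_SUPPORTED_COMPRESSION
            : ExternalDataConstants.TEXTUAL_WRITER_SUPPORTED_COMPRESSION;
    boolean accepted = supported.contains(compression.toLowerCase()); // snappy is valid for parquet, not for json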
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
index f7638b4..6bc013a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
@@ -218,6 +218,11 @@
configureParquet(configuration, conf);
}
+ if (configuration.containsKey(ExternalDataConstants.S3A_CHANGE_DETECTION_REQUIRED)) {
+ conf.set(ExternalDataConstants.S3A_CHANGE_DETECTION_REQUIRED_CONFIG_KEY,
+ configuration.get(ExternalDataConstants.S3A_CHANGE_DETECTION_REQUIRED));
+ }
+
return conf;
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/WriterValidationUtil.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/WriterValidationUtil.java
index 843600e..0d82dd9 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/WriterValidationUtil.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/WriterValidationUtil.java
@@ -20,6 +20,10 @@
import static org.apache.asterix.common.exceptions.ErrorCode.INVALID_REQ_PARAM_VAL;
import static org.apache.asterix.common.exceptions.ErrorCode.MINIMUM_VALUE_ALLOWED_FOR_PARAM;
+import static org.apache.asterix.external.util.ExternalDataConstants.FORMAT_JSON_LOWER_CASE;
+import static org.apache.asterix.external.util.ExternalDataConstants.FORMAT_PARQUET;
+import static org.apache.asterix.external.util.ExternalDataConstants.KEY_PARQUET_PAGE_SIZE;
+import static org.apache.asterix.external.util.ExternalDataConstants.KEY_PARQUET_ROW_GROUP_SIZE;
import static org.apache.asterix.external.util.ExternalDataConstants.KEY_WRITER_MAX_RESULT;
import static org.apache.asterix.external.util.ExternalDataConstants.WRITER_MAX_RESULT_MINIMUM;
@@ -31,6 +35,7 @@
import org.apache.asterix.common.exceptions.CompilationException;
import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.hyracks.api.exceptions.SourceLocation;
+import org.apache.hyracks.util.StorageUtil;
public class WriterValidationUtil {
@@ -41,7 +46,6 @@
Map<String, String> configuration, SourceLocation sourceLocation) throws CompilationException {
validateAdapter(adapter, supportedAdapters, sourceLocation);
validateFormat(configuration, sourceLocation);
- validateCompression(configuration, sourceLocation);
validateMaxResult(configuration, sourceLocation);
}
@@ -56,14 +60,66 @@
String format = configuration.get(ExternalDataConstants.KEY_FORMAT);
checkSupported(ExternalDataConstants.KEY_FORMAT, format, ExternalDataConstants.WRITER_SUPPORTED_FORMATS,
ErrorCode.UNSUPPORTED_WRITING_FORMAT, sourceLocation, false);
+ switch (format.toLowerCase()) {
+ case FORMAT_JSON_LOWER_CASE:
+ validateJSON(configuration, sourceLocation);
+ break;
+ case FORMAT_PARQUET:
+ validateParquet(configuration, sourceLocation);
+ break;
+ }
}
- private static void validateCompression(Map<String, String> configuration, SourceLocation sourceLocation)
+ private static void validateParquet(Map<String, String> configuration, SourceLocation sourceLocation)
+ throws CompilationException {
+ validateParquetCompression(configuration, sourceLocation);
+ validateParquetRowGroupSize(configuration);
+ validateParquetPageSize(configuration);
+ }
+
+ private static void validateParquetRowGroupSize(Map<String, String> configuration) throws CompilationException {
+ String rowGroupSize = configuration.get(KEY_PARQUET_ROW_GROUP_SIZE);
+ if (rowGroupSize == null)
+ return;
+ try {
+ StorageUtil.getByteValue(rowGroupSize);
+ } catch (IllegalArgumentException e) {
+ throw CompilationException.create(ErrorCode.ILLEGAL_SIZE_PROVIDED, KEY_PARQUET_ROW_GROUP_SIZE,
+ rowGroupSize);
+ }
+ }
+
+ private static void validateParquetPageSize(Map<String, String> configuration) throws CompilationException {
+ String pageSize = configuration.get(KEY_PARQUET_PAGE_SIZE);
+ if (pageSize == null)
+ return;
+ try {
+ StorageUtil.getByteValue(pageSize);
+ } catch (IllegalArgumentException e) {
+ throw CompilationException.create(ErrorCode.ILLEGAL_SIZE_PROVIDED, KEY_PARQUET_PAGE_SIZE, pageSize);
+ }
+ }
+
+ private static void validateJSON(Map<String, String> configuration, SourceLocation sourceLocation)
+ throws CompilationException {
+ validateTextualCompression(configuration, sourceLocation);
+ }
+
+ private static void validateParquetCompression(Map<String, String> configuration, SourceLocation sourceLocation)
throws CompilationException {
String compression = configuration.get(ExternalDataConstants.KEY_WRITER_COMPRESSION);
- checkSupported(ExternalDataConstants.KEY_WRITER_COMPRESSION, compression,
- ExternalDataConstants.WRITER_SUPPORTED_COMPRESSION, ErrorCode.UNKNOWN_COMPRESSION_SCHEME,
- sourceLocation, true);
+ checkCompressionSupported(ExternalDataConstants.KEY_WRITER_COMPRESSION, compression,
+ ExternalDataConstants.PARQUET_WRITER_SUPPORTED_COMPRESSION,
+ ErrorCode.UNSUPPORTED_WRITER_COMPRESSION_SCHEME, sourceLocation, FORMAT_PARQUET, true);
+ }
+
+ private static void validateTextualCompression(Map<String, String> configuration, SourceLocation sourceLocation)
+ throws CompilationException {
+ String compression = configuration.get(ExternalDataConstants.KEY_WRITER_COMPRESSION);
+ checkCompressionSupported(ExternalDataConstants.KEY_WRITER_COMPRESSION, compression,
+ ExternalDataConstants.TEXTUAL_WRITER_SUPPORTED_COMPRESSION,
+ ErrorCode.UNSUPPORTED_WRITER_COMPRESSION_SCHEME, sourceLocation,
+ configuration.get(ExternalDataConstants.KEY_FORMAT), true);
if (ExternalDataUtils.isGzipCompression(compression)) {
validateGzipCompressionLevel(configuration, sourceLocation);
}
@@ -121,4 +177,22 @@
}
}
+ private static void checkCompressionSupported(String paramKey, String value, Set<String> supportedSet,
+ ErrorCode errorCode, SourceLocation sourceLocation, String format, boolean optional)
+ throws CompilationException {
+ if (optional && value == null) {
+ return;
+ }
+
+ if (value == null) {
+ throw new CompilationException(ErrorCode.PARAMETERS_REQUIRED, sourceLocation, paramKey);
+ }
+
+ String normalizedValue = value.toLowerCase();
+ if (!supportedSet.contains(normalizedValue)) {
+ List<String> sorted = supportedSet.stream().sorted().collect(Collectors.toList());
+ throw CompilationException.create(errorCode, sourceLocation, value, format, sorted.toString());
+ }
+ }
+
}
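For illustration, a small sketch of the size validation relied on above; the assumption is that StorageUtil.getByteValue accepts the storage-unit suffixes listed in the ASX1201 message and throws IllegalArgumentException otherwise:

    // Sketch of the behaviour validateParquetRowGroupSize/validateParquetPageSize depend on (not part of the patch).
    StorageUtil.getByteValue("10MB");        // accepted; same form as PARQUET_DEFAULT_ROW_GROUP_SIZE
    try {
        StorageUtil.getByteValue("random");  // no storage unit
    } catch (IllegalArgumentException e) {
        // reported to the user as ASX1201: Storage units expected for the field ...
    }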
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
new file mode 100644
index 0000000..b07e857
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinter.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.external.writer.printer;
+
+import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.asterix.external.input.record.reader.hdfs.parquet.AsterixParquetRuntimeException;
+import org.apache.asterix.external.util.ExternalDataConstants;
+import org.apache.asterix.external.writer.printer.parquet.AsterixParquetWriter;
+import org.apache.asterix.om.types.IAType;
+import org.apache.asterix.runtime.writer.IExternalPrinter;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hyracks.api.exceptions.ErrorCode;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IValueReference;
+import org.apache.parquet.column.ParquetProperties;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+import org.apache.parquet.schema.MessageType;
+
+public class ParquetExternalFilePrinter implements IExternalPrinter {
+ private final IAType typeInfo;
+ private final CompressionCodecName compressionCodecName;
+ private String schemaString;
+ private MessageType schema;
+ private ParquetOutputFile parquetOutputFile;
+ private ParquetWriter<IValueReference> writer;
+ private final long rowGroupSize;
+ private final int pageSize;
+
+ public ParquetExternalFilePrinter(CompressionCodecName compressionCodecName, String schemaString, IAType typeInfo,
+ long rowGroupSize, int pageSize) {
+ this.compressionCodecName = compressionCodecName;
+ this.schemaString = schemaString.replace('\r', ' ');
+ this.typeInfo = typeInfo;
+ this.rowGroupSize = rowGroupSize;
+ this.pageSize = pageSize;
+ }
+
+ @Override
+ public void open() throws HyracksDataException {
+ try {
+ this.schema = parseMessageType(schemaString);
+ } catch (IllegalArgumentException e) {
+ throw new HyracksDataException(ErrorCode.ILLGEAL_PARQUET_SCHEMA);
+ }
+ }
+
+ @Override
+ public void newStream(OutputStream outputStream) throws HyracksDataException {
+ if (parquetOutputFile != null) {
+ close();
+ }
+ parquetOutputFile = new ParquetOutputFile(outputStream);
+ Configuration conf = new Configuration();
+
+ try {
+ writer = AsterixParquetWriter.builder(parquetOutputFile).withCompressionCodec(compressionCodecName)
+ .withType(schema).withTypeInfo(typeInfo).withRowGroupSize(rowGroupSize).withPageSize(pageSize)
+ .withDictionaryPageSize(ExternalDataConstants.PARQUET_DICTIONARY_PAGE_SIZE)
+ .enableDictionaryEncoding().withValidation(false)
+ .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0).withConf(conf).build();
+ } catch (IOException e) {
+ throw HyracksDataException.create(e);
+ }
+
+ }
+
+ @Override
+ public void print(IValueReference value) throws HyracksDataException {
+ try {
+ this.writer.write(value);
+ } catch (AsterixParquetRuntimeException e) {
+ throw e.getHyracksDataException();
+ } catch (IOException e) {
+ throw HyracksDataException.create(e);
+ }
+ }
+
+ @Override
+ public void close() throws HyracksDataException {
+ if (this.writer != null) {
+ try {
+ this.writer.close();
+ } catch (IOException e) {
+ throw HyracksDataException.create(e);
+ }
+ }
+ }
+}
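A hypothetical driver sketch of the printer lifecycle above; the codec, schema string, type info, sizes, outputStream, and recordPointable are illustrative and assumed to be supplied by the caller, not taken from the patch:

    // Sketch: one printer writes one or more output streams, one record per print() call.
    IExternalPrinter printer = new ParquetExternalFilePrinter(CompressionCodecName.SNAPPY,
            "message spec { required int32 id; }", BuiltinType.ANY,
            10L * 1024 * 1024 /* row group size */, 8 * 1024 /* page size */);
    printer.open();                  // parses the Parquet schema string
    printer.newStream(outputStream); // builds an AsterixParquetWriter over this stream
    printer.print(recordPointable);  // writes one serialized record (IValueReference)
    printer.close();                 // flushes and closes the underlying ParquetWriter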
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinterFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinterFactory.java
new file mode 100644
index 0000000..53975d2
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetExternalFilePrinterFactory.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.writer.printer;
+
+import org.apache.asterix.om.types.IAType;
+import org.apache.asterix.runtime.writer.IExternalPrinter;
+import org.apache.asterix.runtime.writer.IExternalPrinterFactory;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+
+public class ParquetExternalFilePrinterFactory implements IExternalPrinterFactory {
+ private static final long serialVersionUID = 8971234908711234L;
+ private final String schema;
+ private final IAType typeInfo;
+ private final CompressionCodecName compressionCodecName;
+ private final long rowGroupSize;
+ private final int pageSize;
+
+ public ParquetExternalFilePrinterFactory(CompressionCodecName compressionCodecName, String schema, IAType typeInfo,
+ long rowGroupSize, int pageSize) {
+ this.compressionCodecName = compressionCodecName;
+ this.schema = schema;
+ this.typeInfo = typeInfo;
+ this.rowGroupSize = rowGroupSize;
+ this.pageSize = pageSize;
+ }
+
+ @Override
+ public IExternalPrinter createPrinter() {
+ return new ParquetExternalFilePrinter(compressionCodecName, schema, typeInfo, rowGroupSize, pageSize);
+ }
+}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
new file mode 100644
index 0000000..cc54676
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/ParquetOutputFile.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.external.writer.printer;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.parquet.hadoop.util.HadoopStreams;
+import org.apache.parquet.io.OutputFile;
+import org.apache.parquet.io.PositionOutputStream;
+
+public class ParquetOutputFile implements OutputFile {
+ private final FSDataOutputStream fs;
+
+ public ParquetOutputFile(OutputStream os) {
+ this.fs = new FSDataOutputStream(os, new FileSystem.Statistics("test"));
+ }
+
+ @Override
+ public PositionOutputStream create(long blockSizeHint) throws IOException {
+ return HadoopStreams.wrap(fs);
+ }
+
+ @Override
+ public PositionOutputStream createOrOverwrite(long blockSizeHint) throws IOException {
+ return HadoopStreams.wrap(fs);
+ }
+
+ @Override
+ public boolean supportsBlockSize() {
+ return false;
+ }
+
+ @Override
+ public long defaultBlockSize() {
+ return 33554432L;
+ }
+}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetWriter.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetWriter.java
new file mode 100644
index 0000000..edeab1f
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/AsterixParquetWriter.java
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.writer.printer.parquet;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.asterix.om.types.IAType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hyracks.data.std.api.IValueReference;
+import org.apache.parquet.column.ParquetProperties;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.api.WriteSupport;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
+import org.apache.parquet.io.OutputFile;
+import org.apache.parquet.schema.MessageType;
+
+public class AsterixParquetWriter extends ParquetWriter<IValueReference> {
+ public static Builder builder(Path file) {
+ return new Builder(file);
+ }
+
+ public static Builder builder(OutputFile file) {
+ return new Builder(file);
+ }
+
+ AsterixParquetWriter(Path file, WriteSupport<IValueReference> writeSupport,
+ CompressionCodecName compressionCodecName, int blockSize, int pageSize, boolean enableDictionary,
+ boolean enableValidation, ParquetProperties.WriterVersion writerVersion, Configuration conf)
+ throws IOException {
+ super(file, writeSupport, compressionCodecName, blockSize, pageSize, pageSize, enableDictionary,
+ enableValidation, writerVersion, conf);
+ }
+
+ public static class Builder extends ParquetWriter.Builder<IValueReference, Builder> {
+ private MessageType type;
+ private IAType typeInfo;
+ private Map<String, String> extraMetaData;
+
+ private Builder(Path file) {
+ super(file);
+ this.type = null;
+ this.extraMetaData = new HashMap<>();
+ }
+
+ private Builder(OutputFile file) {
+ super(file);
+ this.type = null;
+ this.extraMetaData = new HashMap<>();
+ }
+
+ public Builder withType(MessageType type) {
+ this.type = type;
+ return this;
+ }
+
+ public Builder withTypeInfo(IAType typeInfo) {
+ this.typeInfo = typeInfo;
+ return this;
+ }
+
+ public Builder withExtraMetaData(Map<String, String> extraMetaData) {
+ this.extraMetaData = extraMetaData;
+ return this;
+ }
+
+ protected Builder self() {
+ return this;
+ }
+
+ protected WriteSupport<IValueReference> getWriteSupport(Configuration conf) {
+ return new ObjectWriteSupport(this.type, this.typeInfo, this.extraMetaData);
+ }
+ }
+
+}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
new file mode 100644
index 0000000..f36df1a
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/FieldNamesDictionary.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.writer.printer.parquet;
+
+import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
+import org.apache.hyracks.data.std.api.IValueReference;
+import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
+import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
+
+public class FieldNamesDictionary {
+ private final IBinaryHashFunction fieldNameHashFunction;
+ private final Int2ObjectMap<String> hashToFieldNameIndexMap;
+
+ public FieldNamesDictionary() {
+ fieldNameHashFunction =
+ new PointableBinaryHashFunctionFactory(UTF8StringPointable.FACTORY).createBinaryHashFunction();
+ hashToFieldNameIndexMap = new Int2ObjectOpenHashMap<>();
+ }
+
+ //TODO handle hash collisions (rare enough in practice that none have been observed)
+ public String getOrCreateFieldNameIndex(IValueReference pointable) throws HyracksDataException {
+
+ int hash = getHash(pointable);
+ if (!hashToFieldNameIndexMap.containsKey(hash)) {
+ String fieldName = UTF8StringUtil.toString(pointable.getByteArray(), pointable.getStartOffset());
+ hashToFieldNameIndexMap.put(hash, fieldName);
+ return fieldName;
+ }
+ return hashToFieldNameIndexMap.get(hash);
+ }
+
+ private int getHash(IValueReference fieldName) throws HyracksDataException {
+ byte[] object = fieldName.getByteArray();
+ int start = fieldName.getStartOffset();
+ int length = fieldName.getLength();
+ return fieldNameHashFunction.hash(object, start, length);
+ }
+}
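A brief usage sketch for the dictionary: repeated field names decode to the same cached String instead of re-materializing the UTF-8 bytes on every record (fieldNamePointable is an assumed IValueReference over a serialized UTF-8 field name):

    FieldNamesDictionary dictionary = new FieldNamesDictionary();
    String first = dictionary.getOrCreateFieldNameIndex(fieldNamePointable);  // decodes and caches
    String again = dictionary.getOrCreateFieldNameIndex(fieldNamePointable);  // served from the hash map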
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ObjectWriteSupport.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ObjectWriteSupport.java
new file mode 100644
index 0000000..512b523
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ObjectWriteSupport.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.writer.printer.parquet;
+
+import java.util.Map;
+
+import org.apache.asterix.external.input.record.reader.hdfs.parquet.AsterixParquetRuntimeException;
+import org.apache.asterix.om.types.IAType;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IValueReference;
+import org.apache.parquet.hadoop.api.WriteSupport;
+import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.MessageType;
+
+public class ObjectWriteSupport extends WriteSupport<IValueReference> {
+ private MessageType schema;
+
+ private RecordConsumer recordConsumer;
+ private Map<String, String> extraMetaData;
+ ParquetRecordLazyVisitor parquetRecordLazyVisitor;
+
+ public ObjectWriteSupport(MessageType schema, IAType typeInfo, Map<String, String> extraMetaData) {
+ this.schema = schema;
+ this.extraMetaData = extraMetaData;
+ parquetRecordLazyVisitor = new ParquetRecordLazyVisitor(schema, typeInfo);
+ }
+
+ public String getName() {
+ return "asterix";
+ }
+
+ public WriteSupport.WriteContext init(Configuration configuration) {
+ return new WriteSupport.WriteContext(this.schema, this.extraMetaData);
+ }
+
+ public void prepareForWrite(RecordConsumer recordConsumer) {
+ this.recordConsumer = recordConsumer;
+ }
+
+ @Override
+ public void write(IValueReference valueReference) {
+ try {
+ parquetRecordLazyVisitor.consumeRecord(valueReference, recordConsumer);
+ } catch (HyracksDataException e) {
+ throw new AsterixParquetRuntimeException(e);
+ }
+ }
+
+}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
new file mode 100644
index 0000000..6ad9608
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordLazyVisitor.java
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.writer.printer.parquet;
+
+import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.ELEMENT_FIELD;
+import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.GROUP_TYPE_ERROR_FIELD;
+import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.LIST_FIELD;
+import static org.apache.asterix.external.writer.printer.parquet.ParquetRecordVisitorUtils.PRIMITIVE_TYPE_ERROR_FIELD;
+
+import org.apache.asterix.om.lazy.AbstractLazyVisitablePointable;
+import org.apache.asterix.om.lazy.AbstractListLazyVisitablePointable;
+import org.apache.asterix.om.lazy.FlatLazyVisitablePointable;
+import org.apache.asterix.om.lazy.ILazyVisitablePointableVisitor;
+import org.apache.asterix.om.lazy.RecordLazyVisitablePointable;
+import org.apache.asterix.om.lazy.TypedRecordLazyVisitablePointable;
+import org.apache.asterix.om.types.ARecordType;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.IAType;
+import org.apache.hyracks.api.exceptions.ErrorCode;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IValueReference;
+import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.Type;
+
+public class ParquetRecordLazyVisitor implements ILazyVisitablePointableVisitor<Void, Type> {
+
+ private final MessageType schema;
+ private final IAType typeInfo;
+ private final RecordLazyVisitablePointable rec;
+ private RecordConsumer recordConsumer;
+ private FieldNamesDictionary fieldNamesDictionary;
+
+ private final ParquetRecordVisitorUtils parquetRecordVisitorUtils;
+
+ public ParquetRecordLazyVisitor(MessageType schema, IAType typeInfo) {
+ this.schema = schema;
+ this.typeInfo = typeInfo;
+ if (typeInfo.getTypeTag() == ATypeTag.OBJECT) {
+ this.rec = new TypedRecordLazyVisitablePointable((ARecordType) typeInfo);
+ } else if (typeInfo.getTypeTag() == ATypeTag.ANY) {
+ this.rec = new RecordLazyVisitablePointable(true);
+ } else {
+ throw new RuntimeException("Unsupported type for Parquet printing");
+ }
+ this.fieldNamesDictionary = new FieldNamesDictionary();
+ this.parquetRecordVisitorUtils = new ParquetRecordVisitorUtils();
+ }
+
+ public MessageType getSchema() {
+ return schema;
+ }
+
+ @Override
+ public Void visit(RecordLazyVisitablePointable pointable, Type type) throws HyracksDataException {
+
+ if (type.isPrimitive()) {
+ throw new HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, GROUP_TYPE_ERROR_FIELD,
+ PRIMITIVE_TYPE_ERROR_FIELD, type.getName());
+ }
+ GroupType groupType = type.asGroupType();
+ recordConsumer.startGroup();
+
+ for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
+ pointable.nextChild();
+ AbstractLazyVisitablePointable child = pointable.getChildVisitablePointable();
+ String columnName = fieldNamesDictionary.getOrCreateFieldNameIndex(pointable.getFieldName());
+
+ if (!groupType.containsField(columnName)) {
+ throw new HyracksDataException(ErrorCode.EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA, columnName,
+ groupType.getName());
+ }
+ recordConsumer.startField(columnName, groupType.getFieldIndex(columnName));
+ child.accept(this, groupType.getType(columnName));
+ recordConsumer.endField(columnName, groupType.getFieldIndex(columnName));
+ }
+ recordConsumer.endGroup();
+ return null;
+ }
+
+ @Override
+ public Void visit(AbstractListLazyVisitablePointable pointable, Type type) throws HyracksDataException {
+
+ if (type.isPrimitive()) {
+ throw new HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, GROUP_TYPE_ERROR_FIELD,
+ PRIMITIVE_TYPE_ERROR_FIELD, type.getName());
+ }
+ GroupType groupType = type.asGroupType();
+
+ if (!groupType.containsField(LIST_FIELD)) {
+ throw new HyracksDataException(ErrorCode.EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA, LIST_FIELD,
+ groupType.getName());
+ }
+
+ if (groupType.getType(LIST_FIELD).isPrimitive()) {
+ throw new HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, GROUP_TYPE_ERROR_FIELD,
+ PRIMITIVE_TYPE_ERROR_FIELD, LIST_FIELD);
+ }
+
+ GroupType listType = groupType.getType(LIST_FIELD).asGroupType();
+
+ if (!listType.containsField(ELEMENT_FIELD)) {
+ throw new HyracksDataException(ErrorCode.EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA, ELEMENT_FIELD,
+ listType.getName());
+ }
+
+ recordConsumer.startGroup();
+ recordConsumer.startField(LIST_FIELD, groupType.getFieldIndex(LIST_FIELD));
+
+ for (int i = 0; i < pointable.getNumberOfChildren(); i++) {
+ pointable.nextChild();
+ AbstractLazyVisitablePointable child = pointable.getChildVisitablePointable();
+
+ recordConsumer.startGroup();
+ recordConsumer.startField(ELEMENT_FIELD, listType.getFieldIndex(ELEMENT_FIELD));
+
+ child.accept(this, listType.getType(ELEMENT_FIELD));
+
+ recordConsumer.endField(ELEMENT_FIELD, listType.getFieldIndex(ELEMENT_FIELD));
+ recordConsumer.endGroup();
+
+ }
+
+ recordConsumer.endField(LIST_FIELD, groupType.getFieldIndex(LIST_FIELD));
+ recordConsumer.endGroup();
+ return null;
+ }
+
+ @Override
+ public Void visit(FlatLazyVisitablePointable pointable, Type type) throws HyracksDataException {
+
+ if (!type.isPrimitive()) {
+ throw new HyracksDataException(ErrorCode.RESULT_DOES_NOT_FOLLOW_SCHEMA, PRIMITIVE_TYPE_ERROR_FIELD,
+ GROUP_TYPE_ERROR_FIELD, type.getName());
+ }
+ parquetRecordVisitorUtils.addValueToColumn(recordConsumer, pointable, type.asPrimitiveType());
+ return null;
+ }
+
+ public void consumeRecord(IValueReference valueReference, RecordConsumer recordConsumer)
+ throws HyracksDataException {
+ rec.set(valueReference);
+ this.recordConsumer = recordConsumer;
+
+ recordConsumer.startMessage();
+ for (int i = 0; i < rec.getNumberOfChildren(); i++) {
+ rec.nextChild();
+ String columnName = fieldNamesDictionary.getOrCreateFieldNameIndex(rec.getFieldName());
+ AbstractLazyVisitablePointable child = rec.getChildVisitablePointable();
+
+ if (!schema.containsField(columnName)) {
+ throw new HyracksDataException(ErrorCode.EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA, columnName,
+ schema.getName());
+ }
+
+ recordConsumer.startField(columnName, schema.getFieldIndex(columnName));
+ child.accept(this, schema.getType(columnName));
+ recordConsumer.endField(columnName, schema.getFieldIndex(columnName));
+ }
+ recordConsumer.endMessage();
+ }
+
+}
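For reference, the list handling above expects the standard three-level Parquet list layout with the literal child names "list" and "element"; a schema for an array-of-double field might look like the following sketch (hypothetical field name, not from the patch):

    // Illustrative list schema accepted by the visitor above.
    MessageType listSchema = MessageTypeParser.parseMessageType(
            "message spec {"
            + "  optional group coordinates (LIST) {"
            + "    repeated group list {"
            + "      optional double element;"
            + "    }"
            + "  }"
            + "}");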
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
new file mode 100644
index 0000000..54978c3
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/writer/printer/parquet/ParquetRecordVisitorUtils.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.writer.printer.parquet;
+
+import java.io.IOException;
+
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.common.exceptions.RuntimeDataException;
+import org.apache.asterix.dataflow.data.nontagged.printers.PrintTools;
+import org.apache.asterix.dataflow.data.nontagged.serde.ABooleanSerializerDeserializer;
+import org.apache.asterix.dataflow.data.nontagged.serde.ADateSerializerDeserializer;
+import org.apache.asterix.dataflow.data.nontagged.serde.ADateTimeSerializerDeserializer;
+import org.apache.asterix.dataflow.data.nontagged.serde.ADoubleSerializerDeserializer;
+import org.apache.asterix.dataflow.data.nontagged.serde.AFloatSerializerDeserializer;
+import org.apache.asterix.dataflow.data.nontagged.serde.AInt16SerializerDeserializer;
+import org.apache.asterix.dataflow.data.nontagged.serde.AInt32SerializerDeserializer;
+import org.apache.asterix.dataflow.data.nontagged.serde.AInt64SerializerDeserializer;
+import org.apache.asterix.dataflow.data.nontagged.serde.AInt8SerializerDeserializer;
+import org.apache.asterix.dataflow.data.nontagged.serde.ATimeSerializerDeserializer;
+import org.apache.asterix.om.lazy.FlatLazyVisitablePointable;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.utils.ResettableByteArrayOutputStream;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.primitive.VoidPointable;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.PrimitiveType;
+
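+/**
+ * Helper for writing flat (primitive) AsterixDB values into a Parquet {@link RecordConsumer},
+ * converting each value to the physical type of the target column.
+ */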
+public class ParquetRecordVisitorUtils {
+
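+    // Field names used by Parquet's standard three-level list representation
+    // (a repeated group named "list" whose single field is named "element")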
+ public static final String LIST_FIELD = "list";
+ public static final String ELEMENT_FIELD = "element";
+
+ public static final String GROUP_TYPE_ERROR_FIELD = "group";
+ public static final String PRIMITIVE_TYPE_ERROR_FIELD = "primitive";
+
+    private VoidPointable voidPointable;
+    private ATypeTag typeTag;
+
+    // backing byte array, start offset and length of the value currently being written
+    private byte[] b;
+    private int s, l;
+    private ResettableByteArrayOutputStream byteArrayOutputStream;
+
+ public ParquetRecordVisitorUtils() {
+ this.voidPointable = VoidPointable.FACTORY.createPointable();
+ byteArrayOutputStream = new ResettableByteArrayOutputStream();
+ }
+
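+    /**
+     * Writes an integer-like value (including date, time and datetime chronons) to the current
+     * column, casting it to the column's physical Parquet type (INT32, INT64, FLOAT or DOUBLE).
+     */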
+ private void addIntegerType(long value, PrimitiveType.PrimitiveTypeName primitiveTypeName, ATypeTag typeTag,
+ RecordConsumer recordConsumer) throws HyracksDataException {
+ switch (primitiveTypeName) {
+ case INT32:
+ recordConsumer.addInteger((int) value);
+ break;
+ case INT64:
+ recordConsumer.addLong(value);
+ break;
+ case FLOAT:
+ recordConsumer.addFloat(value);
+ break;
+ case DOUBLE:
+ recordConsumer.addDouble(value);
+ break;
+ default:
+ throw RuntimeDataException.create(ErrorCode.TYPE_MISMATCH_GENERIC, typeTag, primitiveTypeName);
+ }
+ }
+
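+    /**
+     * Writes a single flat AsterixDB value to the current Parquet column. The type tag is
+     * stripped from the tagged value first; NULL and MISSING values are skipped, and an
+     * incompatible combination of type tag and column type raises TYPE_MISMATCH_GENERIC.
+     */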
+ public void addValueToColumn(RecordConsumer recordConsumer, FlatLazyVisitablePointable pointable,
+ PrimitiveType type) throws HyracksDataException {
+
+ typeTag = pointable.getTypeTag();
+ b = pointable.getByteArray();
+
+ if (pointable.isTagged()) {
+ s = pointable.getStartOffset() + 1;
+ l = pointable.getLength() - 1;
+ } else {
+ s = pointable.getStartOffset();
+ l = pointable.getLength();
+ }
+ voidPointable.set(b, s, l);
+
+ PrimitiveType.PrimitiveTypeName primitiveTypeName = type.getPrimitiveTypeName();
+
+ switch (typeTag) {
+ case TINYINT:
+ byte tinyIntValue = AInt8SerializerDeserializer.getByte(b, s);
+ addIntegerType(tinyIntValue, primitiveTypeName, typeTag, recordConsumer);
+ break;
+ case SMALLINT:
+ short smallIntValue = AInt16SerializerDeserializer.getShort(b, s);
+ addIntegerType(smallIntValue, primitiveTypeName, typeTag, recordConsumer);
+ break;
+ case INTEGER:
+ int intValue = AInt32SerializerDeserializer.getInt(b, s);
+ addIntegerType(intValue, primitiveTypeName, typeTag, recordConsumer);
+ break;
+ case BIGINT:
+ long bigIntValue = AInt64SerializerDeserializer.getLong(b, s);
+ addIntegerType(bigIntValue, primitiveTypeName, typeTag, recordConsumer);
+ break;
+ case FLOAT:
+ float floatValue = AFloatSerializerDeserializer.getFloat(b, s);
+ switch (primitiveTypeName) {
+ case INT32:
+ recordConsumer.addInteger((int) floatValue);
+ break;
+ case INT64:
+ recordConsumer.addLong((long) floatValue);
+ break;
+ case FLOAT:
+ recordConsumer.addFloat(floatValue);
+ break;
+ case DOUBLE:
+ recordConsumer.addDouble(floatValue);
+ break;
+ default:
+ throw RuntimeDataException.create(ErrorCode.TYPE_MISMATCH_GENERIC, typeTag, primitiveTypeName);
+ }
+ break;
+ case DOUBLE:
+ double doubleValue = ADoubleSerializerDeserializer.getDouble(b, s);
+ switch (primitiveTypeName) {
+ case INT32:
+ recordConsumer.addInteger((int) doubleValue);
+ break;
+ case INT64:
+ recordConsumer.addLong((long) doubleValue);
+ break;
+ case FLOAT:
+ recordConsumer.addFloat((float) doubleValue);
+ break;
+ case DOUBLE:
+ recordConsumer.addDouble(doubleValue);
+ break;
+ default:
+ throw RuntimeDataException.create(ErrorCode.TYPE_MISMATCH_GENERIC, typeTag, primitiveTypeName);
+ }
+ break;
+ case STRING:
+ int utfLength = UTF8StringUtil.getUTFLength(b, s);
+ if (primitiveTypeName == PrimitiveType.PrimitiveTypeName.BINARY) {
+ byteArrayOutputStream.reset();
+ try {
+ PrintTools.writeUTF8StringAsJSONUnquoted(b, s, l, utfLength, byteArrayOutputStream);
+ } catch (IOException e) {
+ throw HyracksDataException.create(e);
+ }
+ recordConsumer.addBinary(Binary.fromReusedByteArray(byteArrayOutputStream.getByteArray(), 0,
+ byteArrayOutputStream.getLength()));
+
+ } else {
+ throw RuntimeDataException.create(ErrorCode.TYPE_MISMATCH_GENERIC, typeTag, primitiveTypeName);
+ }
+ break;
+ case BOOLEAN:
+ boolean booleanValue = ABooleanSerializerDeserializer.getBoolean(b, s);
+ if (primitiveTypeName == PrimitiveType.PrimitiveTypeName.BOOLEAN) {
+ recordConsumer.addBoolean(booleanValue);
+ } else {
+ throw RuntimeDataException.create(ErrorCode.TYPE_MISMATCH_GENERIC, typeTag, primitiveTypeName);
+ }
+ break;
+ case DATE:
+ int dateValue = ADateSerializerDeserializer.getChronon(b, s);
+ addIntegerType(dateValue, primitiveTypeName, typeTag, recordConsumer);
+ break;
+ case TIME:
+ int timeValue = ATimeSerializerDeserializer.getChronon(b, s);
+ addIntegerType(timeValue, primitiveTypeName, typeTag, recordConsumer);
+ break;
+ case DATETIME:
+ long dateTimeValue = ADateTimeSerializerDeserializer.getChronon(b, s);
+                addIntegerType(dateTimeValue, primitiveTypeName, typeTag, recordConsumer);
+                break;
+ case NULL:
+ case MISSING:
+ break;
+ default:
+ throw RuntimeDataException.create(ErrorCode.TYPE_MISMATCH_GENERIC, typeTag, primitiveTypeName);
+ }
+
+ }
+
+}
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/provider/ExternalWriterProvider.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/provider/ExternalWriterProvider.java
index b8583d0..c931a93 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/provider/ExternalWriterProvider.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/provider/ExternalWriterProvider.java
@@ -31,8 +31,10 @@
import org.apache.asterix.external.writer.compressor.GzipExternalFileCompressStreamFactory;
import org.apache.asterix.external.writer.compressor.IExternalFileCompressStreamFactory;
import org.apache.asterix.external.writer.compressor.NoOpExternalFileCompressStreamFactory;
+import org.apache.asterix.external.writer.printer.ParquetExternalFilePrinterFactory;
import org.apache.asterix.external.writer.printer.TextualExternalFilePrinterFactory;
import org.apache.asterix.formats.nontagged.CleanJSONPrinterFactoryProvider;
+import org.apache.asterix.om.types.IAType;
import org.apache.asterix.runtime.writer.ExternalFileWriterConfiguration;
import org.apache.asterix.runtime.writer.IExternalFileWriterFactory;
import org.apache.asterix.runtime.writer.IExternalFileWriterFactoryProvider;
@@ -41,6 +43,8 @@
import org.apache.hyracks.algebricks.data.IPrinterFactory;
import org.apache.hyracks.api.exceptions.SourceLocation;
import org.apache.hyracks.control.cc.ClusterControllerService;
+import org.apache.hyracks.util.StorageUtil;
+import org.apache.parquet.hadoop.metadata.CompressionCodecName;
public class ExternalWriterProvider {
private static final Map<String, IExternalFileWriterFactoryProvider> CREATOR_MAP;
@@ -111,16 +115,54 @@
Map<String, String> configuration = sink.getConfiguration();
String format = configuration.get(ExternalDataConstants.KEY_FORMAT);
- // Only JSON is supported for now
- if (!ExternalDataConstants.FORMAT_JSON_LOWER_CASE.equalsIgnoreCase(format)) {
+        // Only JSON and Parquet are supported for now
+ if (!ExternalDataConstants.FORMAT_JSON_LOWER_CASE.equalsIgnoreCase(format)
+ && !ExternalDataConstants.FORMAT_PARQUET.equalsIgnoreCase(format)) {
throw new UnsupportedOperationException("Unsupported format " + format);
}
String compression = getCompression(configuration);
- IExternalFileCompressStreamFactory compressStreamFactory =
- createCompressionStreamFactory(appCtx, compression, configuration);
- IPrinterFactory printerFactory = CleanJSONPrinterFactoryProvider.INSTANCE.getPrinterFactory(sourceType);
- return new TextualExternalFilePrinterFactory(printerFactory, compressStreamFactory);
+
+        switch (format.toLowerCase()) {
+ case ExternalDataConstants.FORMAT_JSON_LOWER_CASE:
+ IExternalFileCompressStreamFactory compressStreamFactory =
+ createCompressionStreamFactory(appCtx, compression, configuration);
+ IPrinterFactory printerFactory = CleanJSONPrinterFactoryProvider.INSTANCE.getPrinterFactory(sourceType);
+ return new TextualExternalFilePrinterFactory(printerFactory, compressStreamFactory);
+ case ExternalDataConstants.FORMAT_PARQUET:
+
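+                // Parquet output requires an explicit schema; compression codec, row group size
+                // and page size are optional and fall back to defaults when not provided.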
+ if (!configuration.containsKey(ExternalDataConstants.KEY_SCHEMA)) {
+ throw new UnsupportedOperationException("Schema not provided for parquet");
+ }
+ String schema = configuration.get(ExternalDataConstants.KEY_SCHEMA);
+ CompressionCodecName compressionCodecName;
+                if (compression == null || compression.isEmpty() || compression.equals("none")) {
+ compressionCodecName = CompressionCodecName.UNCOMPRESSED;
+ } else {
+ compressionCodecName = CompressionCodecName.valueOf(compression.toUpperCase());
+ }
+
+ String rowGroupSizeString = getRowGroupSize(configuration);
+ String pageSizeString = getPageSize(configuration);
+
+ long rowGroupSize = StorageUtil.getByteValue(rowGroupSizeString);
+ int pageSize = (int) StorageUtil.getByteValue(pageSizeString);
+
+ return new ParquetExternalFilePrinterFactory(compressionCodecName, schema, (IAType) sourceType,
+ rowGroupSize, pageSize);
+ default:
+ throw new UnsupportedOperationException("Unsupported format " + format);
+ }
+ }
+
+ private static String getRowGroupSize(Map<String, String> configuration) {
+ return configuration.getOrDefault(ExternalDataConstants.KEY_PARQUET_ROW_GROUP_SIZE,
+ ExternalDataConstants.PARQUET_DEFAULT_ROW_GROUP_SIZE);
+ }
+
+ private static String getPageSize(Map<String, String> configuration) {
+ return configuration.getOrDefault(ExternalDataConstants.KEY_PARQUET_PAGE_SIZE,
+ ExternalDataConstants.PARQUET_DEFAULT_PAGE_SIZE);
}
private static String getFormat(Map<String, String> configuration) {
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
index 59a4da4..319d440 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/java/org/apache/hyracks/api/exceptions/ErrorCode.java
@@ -158,6 +158,9 @@
UNSUPPORTED_WRITE_SPEC(128),
JOB_REJECTED(129),
FRAME_BIGGER_THAN_SORT_MEMORY(130),
+    ILLEGAL_PARQUET_SCHEMA(131),
+ RESULT_DOES_NOT_FOLLOW_SCHEMA(132),
+ EXTRA_FIELD_IN_RESULT_NOT_FOUND_IN_SCHEMA(133),
// Compilation error codes.
RULECOLLECTION_NOT_INSTANCE_OF_LIST(10000),
diff --git a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
index e94c12e..51acdc0 100644
--- a/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
+++ b/hyracks-fullstack/hyracks/hyracks-api/src/main/resources/errormsg/en.properties
@@ -148,6 +148,9 @@
128 = Unsupported copy to specification: PARTITION BY %1$s, ORDER BY %2$s
129 = Job %1$s failed to run. Cluster is not accepting jobs.
130 = Frame data=%1$s (requiring %2$s) is bigger than the sort budget. Used=%3$s, max=%4$s. Please increase the sort memory budget.
+131 = Invalid parquet schema provided
+132 = Result does not follow the schema: expected type %1$s but found type %2$s at '%3$s'
+133 = Extra field in the result: field '%1$s' does not exist at '%2$s' in the schema
10000 = The given rule collection %1$s is not an instance of the List class.
10001 = Cannot compose partition constraint %1$s with %2$s