Merge branch 'gerrit/neo'
Change-Id: Ib1f7319d96e5d157fbcb226358d5e19444549aca
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index f6b501a..094c1db 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -19,6 +19,7 @@
package org.apache.asterix.test.external_dataset;
import static org.apache.asterix.test.external_dataset.BinaryFileConverterUtil.BINARY_GEN_BASEDIR;
+import static org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.BOM_FILE_CONTAINER;
import static org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.FIXED_DATA_CONTAINER;
import java.io.BufferedWriter;
@@ -62,6 +63,7 @@
private static Uploader playgroundDataLoader;
private static Uploader fixedDataLoader;
private static Uploader mixedDataLoader;
+ private static Uploader bomFileLoader;
protected TestCaseContext tcCtx;
@@ -95,10 +97,12 @@
TSV_DATA_PATH = tsvDataPath;
}
- public static void setUploaders(Uploader playgroundDataLoader, Uploader fixedDataLoader, Uploader mixedDataLoader) {
+ public static void setUploaders(Uploader playgroundDataLoader, Uploader fixedDataLoader, Uploader mixedDataLoader,
+ Uploader bomFileLoader) {
ExternalDatasetTestUtils.playgroundDataLoader = playgroundDataLoader;
ExternalDatasetTestUtils.fixedDataLoader = fixedDataLoader;
ExternalDatasetTestUtils.mixedDataLoader = mixedDataLoader;
+ ExternalDatasetTestUtils.bomFileLoader = bomFileLoader;
}
/**
@@ -148,6 +152,30 @@
fixedDataLoader.upload("lvl1/lvl2/5.json", path, true, false);
}
+ /**
+ * This bucket contains files that start with byte order mark (BOM): U+FEFF
+ */
+ public static void prepareBomFileContainer() {
+ LOGGER.info("Loading bom files data to " + BOM_FILE_CONTAINER);
+
+ // Files data
+ bomFileLoader.upload("1.json", "\uFEFF{\"id\": 1, \"age\": 1}", false, false);
+ bomFileLoader.upload("2.json", "\uFEFF{\"id\": 2, \"age\": 2}", false, false);
+ bomFileLoader.upload("3.json", "\uFEFF{\"id\": 3, \"age\": 3}", false, false);
+ bomFileLoader.upload("4.json", "\uFEFF{\"id\": 4, \"age\": 4}", false, false);
+ bomFileLoader.upload("5.json", "\uFEFF{\"id\": 5, \"age\": 5}", false, false);
+ bomFileLoader.upload("1.csv", "\uFEFF1,1", false, false);
+ bomFileLoader.upload("2.csv", "\uFEFF2,2", false, false);
+ bomFileLoader.upload("3.csv", "\uFEFF3,3", false, false);
+ bomFileLoader.upload("4.csv", "\uFEFF4,4", false, false);
+ bomFileLoader.upload("5.csv", "\uFEFF5,5", false, false);
+ bomFileLoader.upload("1.tsv", "\uFEFF1\t1", false, false);
+ bomFileLoader.upload("2.tsv", "\uFEFF2\t2", false, false);
+ bomFileLoader.upload("3.tsv", "\uFEFF3\t3", false, false);
+ bomFileLoader.upload("4.tsv", "\uFEFF4\t4", false, false);
+ bomFileLoader.upload("5.tsv", "\uFEFF5\t5", false, false);
+ }
+
public static void loadJsonFiles() {
String dataBasePath = JSON_DATA_PATH;
String definition = JSON_DEFINITION;
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
index 8114873..6c07fab 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
@@ -46,6 +46,7 @@
PREPARE_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareS3Bucket;
PREPARE_FIXED_DATA_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareFixedDataBucket;
PREPARE_MIXED_DATA_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareMixedDataBucket;
+ PREPARE_BOM_FILE_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareBomDataBucket;
return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
}
@@ -57,4 +58,7 @@
private static void prepareMixedDataBucket() {
}
+
+ private static void prepareBomDataBucket() {
+ }
}
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 8035f5a..05b0d0b 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -89,6 +89,7 @@
static Runnable PREPARE_BUCKET;
static Runnable PREPARE_FIXED_DATA_BUCKET;
static Runnable PREPARE_MIXED_DATA_BUCKET;
+ static Runnable PREPARE_BOM_FILE_BUCKET;
// Base directory paths for data files
private static final String JSON_DATA_PATH = joinPath("data", "json");
@@ -115,12 +116,15 @@
public static final String PLAYGROUND_CONTAINER = "playground";
public static final String FIXED_DATA_CONTAINER = "fixed-data"; // Do not use, has fixed data
public static final String INCLUDE_EXCLUDE_CONTAINER = "include-exclude";
+ public static final String BOM_FILE_CONTAINER = "bom-file-container";
public static final PutObjectRequest.Builder playgroundBuilder =
PutObjectRequest.builder().bucket(PLAYGROUND_CONTAINER);
public static final PutObjectRequest.Builder fixedDataBuilder =
PutObjectRequest.builder().bucket(FIXED_DATA_CONTAINER);
public static final PutObjectRequest.Builder includeExcludeBuilder =
PutObjectRequest.builder().bucket(INCLUDE_EXCLUDE_CONTAINER);
+ public static final PutObjectRequest.Builder bomFileContainerBuilder =
+ PutObjectRequest.builder().bucket(BOM_FILE_CONTAINER);
public AwsS3ExternalDatasetTest(TestCaseContext tcCtx) {
this.tcCtx = tcCtx;
@@ -158,6 +162,8 @@
PREPARE_BUCKET = ExternalDatasetTestUtils::preparePlaygroundContainer;
PREPARE_FIXED_DATA_BUCKET = ExternalDatasetTestUtils::prepareFixedDataContainer;
PREPARE_MIXED_DATA_BUCKET = ExternalDatasetTestUtils::prepareMixedDataContainer;
+ PREPARE_BOM_FILE_BUCKET = ExternalDatasetTestUtils::prepareBomFileContainer;
+
return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
}
@@ -199,15 +205,17 @@
client.createBucket(CreateBucketRequest.builder().bucket(PLAYGROUND_CONTAINER).build());
client.createBucket(CreateBucketRequest.builder().bucket(FIXED_DATA_CONTAINER).build());
client.createBucket(CreateBucketRequest.builder().bucket(INCLUDE_EXCLUDE_CONTAINER).build());
+ client.createBucket(CreateBucketRequest.builder().bucket(BOM_FILE_CONTAINER).build());
LOGGER.info("Client created successfully");
// Create the bucket and upload some json files
setDataPaths(JSON_DATA_PATH, CSV_DATA_PATH, TSV_DATA_PATH);
setUploaders(AwsS3ExternalDatasetTest::loadPlaygroundData, AwsS3ExternalDatasetTest::loadFixedData,
- AwsS3ExternalDatasetTest::loadMixedData);
+ AwsS3ExternalDatasetTest::loadMixedData, AwsS3ExternalDatasetTest::loadBomData);
PREPARE_BUCKET.run();
PREPARE_FIXED_DATA_BUCKET.run();
PREPARE_MIXED_DATA_BUCKET.run();
+ PREPARE_BOM_FILE_BUCKET.run();
}
private static void loadPlaygroundData(String key, String content, boolean fromFile, boolean gzipped) {
@@ -222,6 +230,10 @@
client.putObject(includeExcludeBuilder.key(key).build(), getRequestBody(content, fromFile, gzipped));
}
+ private static void loadBomData(String key, String content, boolean fromFile, boolean gzipped) {
+ client.putObject(bomFileContainerBuilder.key(key).build(), getRequestBody(content, fromFile, gzipped));
+ }
+
private static RequestBody getRequestBody(String content, boolean fromFile, boolean gzipped) {
RequestBody body;
// Content is string
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.000.ddl.sqlpp
new file mode 100644
index 0000000..69e42c1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.000.ddl.sqlpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as { id: int, age: int };
+
+drop dataset test1 if exists;
+CREATE EXTERNAL DATASET test1(test) USING %adapter% (
+%template%,
+("container"="bom-file-container"),
+("format"="csv"),
+("include"="*.csv"),
+("header"=False),
+("null"="")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.001.query.sqlpp
new file mode 100644
index 0000000..5aa5580
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.001.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select value test1 from test1 order by id asc;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.099.ddl.sqlpp
new file mode 100644
index 0000000..548e632
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.000.ddl.sqlpp
new file mode 100644
index 0000000..ad6513f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.000.ddl.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+};
+
+drop dataset test1 if exists;
+CREATE EXTERNAL DATASET test1(test) USING %adapter% (
+%template%,
+("container"="bom-file-container"),
+("format"="json"),
+("include"="*.json")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.001.query.sqlpp
new file mode 100644
index 0000000..5aa5580
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.001.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select value test1 from test1 order by id asc;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.099.ddl.sqlpp
new file mode 100644
index 0000000..548e632
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.000.ddl.sqlpp
new file mode 100644
index 0000000..956e835
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.000.ddl.sqlpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as { id: int, age: int };
+
+drop dataset test1 if exists;
+CREATE EXTERNAL DATASET test1(test) USING %adapter% (
+%template%,
+("container"="bom-file-container"),
+("format"="tsv"),
+("include"="*.tsv"),
+("header"=False),
+("null"="")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.001.query.sqlpp
new file mode 100644
index 0000000..5aa5580
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.001.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select value test1 from test1 order by id asc;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.099.ddl.sqlpp
new file mode 100644
index 0000000..548e632
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/csv/result.001.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/csv/result.001.adm
new file mode 100644
index 0000000..19d10f6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/csv/result.001.adm
@@ -0,0 +1,5 @@
+{ "id": 1, "age": 1 }
+{ "id": 2, "age": 2 }
+{ "id": 3, "age": 3 }
+{ "id": 4, "age": 4 }
+{ "id": 5, "age": 5 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/json/result.001.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/json/result.001.adm
new file mode 100644
index 0000000..19d10f6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/json/result.001.adm
@@ -0,0 +1,5 @@
+{ "id": 1, "age": 1 }
+{ "id": 2, "age": 2 }
+{ "id": 3, "age": 3 }
+{ "id": 4, "age": 4 }
+{ "id": 5, "age": 5 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/tsv/result.001.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/tsv/result.001.adm
new file mode 100644
index 0000000..19d10f6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/tsv/result.001.adm
@@ -0,0 +1,5 @@
+{ "id": 1, "age": 1 }
+{ "id": 2, "age": 2 }
+{ "id": 3, "age": 3 }
+{ "id": 4, "age": 4 }
+{ "id": 5, "age": 5 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
index 8844368..2e1a6bf 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
@@ -364,4 +364,24 @@
</compilation-unit>
</test-case>
</test-group>
+ <test-group name="bom">
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/byte_order_mark/json">
+ <placeholder name="adapter" value="AZUREBLOB" />
+ <output-dir compare="Text">common/byte_order_mark/json</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/byte_order_mark/csv">
+ <placeholder name="adapter" value="AZUREBLOB" />
+ <output-dir compare="Text">common/byte_order_mark/csv</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/byte_order_mark/tsv">
+ <placeholder name="adapter" value="AZUREBLOB" />
+ <output-dir compare="Text">common/byte_order_mark/tsv</output-dir>
+ </compilation-unit>
+ </test-case>
+ </test-group>
</test-suite>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 11a2fe7..bacc23b 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -411,4 +411,24 @@
</compilation-unit>
</test-case>
</test-group>
+ <test-group name="bom">
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/byte_order_mark/json">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/byte_order_mark/json</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/byte_order_mark/csv">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/byte_order_mark/csv</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/byte_order_mark/tsv">
+ <placeholder name="adapter" value="S3" />
+ <output-dir compare="Text">common/byte_order_mark/tsv</output-dir>
+ </compilation-unit>
+ </test-case>
+ </test-group>
</test-suite>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
index 4b86142..db20d31 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
@@ -18,6 +18,8 @@
*/
package org.apache.asterix.external.input.record.reader.stream;
+import static org.apache.asterix.external.util.ExternalDataConstants.BYTE_ORDER_MARK;
+
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
@@ -121,6 +123,10 @@
}
}
for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
+ if (inputBuffer[bufferPosn] == BYTE_ORDER_MARK) {
+ startPosn++;
+ continue;
+ }
if (inputBuffer[bufferPosn] == ExternalDataConstants.LF) {
newlineLength = (prevCharCR) ? 2 : 1;
++bufferPosn; // at next invocation proceed from following byte
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
index 4c253bc..4433b49 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
@@ -18,6 +18,7 @@
*/
package org.apache.asterix.external.input.record.reader.stream;
+import static org.apache.asterix.external.util.ExternalDataConstants.BYTE_ORDER_MARK;
import static org.apache.asterix.external.util.ExternalDataConstants.REC_ENDED_AT_EOF;
import java.io.IOException;
@@ -119,6 +120,10 @@
boolean maybeInQuote = false;
for (; bufferPosn < bufferLength; ++bufferPosn) {
char ch = inputBuffer[bufferPosn];
+ if (ch == BYTE_ORDER_MARK) {
+ startPosn++;
+ continue;
+ }
// count lines here since we need to also count the lines inside quotes
if (ch == ExternalDataConstants.LF || prevCharCR) {
lineNumber++;
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
index 0e23e46..2c31a0a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
@@ -18,6 +18,7 @@
*/
package org.apache.asterix.external.input.record.reader.stream;
+import static org.apache.asterix.external.util.ExternalDataConstants.BYTE_ORDER_MARK;
import static org.apache.asterix.external.util.ExternalDataConstants.CLOSING_BRACKET;
import static org.apache.asterix.external.util.ExternalDataConstants.COMMA;
import static org.apache.asterix.external.util.ExternalDataConstants.CR;
@@ -134,7 +135,7 @@
lineNumber++;
}
isLastCharCR = c == CR;
- if (c == SPACE || c == TAB || c == LF || c == CR) {
+ if (c == SPACE || c == TAB || c == LF || c == CR || c == BYTE_ORDER_MARK) {
continue;
}
if (c == recordStart && state != State.NESTED_OBJECT) {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index b462bd9..89d1132 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -255,6 +255,7 @@
public static final char OPEN_BRACKET = '[';
public static final char CLOSING_BRACKET = ']';
public static final char COMMA = ',';
+ public static final char BYTE_ORDER_MARK = '\uFEFF';
/**
* Constant byte characters