Merge branch 'gerrit/neo'

Change-Id: Ib1f7319d96e5d157fbcb226358d5e19444549aca
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
index f6b501a..094c1db 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -19,6 +19,7 @@
 package org.apache.asterix.test.external_dataset;
 
 import static org.apache.asterix.test.external_dataset.BinaryFileConverterUtil.BINARY_GEN_BASEDIR;
+import static org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.BOM_FILE_CONTAINER;
 import static org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.FIXED_DATA_CONTAINER;
 
 import java.io.BufferedWriter;
@@ -62,6 +63,7 @@
     private static Uploader playgroundDataLoader;
     private static Uploader fixedDataLoader;
     private static Uploader mixedDataLoader;
+    private static Uploader bomFileLoader;
 
     protected TestCaseContext tcCtx;
 
@@ -95,10 +97,12 @@
         TSV_DATA_PATH = tsvDataPath;
     }
 
-    public static void setUploaders(Uploader playgroundDataLoader, Uploader fixedDataLoader, Uploader mixedDataLoader) {
+    public static void setUploaders(Uploader playgroundDataLoader, Uploader fixedDataLoader, Uploader mixedDataLoader,
+            Uploader bomFileLoader) {
         ExternalDatasetTestUtils.playgroundDataLoader = playgroundDataLoader;
         ExternalDatasetTestUtils.fixedDataLoader = fixedDataLoader;
         ExternalDatasetTestUtils.mixedDataLoader = mixedDataLoader;
+        ExternalDatasetTestUtils.bomFileLoader = bomFileLoader;
     }
 
     /**
@@ -148,6 +152,30 @@
         fixedDataLoader.upload("lvl1/lvl2/5.json", path, true, false);
     }
 
+    /**
+     * This bucket contains files that start with byte order mark (BOM): U+FEFF
+     */
+    public static void prepareBomFileContainer() {
+        LOGGER.info("Loading bom files data to " + BOM_FILE_CONTAINER);
+
+        // Files data
+        bomFileLoader.upload("1.json", "\uFEFF{\"id\": 1, \"age\": 1}", false, false);
+        bomFileLoader.upload("2.json", "\uFEFF{\"id\": 2, \"age\": 2}", false, false);
+        bomFileLoader.upload("3.json", "\uFEFF{\"id\": 3, \"age\": 3}", false, false);
+        bomFileLoader.upload("4.json", "\uFEFF{\"id\": 4, \"age\": 4}", false, false);
+        bomFileLoader.upload("5.json", "\uFEFF{\"id\": 5, \"age\": 5}", false, false);
+        bomFileLoader.upload("1.csv", "\uFEFF1,1", false, false);
+        bomFileLoader.upload("2.csv", "\uFEFF2,2", false, false);
+        bomFileLoader.upload("3.csv", "\uFEFF3,3", false, false);
+        bomFileLoader.upload("4.csv", "\uFEFF4,4", false, false);
+        bomFileLoader.upload("5.csv", "\uFEFF5,5", false, false);
+        bomFileLoader.upload("1.tsv", "\uFEFF1\t1", false, false);
+        bomFileLoader.upload("2.tsv", "\uFEFF2\t2", false, false);
+        bomFileLoader.upload("3.tsv", "\uFEFF3\t3", false, false);
+        bomFileLoader.upload("4.tsv", "\uFEFF4\t4", false, false);
+        bomFileLoader.upload("5.tsv", "\uFEFF5\t5", false, false);
+    }
+
     public static void loadJsonFiles() {
         String dataBasePath = JSON_DATA_PATH;
         String definition = JSON_DEFINITION;
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
index 8114873..6c07fab 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
@@ -46,6 +46,7 @@
         PREPARE_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareS3Bucket;
         PREPARE_FIXED_DATA_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareFixedDataBucket;
         PREPARE_MIXED_DATA_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareMixedDataBucket;
+        PREPARE_BOM_FILE_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareBomDataBucket;
         return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
     }
 
@@ -57,4 +58,7 @@
 
     private static void prepareMixedDataBucket() {
     }
+
+    private static void prepareBomDataBucket() {
+    }
 }
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 8035f5a..05b0d0b 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -89,6 +89,7 @@
     static Runnable PREPARE_BUCKET;
     static Runnable PREPARE_FIXED_DATA_BUCKET;
     static Runnable PREPARE_MIXED_DATA_BUCKET;
+    static Runnable PREPARE_BOM_FILE_BUCKET;
 
     // Base directory paths for data files
     private static final String JSON_DATA_PATH = joinPath("data", "json");
@@ -115,12 +116,15 @@
     public static final String PLAYGROUND_CONTAINER = "playground";
     public static final String FIXED_DATA_CONTAINER = "fixed-data"; // Do not use, has fixed data
     public static final String INCLUDE_EXCLUDE_CONTAINER = "include-exclude";
+    public static final String BOM_FILE_CONTAINER = "bom-file-container";
     public static final PutObjectRequest.Builder playgroundBuilder =
             PutObjectRequest.builder().bucket(PLAYGROUND_CONTAINER);
     public static final PutObjectRequest.Builder fixedDataBuilder =
             PutObjectRequest.builder().bucket(FIXED_DATA_CONTAINER);
     public static final PutObjectRequest.Builder includeExcludeBuilder =
             PutObjectRequest.builder().bucket(INCLUDE_EXCLUDE_CONTAINER);
+    public static final PutObjectRequest.Builder bomFileContainerBuilder =
+            PutObjectRequest.builder().bucket(BOM_FILE_CONTAINER);
 
     public AwsS3ExternalDatasetTest(TestCaseContext tcCtx) {
         this.tcCtx = tcCtx;
@@ -158,6 +162,8 @@
         PREPARE_BUCKET = ExternalDatasetTestUtils::preparePlaygroundContainer;
         PREPARE_FIXED_DATA_BUCKET = ExternalDatasetTestUtils::prepareFixedDataContainer;
         PREPARE_MIXED_DATA_BUCKET = ExternalDatasetTestUtils::prepareMixedDataContainer;
+        PREPARE_BOM_FILE_BUCKET = ExternalDatasetTestUtils::prepareBomFileContainer;
+
         return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
     }
 
@@ -199,15 +205,17 @@
         client.createBucket(CreateBucketRequest.builder().bucket(PLAYGROUND_CONTAINER).build());
         client.createBucket(CreateBucketRequest.builder().bucket(FIXED_DATA_CONTAINER).build());
         client.createBucket(CreateBucketRequest.builder().bucket(INCLUDE_EXCLUDE_CONTAINER).build());
+        client.createBucket(CreateBucketRequest.builder().bucket(BOM_FILE_CONTAINER).build());
         LOGGER.info("Client created successfully");
 
         // Create the bucket and upload some json files
         setDataPaths(JSON_DATA_PATH, CSV_DATA_PATH, TSV_DATA_PATH);
         setUploaders(AwsS3ExternalDatasetTest::loadPlaygroundData, AwsS3ExternalDatasetTest::loadFixedData,
-                AwsS3ExternalDatasetTest::loadMixedData);
+                AwsS3ExternalDatasetTest::loadMixedData, AwsS3ExternalDatasetTest::loadBomData);
         PREPARE_BUCKET.run();
         PREPARE_FIXED_DATA_BUCKET.run();
         PREPARE_MIXED_DATA_BUCKET.run();
+        PREPARE_BOM_FILE_BUCKET.run();
     }
 
     private static void loadPlaygroundData(String key, String content, boolean fromFile, boolean gzipped) {
@@ -222,6 +230,10 @@
         client.putObject(includeExcludeBuilder.key(key).build(), getRequestBody(content, fromFile, gzipped));
     }
 
+    private static void loadBomData(String key, String content, boolean fromFile, boolean gzipped) {
+        client.putObject(bomFileContainerBuilder.key(key).build(), getRequestBody(content, fromFile, gzipped));
+    }
+
     private static RequestBody getRequestBody(String content, boolean fromFile, boolean gzipped) {
         RequestBody body;
         // Content is string
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.000.ddl.sqlpp
new file mode 100644
index 0000000..69e42c1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.000.ddl.sqlpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as { id: int, age: int };
+
+drop dataset test1 if exists;
+CREATE EXTERNAL DATASET test1(test) USING %adapter% (
+%template%,
+("container"="bom-file-container"),
+("format"="csv"),
+("include"="*.csv"),
+("header"=False),
+("null"="")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.001.query.sqlpp
new file mode 100644
index 0000000..5aa5580
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.001.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select value test1 from test1 order by id asc;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.099.ddl.sqlpp
new file mode 100644
index 0000000..548e632
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/csv/test.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.000.ddl.sqlpp
new file mode 100644
index 0000000..ad6513f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.000.ddl.sqlpp
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+};
+
+drop dataset test1 if exists;
+CREATE EXTERNAL DATASET test1(test) USING %adapter% (
+%template%,
+("container"="bom-file-container"),
+("format"="json"),
+("include"="*.json")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.001.query.sqlpp
new file mode 100644
index 0000000..5aa5580
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.001.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select value test1 from test1 order by id asc;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.099.ddl.sqlpp
new file mode 100644
index 0000000..548e632
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/json/test.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.000.ddl.sqlpp
new file mode 100644
index 0000000..956e835
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.000.ddl.sqlpp
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as { id: int, age: int };
+
+drop dataset test1 if exists;
+CREATE EXTERNAL DATASET test1(test) USING %adapter% (
+%template%,
+("container"="bom-file-container"),
+("format"="tsv"),
+("include"="*.tsv"),
+("header"=False),
+("null"="")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.001.query.sqlpp
new file mode 100644
index 0000000..5aa5580
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.001.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select value test1 from test1 order by id asc;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.099.ddl.sqlpp
new file mode 100644
index 0000000..548e632
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/byte_order_mark/tsv/test.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/csv/result.001.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/csv/result.001.adm
new file mode 100644
index 0000000..19d10f6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/csv/result.001.adm
@@ -0,0 +1,5 @@
+{ "id": 1, "age": 1 }
+{ "id": 2, "age": 2 }
+{ "id": 3, "age": 3 }
+{ "id": 4, "age": 4 }
+{ "id": 5, "age": 5 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/json/result.001.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/json/result.001.adm
new file mode 100644
index 0000000..19d10f6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/json/result.001.adm
@@ -0,0 +1,5 @@
+{ "id": 1, "age": 1 }
+{ "id": 2, "age": 2 }
+{ "id": 3, "age": 3 }
+{ "id": 4, "age": 4 }
+{ "id": 5, "age": 5 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/tsv/result.001.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/tsv/result.001.adm
new file mode 100644
index 0000000..19d10f6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/byte_order_mark/tsv/result.001.adm
@@ -0,0 +1,5 @@
+{ "id": 1, "age": 1 }
+{ "id": 2, "age": 2 }
+{ "id": 3, "age": 3 }
+{ "id": 4, "age": 4 }
+{ "id": 5, "age": 5 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
index 8844368..2e1a6bf 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_azure_blob_storage.xml
@@ -364,4 +364,24 @@
       </compilation-unit>
     </test-case>
   </test-group>
+  <test-group name="bom">
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/byte_order_mark/json">
+        <placeholder name="adapter" value="AZUREBLOB" />
+        <output-dir compare="Text">common/byte_order_mark/json</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/byte_order_mark/csv">
+        <placeholder name="adapter" value="AZUREBLOB" />
+        <output-dir compare="Text">common/byte_order_mark/csv</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/byte_order_mark/tsv">
+        <placeholder name="adapter" value="AZUREBLOB" />
+        <output-dir compare="Text">common/byte_order_mark/tsv</output-dir>
+      </compilation-unit>
+    </test-case>
+  </test-group>
 </test-suite>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 11a2fe7..bacc23b 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -411,4 +411,24 @@
       </compilation-unit>
     </test-case>
   </test-group>
+  <test-group name="bom">
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/byte_order_mark/json">
+        <placeholder name="adapter" value="S3" />
+        <output-dir compare="Text">common/byte_order_mark/json</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/byte_order_mark/csv">
+        <placeholder name="adapter" value="S3" />
+        <output-dir compare="Text">common/byte_order_mark/csv</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="common/byte_order_mark/tsv">
+        <placeholder name="adapter" value="S3" />
+        <output-dir compare="Text">common/byte_order_mark/tsv</output-dir>
+      </compilation-unit>
+    </test-case>
+  </test-group>
 </test-suite>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
index 4b86142..db20d31 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/LineRecordReader.java
@@ -18,6 +18,8 @@
  */
 package org.apache.asterix.external.input.record.reader.stream;
 
+import static org.apache.asterix.external.util.ExternalDataConstants.BYTE_ORDER_MARK;
+
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collections;
@@ -121,6 +123,10 @@
                     }
                 }
                 for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline
+                    if (inputBuffer[bufferPosn] == BYTE_ORDER_MARK) {
+                        startPosn++;
+                        continue;
+                    }
                     if (inputBuffer[bufferPosn] == ExternalDataConstants.LF) {
                         newlineLength = (prevCharCR) ? 2 : 1;
                         ++bufferPosn; // at next invocation proceed from following byte
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
index 4c253bc..4433b49 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/QuotedLineRecordReader.java
@@ -18,6 +18,7 @@
  */
 package org.apache.asterix.external.input.record.reader.stream;
 
+import static org.apache.asterix.external.util.ExternalDataConstants.BYTE_ORDER_MARK;
 import static org.apache.asterix.external.util.ExternalDataConstants.REC_ENDED_AT_EOF;
 
 import java.io.IOException;
@@ -119,6 +120,10 @@
                 boolean maybeInQuote = false;
                 for (; bufferPosn < bufferLength; ++bufferPosn) {
                     char ch = inputBuffer[bufferPosn];
+                    if (ch == BYTE_ORDER_MARK) {
+                        startPosn++;
+                        continue;
+                    }
                     // count lines here since we need to also count the lines inside quotes
                     if (ch == ExternalDataConstants.LF || prevCharCR) {
                         lineNumber++;
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
index 0e23e46..2c31a0a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/stream/SemiStructuredRecordReader.java
@@ -18,6 +18,7 @@
  */
 package org.apache.asterix.external.input.record.reader.stream;
 
+import static org.apache.asterix.external.util.ExternalDataConstants.BYTE_ORDER_MARK;
 import static org.apache.asterix.external.util.ExternalDataConstants.CLOSING_BRACKET;
 import static org.apache.asterix.external.util.ExternalDataConstants.COMMA;
 import static org.apache.asterix.external.util.ExternalDataConstants.CR;
@@ -134,7 +135,7 @@
                         lineNumber++;
                     }
                     isLastCharCR = c == CR;
-                    if (c == SPACE || c == TAB || c == LF || c == CR) {
+                    if (c == SPACE || c == TAB || c == LF || c == CR || c == BYTE_ORDER_MARK) {
                         continue;
                     }
                     if (c == recordStart && state != State.NESTED_OBJECT) {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index b462bd9..89d1132 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -255,6 +255,7 @@
     public static final char OPEN_BRACKET = '[';
     public static final char CLOSING_BRACKET = ']';
     public static final char COMMA = ',';
+    public static final char BYTE_ORDER_MARK = '\uFEFF';
 
     /**
      * Constant byte characters