[ASTERIXDB-2724][EXT] Handle passing empty defintion to external datasets

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- When an empty string is passed as an external dataset's definition, then
  no prefix is supplied to the AWS client (nothing to filter).
- Added a test case for the above mentioned item.

Change-Id: I500e7afb97aa076b690ef3b98ee83c8f5934a88f
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6183
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
Reviewed-by: Michael Blow <mblow@apache.org>
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
index 88cd6b5..bd433f1 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
@@ -44,9 +44,13 @@
         ONLY_TESTS = "only_external_dataset.xml";
         TEST_CONFIG_FILE_NAME = "src/test/resources/cc-single.conf";
         PREPARE_S3_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareS3Bucket;
+        PREPARE_FIXED_DATA_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareFixedDataBucket;
         return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
     }
 
     private static void prepareS3Bucket() {
     }
+
+    private static void prepareFixedDataBucket() {
+    }
 }
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index b5866e9..37a3916 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -83,6 +83,7 @@
     static String ONLY_TESTS;
     static String TEST_CONFIG_FILE_NAME;
     static Runnable PREPARE_S3_BUCKET;
+    static Runnable PREPARE_FIXED_DATA_BUCKET;
 
     // Base directory paths for data files
     private static final String JSON_DATA_PATH = joinPath("data", "json");
@@ -96,6 +97,7 @@
     // Region, bucket and definitions
     private static final String S3_MOCK_SERVER_REGION = "us-west-2";
     private static final String S3_MOCK_SERVER_BUCKET = "playground";
+    private static final String S3_MOCK_SERVER_FIXED_DATA_BUCKET = "fixed-data-bucket"; // Do not use, has fixed data
     private static final String S3_MOCK_SERVER_BUCKET_JSON_DEFINITION = "json-data/reviews/"; // data resides here
     private static final String S3_MOCK_SERVER_BUCKET_CSV_DEFINITION = "csv-data/reviews/"; // data resides here
     private static final String S3_MOCK_SERVER_BUCKET_TSV_DEFINITION = "tsv-data/reviews/"; // data resides here
@@ -145,6 +147,7 @@
         ONLY_TESTS = "only_external_dataset.xml";
         TEST_CONFIG_FILE_NAME = "src/main/resources/cc.conf";
         PREPARE_S3_BUCKET = AwsS3ExternalDatasetTest::prepareS3Bucket;
+        PREPARE_FIXED_DATA_BUCKET = AwsS3ExternalDatasetTest::prepareFixedDataBucket;
         return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
     }
 
@@ -187,6 +190,7 @@
 
         // Create the bucket and upload some json files
         PREPARE_S3_BUCKET.run();
+        PREPARE_FIXED_DATA_BUCKET.run();
     }
 
     /**
@@ -210,6 +214,26 @@
         LOGGER.info("TSV Files added successfully");
     }
 
+    /**
+     * This bucket is being filled by fixed data, a test is counting all records in this bucket. If this bucket is
+     * changed, the test case will fail and its result will need to be updated each time
+     */
+    private static void prepareFixedDataBucket() {
+        LOGGER.info("creating bucket " + S3_MOCK_SERVER_FIXED_DATA_BUCKET);
+        client.createBucket(CreateBucketRequest.builder().bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).build());
+        LOGGER.info("bucket " + S3_MOCK_SERVER_FIXED_DATA_BUCKET + " created successfully");
+
+        LOGGER.info("Loading fixed data to " + S3_MOCK_SERVER_FIXED_DATA_BUCKET);
+
+        // Files data
+        RequestBody requestBody = RequestBody.fromFile(Paths.get(JSON_DATA_PATH, "single-line", "20-records.json"));
+        client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("1.json").build(), requestBody);
+        client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("2.json").build(), requestBody);
+        client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/3.json").build(), requestBody);
+        client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/4.json").build(), requestBody);
+        client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/lvl2/5.json").build(), requestBody);
+    }
+
     private static void loadJsonFiles() {
         String dataBasePath = JSON_DATA_PATH;
         String definition = S3_MOCK_SERVER_BUCKET_JSON_DEFINITION;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp
new file mode 100644
index 0000000..115967d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+};
+
+drop dataset test if exists;
+create external dataset test(test) using S3 (
+("accessKey"="dummyAccessKey"),
+("secretKey"="dummySecretKey"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001"),
+("container"="fixed-data-bucket"),
+("definition"=""),
+("format"="json")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp
new file mode 100644
index 0000000..ec40ee0
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select count(*) `count` from test;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp
new file mode 100644
index 0000000..548e632
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm
new file mode 100644
index 0000000..187a8cb
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm
@@ -0,0 +1 @@
+{ "count": 100 }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
index 02846d1..551d777 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
@@ -51,19 +51,20 @@
       <compilation-unit name="aws/s3/tsv/tsv">
         <output-dir compare="Text">aws/s3/tsv/tsv</output-dir>
       </compilation-unit>
-    </test-case><test-case FilePath="external-dataset">
-    <compilation-unit name="aws/s3/tsv/gz">
-      <output-dir compare="Text">aws/s3/tsv/gz</output-dir>
-    </compilation-unit>
-  </test-case><test-case FilePath="external-dataset">
-    <compilation-unit name="aws/s3/tsv/mixed">
-      <output-dir compare="Text">aws/s3/tsv/mixed</output-dir>
-    </compilation-unit>
-  </test-case>
+    </test-case>
     <test-case FilePath="external-dataset">
-      <compilation-unit name="aws/s3/negative">
-        <output-dir compare="Text">aws/s3/negative</output-dir>
-        <expected-error>Parameter(s) format must be specified</expected-error>
+      <compilation-unit name="aws/s3/tsv/gz">
+        <output-dir compare="Text">aws/s3/tsv/gz</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="aws/s3/tsv/mixed">
+        <output-dir compare="Text">aws/s3/tsv/mixed</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="external-dataset">
+      <compilation-unit name="aws/s3/empty-string-definition">
+        <output-dir compare="Text">aws/s3/empty-string-definition</output-dir>
       </compilation-unit>
     </test-case>
   </test-group>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
index 451a783..f0da535 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
@@ -89,7 +89,7 @@
         ListObjectsRequest.Builder listObjectsBuilder = ListObjectsRequest.builder().bucket(container);
         String path = configuration.get(AwsS3Constants.DEFINITION_FIELD_NAME);
         if (path != null) {
-            listObjectsBuilder.prefix(path + (path.endsWith("/") ? "" : "/"));
+            listObjectsBuilder.prefix(path + (!path.isEmpty() && !path.endsWith("/") ? "/" : ""));
         }
         ListObjectsResponse listObjectsResponse = s3Client.listObjects(listObjectsBuilder.build());
         List<S3Object> s3Objects = listObjectsResponse.contents();
@@ -123,7 +123,6 @@
             throw AsterixException.create(ErrorCode.PROVIDER_STREAM_RECORD_READER_UNKNOWN_FORMAT, fileFormat);
         }
 
-        // TODO(Hussain): We will have a property that can disable checking for .gz here
         s3Objects.stream().filter(object -> isValidFile(object.key(), fileFormat)).forEach(filesOnly::add);
 
         return filesOnly;