[ASTERIXDB-2724][EXT] Handle passing empty definition to external datasets
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- When an empty string is passed as an external dataset's definition,
no prefix is supplied to the AWS client (there is nothing to filter).
- Added a test case for the above-mentioned item.
Change-Id: I500e7afb97aa076b690ef3b98ee83c8f5934a88f
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/6183
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
Reviewed-by: Michael Blow <mblow@apache.org>
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
index 88cd6b5..bd433f1 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetOnePartitionTest.java
@@ -44,9 +44,13 @@
ONLY_TESTS = "only_external_dataset.xml";
TEST_CONFIG_FILE_NAME = "src/test/resources/cc-single.conf";
PREPARE_S3_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareS3Bucket;
+ PREPARE_FIXED_DATA_BUCKET = AwsS3ExternalDatasetOnePartitionTest::prepareFixedDataBucket;
return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
}
private static void prepareS3Bucket() {
}
+
+ private static void prepareFixedDataBucket() {
+ }
}
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index b5866e9..37a3916 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -83,6 +83,7 @@
static String ONLY_TESTS;
static String TEST_CONFIG_FILE_NAME;
static Runnable PREPARE_S3_BUCKET;
+ static Runnable PREPARE_FIXED_DATA_BUCKET;
// Base directory paths for data files
private static final String JSON_DATA_PATH = joinPath("data", "json");
@@ -96,6 +97,7 @@
// Region, bucket and definitions
private static final String S3_MOCK_SERVER_REGION = "us-west-2";
private static final String S3_MOCK_SERVER_BUCKET = "playground";
+ private static final String S3_MOCK_SERVER_FIXED_DATA_BUCKET = "fixed-data-bucket"; // Do not use, has fixed data
private static final String S3_MOCK_SERVER_BUCKET_JSON_DEFINITION = "json-data/reviews/"; // data resides here
private static final String S3_MOCK_SERVER_BUCKET_CSV_DEFINITION = "csv-data/reviews/"; // data resides here
private static final String S3_MOCK_SERVER_BUCKET_TSV_DEFINITION = "tsv-data/reviews/"; // data resides here
@@ -145,6 +147,7 @@
ONLY_TESTS = "only_external_dataset.xml";
TEST_CONFIG_FILE_NAME = "src/main/resources/cc.conf";
PREPARE_S3_BUCKET = AwsS3ExternalDatasetTest::prepareS3Bucket;
+ PREPARE_FIXED_DATA_BUCKET = AwsS3ExternalDatasetTest::prepareFixedDataBucket;
return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
}
@@ -187,6 +190,7 @@
// Create the bucket and upload some json files
PREPARE_S3_BUCKET.run();
+ PREPARE_FIXED_DATA_BUCKET.run();
}
/**
@@ -210,6 +214,26 @@
LOGGER.info("TSV Files added successfully");
}
+ /**
+ * This bucket is filled with fixed data; a test counts all the records in this bucket. If the contents of
+ * this bucket are changed, that test case will fail and its expected result will need to be updated.
+ */
+ private static void prepareFixedDataBucket() {
+ LOGGER.info("creating bucket " + S3_MOCK_SERVER_FIXED_DATA_BUCKET);
+ client.createBucket(CreateBucketRequest.builder().bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).build());
+ LOGGER.info("bucket " + S3_MOCK_SERVER_FIXED_DATA_BUCKET + " created successfully");
+
+ LOGGER.info("Loading fixed data to " + S3_MOCK_SERVER_FIXED_DATA_BUCKET);
+
+ // Files data
+ RequestBody requestBody = RequestBody.fromFile(Paths.get(JSON_DATA_PATH, "single-line", "20-records.json"));
+ client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("1.json").build(), requestBody);
+ client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("2.json").build(), requestBody);
+ client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/3.json").build(), requestBody);
+ client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/4.json").build(), requestBody);
+ client.putObject(builder.bucket(S3_MOCK_SERVER_FIXED_DATA_BUCKET).key("lvl1/lvl2/5.json").build(), requestBody);
+ }
+
private static void loadJsonFiles() {
String dataBasePath = JSON_DATA_PATH;
String definition = S3_MOCK_SERVER_BUCKET_JSON_DEFINITION;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp
new file mode 100644
index 0000000..115967d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.000.ddl.sqlpp
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+};
+
+drop dataset test if exists;
+create external dataset test(test) using S3 (
+("accessKey"="dummyAccessKey"),
+("secretKey"="dummySecretKey"),
+("region"="us-west-2"),
+("serviceEndpoint"="http://localhost:8001"),
+("container"="fixed-data-bucket"),
+("definition"=""),
+("format"="json")
+);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp
new file mode 100644
index 0000000..ec40ee0
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.001.query.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select count(*) `count` from test;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp
new file mode 100644
index 0000000..548e632
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/aws/s3/empty-string-definition/external_dataset.099.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm
new file mode 100644
index 0000000..187a8cb
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/aws/s3/empty-string-definition/external_dataset.001.adm
@@ -0,0 +1 @@
+{ "count": 100 }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
index 02846d1..551d777 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset.xml
@@ -51,19 +51,20 @@
<compilation-unit name="aws/s3/tsv/tsv">
<output-dir compare="Text">aws/s3/tsv/tsv</output-dir>
</compilation-unit>
- </test-case><test-case FilePath="external-dataset">
- <compilation-unit name="aws/s3/tsv/gz">
- <output-dir compare="Text">aws/s3/tsv/gz</output-dir>
- </compilation-unit>
- </test-case><test-case FilePath="external-dataset">
- <compilation-unit name="aws/s3/tsv/mixed">
- <output-dir compare="Text">aws/s3/tsv/mixed</output-dir>
- </compilation-unit>
- </test-case>
+ </test-case>
<test-case FilePath="external-dataset">
- <compilation-unit name="aws/s3/negative">
- <output-dir compare="Text">aws/s3/negative</output-dir>
- <expected-error>Parameter(s) format must be specified</expected-error>
+ <compilation-unit name="aws/s3/tsv/gz">
+ <output-dir compare="Text">aws/s3/tsv/gz</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="aws/s3/tsv/mixed">
+ <output-dir compare="Text">aws/s3/tsv/mixed</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="aws/s3/empty-string-definition">
+ <output-dir compare="Text">aws/s3/empty-string-definition</output-dir>
</compilation-unit>
</test-case>
</test-group>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
index 451a783..f0da535 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
@@ -89,7 +89,7 @@
ListObjectsRequest.Builder listObjectsBuilder = ListObjectsRequest.builder().bucket(container);
String path = configuration.get(AwsS3Constants.DEFINITION_FIELD_NAME);
if (path != null) {
- listObjectsBuilder.prefix(path + (path.endsWith("/") ? "" : "/"));
+ listObjectsBuilder.prefix(path + (!path.isEmpty() && !path.endsWith("/") ? "/" : ""));
}
ListObjectsResponse listObjectsResponse = s3Client.listObjects(listObjectsBuilder.build());
List<S3Object> s3Objects = listObjectsResponse.contents();
@@ -123,7 +123,6 @@
throw AsterixException.create(ErrorCode.PROVIDER_STREAM_RECORD_READER_UNKNOWN_FORMAT, fileFormat);
}
- // TODO(Hussain): We will have a property that can disable checking for .gz here
s3Objects.stream().filter(object -> isValidFile(object.key(), fileFormat)).forEach(filesOnly::add);
return filesOnly;