Reuse/share code for external dataset tests (p1)
Change-Id: Ie5caebd98a3c42536654035d5bc3954eaca6e6d5
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11564
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
new file mode 100644
index 0000000..3c129b4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.test.external_dataset;
+
+import static org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.*;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.asterix.testframework.context.TestCaseContext;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.junit.Assert;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(Parameterized.class)
+public class ExternalDatasetTestUtils {
+
+ protected static final Logger LOGGER = LogManager.getLogger();
+
+ // Base directory paths for data files
+ private static String JSON_DATA_PATH;
+ private static String CSV_DATA_PATH;
+ private static String TSV_DATA_PATH;
+
+ // IMPORTANT: The following values must be used in the AWS S3 test case
+ // Region, container and definitions
+ public static final String JSON_DEFINITION = "json-data/reviews/";
+ public static final String CSV_DEFINITION = "csv-data/reviews/";
+ public static final String TSV_DEFINITION = "tsv-data/reviews/";
+ public static final String MIXED_DEFINITION = "mixed-data/reviews/";
+
+ // This is used for a test to generate over 1000 number of files
+ public static final String OVER_1000_OBJECTS_PATH = "over-1000-objects";
+ public static final int OVER_1000_OBJECTS_COUNT = 2999;
+
+ private static Uploader playgroundDataLoader;
+ private static Uploader fixedDataLoader;
+ private static Uploader mixedDataLoader;
+
+ protected TestCaseContext tcCtx;
+
+ public interface Uploader {
+ default void upload(String key, String content) {
+ upload(key, content, false, false);
+ }
+
+ void upload(String key, String content, boolean fromFile, boolean gzipped);
+ }
+
+ public ExternalDatasetTestUtils(TestCaseContext tcCtx) {
+ this.tcCtx = tcCtx;
+ }
+
+ public static void setDataPaths(String jsonDataPath, String csvDataPath, String tsvDataPath) {
+ JSON_DATA_PATH = jsonDataPath;
+ CSV_DATA_PATH = csvDataPath;
+ TSV_DATA_PATH = tsvDataPath;
+ }
+
+ public static void setUploaders(Uploader playgroundDataLoader, Uploader fixedDataLoader, Uploader mixedDataLoader) {
+ ExternalDatasetTestUtils.playgroundDataLoader = playgroundDataLoader;
+ ExternalDatasetTestUtils.fixedDataLoader = fixedDataLoader;
+ ExternalDatasetTestUtils.mixedDataLoader = mixedDataLoader;
+ }
+
+ /**
+ * Creates a bucket and fills it with some files for testing purpose.
+ */
+ public static void preparePlaygroundContainer() {
+ LOGGER.info("Adding JSON files to the bucket");
+ loadJsonFiles();
+ LOGGER.info("JSON Files added successfully");
+
+ LOGGER.info("Adding CSV files to the bucket");
+ loadCsvFiles();
+ LOGGER.info("CSV Files added successfully");
+
+ LOGGER.info("Adding TSV files to the bucket");
+ loadTsvFiles();
+ LOGGER.info("TSV Files added successfully");
+
+ LOGGER.info("Adding a big JSON file");
+ loadBigJson();
+ LOGGER.info("JSON file added successfully");
+
+ LOGGER.info("Loading " + OVER_1000_OBJECTS_COUNT + " into " + OVER_1000_OBJECTS_PATH);
+ loadLargeNumberOfFiles();
+ LOGGER.info("Added " + OVER_1000_OBJECTS_COUNT + " files into " + OVER_1000_OBJECTS_PATH + " successfully");
+
+ LOGGER.info("Files added successfully");
+ }
+
+ /**
+ * This bucket is being filled by fixed data, a test is counting all records in this bucket. If this bucket is
+ * changed, the test case will fail and its result will need to be updated each time
+ */
+ public static void prepareFixedDataContainer() {
+ LOGGER.info("Loading fixed data to " + FIXED_DATA_CONTAINER);
+
+ // Files data
+ String path = Paths.get(JSON_DATA_PATH, "single-line", "20-records.json").toString();
+ fixedDataLoader.upload("1.json", path, true, false);
+ fixedDataLoader.upload("2.json", path, true, false);
+ fixedDataLoader.upload("lvl1/3.json", path, true, false);
+ fixedDataLoader.upload("lvl1/34.json", path, true, false);
+ fixedDataLoader.upload("lvl1/lvl2/5.json", path, true, false);
+ }
+
+ public static void loadJsonFiles() {
+ String dataBasePath = JSON_DATA_PATH;
+ String definition = JSON_DEFINITION;
+
+ // Normal format
+ String definitionSegment = "json";
+ loadData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
+ loadData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
+ loadData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
+ loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
+ false);
+
+ definitionSegment = "json-array-of-objects";
+ loadData(dataBasePath, "single-line", "array_of_objects.json", "json-data/", definitionSegment, false, false);
+
+ // gz compressed format
+ definitionSegment = "gz";
+ loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
+ false);
+
+ // Mixed normal and gz compressed format
+ definitionSegment = "mixed";
+ loadData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
+ loadData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
+ loadData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
+ loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
+ false);
+ loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
+ false);
+ }
+
+ private static void loadCsvFiles() {
+ String dataBasePath = CSV_DATA_PATH;
+ String definition = CSV_DEFINITION;
+
+ // Normal format
+ String definitionSegment = "csv";
+ loadData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
+ loadData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
+
+ // gz compressed format
+ definitionSegment = "gz";
+ loadGzData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
+
+ // Mixed normal and gz compressed format
+ definitionSegment = "mixed";
+ loadData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
+ loadData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
+ }
+
+ private static void loadTsvFiles() {
+ String dataBasePath = TSV_DATA_PATH;
+ String definition = TSV_DEFINITION;
+
+ // Normal format
+ String definitionSegment = "tsv";
+ loadData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
+ loadData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
+
+ // gz compressed format
+ definitionSegment = "gz";
+ loadGzData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
+
+ // Mixed normal and gz compressed format
+ definitionSegment = "mixed";
+ loadData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
+ loadData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
+ loadGzData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
+ }
+
+ private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
+ String definitionSegment, boolean removeExtension) {
+ loadData(fileBasePath, filePathSegment, filename, definition, definitionSegment, removeExtension, true);
+ }
+
+ private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
+ String definitionSegment, boolean removeExtension, boolean copyToSubLevels) {
+ // Files data
+ Path filePath = Paths.get(fileBasePath, filePathSegment, filename);
+
+ // Keep or remove the file extension
+ Assert.assertFalse("Files with no extension are not supported yet for external datasets", removeExtension);
+ String finalFileName;
+ if (removeExtension) {
+ finalFileName = FilenameUtils.removeExtension(filename);
+ } else {
+ finalFileName = filename;
+ }
+
+ // Files base definition
+ filePathSegment = filePathSegment.isEmpty() ? "" : filePathSegment + "/";
+ definitionSegment = definitionSegment.isEmpty() ? "" : definitionSegment + "/";
+ String basePath = definition + filePathSegment + definitionSegment;
+
+ // Load the data
+ String path = filePath.toString();
+ playgroundDataLoader.upload(basePath + finalFileName, path, true, false);
+ if (copyToSubLevels) {
+ playgroundDataLoader.upload(basePath + "level1a/" + finalFileName, path, true, false);
+ playgroundDataLoader.upload(basePath + "level1b/" + finalFileName, path, true, false);
+ playgroundDataLoader.upload(basePath + "level1a/level2a/" + finalFileName, path, true, false);
+ playgroundDataLoader.upload(basePath + "level1a/level2b/" + finalFileName, path, true, false);
+ }
+ }
+
+ private static void loadGzData(String fileBasePath, String filePathSegment, String filename, String definition,
+ String definitionSegment, boolean removeExtension) {
+ // Keep or remove the file extension
+ Assert.assertFalse("Files with no extension are not supported yet for external datasets", removeExtension);
+ String finalFileName;
+ if (removeExtension) {
+ finalFileName = FilenameUtils.removeExtension(filename);
+ } else {
+ finalFileName = filename;
+ }
+ finalFileName += ".gz";
+
+ // Files base definition
+ filePathSegment = filePathSegment.isEmpty() ? "" : filePathSegment + "/";
+ definitionSegment = definitionSegment.isEmpty() ? "" : definitionSegment + "/";
+ String basePath = definition + filePathSegment + definitionSegment;
+
+ // Load the data
+ String path = Paths.get(fileBasePath, filePathSegment, filename).toString();
+ playgroundDataLoader.upload(basePath + finalFileName, path, true, true);
+ playgroundDataLoader.upload(basePath + "level1a/" + finalFileName, path, true, true);
+ playgroundDataLoader.upload(basePath + "level1b/" + finalFileName, path, true, true);
+ playgroundDataLoader.upload(basePath + "level1a/level2a/" + finalFileName, path, true, true);
+ playgroundDataLoader.upload(basePath + "level1a/level2b/" + finalFileName, path, true, true);
+ }
+
+ private static void loadBigJson() {
+ String fileName = "big_record.json";
+ int bufferSize = 4 * 1024 * 1024;
+ int maxSize = bufferSize * 9;
+ Path filePath = Paths.get("target", "rttest", "tmp", fileName);
+ try {
+ if (Files.notExists(filePath)) {
+ Files.createDirectories(filePath.getParent());
+ Files.createFile(filePath);
+ }
+ } catch (IOException ex) {
+ throw new IllegalStateException("File " + fileName + " not found");
+ }
+
+ try (FileWriter writer = new FileWriter(filePath.toFile(), false);
+ BufferedWriter bw = new BufferedWriter(writer, bufferSize)) {
+ bw.append("{ \"large_field\": \"");
+ for (int i = 0; i < maxSize; i++) {
+ bw.append('A');
+ }
+ bw.append("\" }");
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
+ String key = "big-json/" + fileName;
+ playgroundDataLoader.upload(key, filePath.toString(), true, false);
+ }
+
+ /**
+ * Generates over 1000 objects and upload them to S3 mock server, 1 record per object
+ */
+ private static void loadLargeNumberOfFiles() {
+ for (int i = 0; i < OVER_1000_OBJECTS_COUNT; i++) {
+ playgroundDataLoader.upload(OVER_1000_OBJECTS_PATH + "/" + i + ".json", "{\"id\":" + i + "}");
+ }
+ }
+
+ /**
+ * Loads a combination of different file formats in the same path
+ */
+ public static void prepareMixedDataContainer() {
+ // JSON
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/extension/" + "hello-world-2018.json", "{\"id\":" + 1 + "}");
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/extension/" + "hello-world-2019.json", "{\"id\":" + 2 + "}");
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/extension/" + "hello-world-2020.json", "{\"id\":" + 3 + "}");
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/EXTENSION/" + "goodbye-world-2018.json", "{\"id\":" + 4 + "}");
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/EXTENSION/" + "goodbye-world-2019.json", "{\"id\":" + 5 + "}");
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/EXTENSION/" + "goodbye-world-2020.json", "{\"id\":" + 6 + "}");
+
+ // CSV
+ mixedDataLoader.upload(MIXED_DEFINITION + "csv/extension/" + "hello-world-2018.csv", "7,\"good\"");
+ mixedDataLoader.upload(MIXED_DEFINITION + "csv/extension/" + "hello-world-2019.csv", "8,\"good\"");
+ mixedDataLoader.upload(MIXED_DEFINITION + "csv/extension/" + "hello-world-2020.csv", "9,\"good\"");
+ mixedDataLoader.upload(MIXED_DEFINITION + "csv/EXTENSION/" + "goodbye-world-2018.csv", "10,\"good\"");
+ mixedDataLoader.upload(MIXED_DEFINITION + "csv/EXTENSION/" + "goodbye-world-2019.csv", "11,\"good\"");
+ mixedDataLoader.upload(MIXED_DEFINITION + "csv/EXTENSION/" + "goodbye-world-2020.csv", "12,\"good\"");
+
+ // TSV
+ mixedDataLoader.upload(MIXED_DEFINITION + "tsv/extension/" + "hello-world-2018.tsv", "13\t\"good\"");
+ mixedDataLoader.upload(MIXED_DEFINITION + "tsv/extension/" + "hello-world-2019.tsv", "14\t\"good\"");
+ mixedDataLoader.upload(MIXED_DEFINITION + "tsv/extension/" + "hello-world-2020.tsv", "15\t\"good\"");
+ mixedDataLoader.upload(MIXED_DEFINITION + "tsv/EXTENSION/" + "goodbye-world-2018.tsv", "16\t\"good\"");
+ mixedDataLoader.upload(MIXED_DEFINITION + "tsv/EXTENSION/" + "goodbye-world-2019.tsv", "17\t\"good\"");
+ mixedDataLoader.upload(MIXED_DEFINITION + "tsv/EXTENSION/" + "goodbye-world-2020.tsv", "18\t\"good\"");
+
+ // JSON no extension
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/no-extension/" + "hello-world-2018", "{\"id\":" + 1 + "}");
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/no-extension/" + "hello-world-2019", "{\"id\":" + 2 + "}");
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/no-extension/" + "hello-world-2020", "{\"id\":" + 3 + "}");
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/NO-EXTENSION/" + "goodbye-world-2018", "{\"id\":" + 4 + "}");
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/NO-EXTENSION/" + "goodbye-world-2019", "{\"id\":" + 5 + "}");
+ mixedDataLoader.upload(MIXED_DEFINITION + "json/NO-EXTENSION/" + "goodbye-world-2020", "{\"id\":" + 6 + "}");
+ }
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 387f7f6..6ecdc0e 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -18,15 +18,17 @@
*/
package org.apache.asterix.test.external_dataset.aws;
+import static org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.setDataPaths;
+import static org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.setUploaders;
import static org.apache.hyracks.util.file.FileUtil.joinPath;
import java.io.ByteArrayOutputStream;
import java.io.File;
+import java.io.IOException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.URI;
import java.nio.file.Files;
-import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.BitSet;
import java.util.Collection;
@@ -39,6 +41,7 @@
import org.apache.asterix.common.api.INcApplicationContext;
import org.apache.asterix.test.common.TestExecutor;
+import org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils;
import org.apache.asterix.test.runtime.ExecutionTestUtil;
import org.apache.asterix.test.runtime.LangExecutionUtil;
import org.apache.asterix.testframework.context.TestCaseContext;
@@ -50,7 +53,6 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.junit.AfterClass;
-import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.FixMethodOrder;
import org.junit.Test;
@@ -91,7 +93,6 @@
private static final String JSON_DATA_PATH = joinPath("data", "json");
private static final String CSV_DATA_PATH = joinPath("data", "csv");
private static final String TSV_DATA_PATH = joinPath("data", "tsv");
- private static final String MIXED_DATA_PATH = joinPath("data", "mixed");
// Service endpoint
private static final int MOCK_SERVER_PORT = 8001;
@@ -99,31 +100,27 @@
// Region, bucket and definitions
private static final String MOCK_SERVER_REGION = "us-west-2";
- private static final String MOCK_SERVER_BUCKET = "playground";
- private static final String FIXED_DATA_BUCKET = "fixed-data"; // Do not use, has fixed data
- private static final String INCLUDE_EXCLUDE_BUCKET = "include-exclude"; // include & exclude bucket
- private static final String JSON_DEFINITION = "json-data/reviews/"; // data resides here
- private static final String CSV_DEFINITION = "csv-data/reviews/"; // data resides here
- private static final String TSV_DEFINITION = "tsv-data/reviews/"; // data resides here
-
- // This is used for a test to generate over 1000 number of files
- private static final String OVER_1000_OBJECTS_PATH = "over-1000-objects";
- private static final int OVER_1000_OBJECTS_COUNT = 2999;
private static final Set<String> fileNames = new HashSet<>();
private static final CreateBucketRequest.Builder CREATE_BUCKET_BUILDER = CreateBucketRequest.builder();
private static final DeleteBucketRequest.Builder DELETE_BUCKET_BUILDER = DeleteBucketRequest.builder();
private static final PutObjectRequest.Builder PUT_OBJECT_BUILDER = PutObjectRequest.builder();
- // IMPORTANT: The following values must be used in the AWS S3 test case
private static S3Mock s3MockServer;
private static S3Client client;
- private static final PutObjectRequest.Builder builder = PutObjectRequest.builder().bucket(MOCK_SERVER_BUCKET);
- private static final PutObjectRequest.Builder includeExcludeBuilder =
- PutObjectRequest.builder().bucket(INCLUDE_EXCLUDE_BUCKET);
protected TestCaseContext tcCtx;
+ public static final String PLAYGROUND_CONTAINER = "playground";
+ public static final String FIXED_DATA_CONTAINER = "fixed-data"; // Do not use, has fixed data
+ public static final String INCLUDE_EXCLUDE_CONTAINER = "include-exclude";
+ public static final PutObjectRequest.Builder playgroundBuilder =
+ PutObjectRequest.builder().bucket(PLAYGROUND_CONTAINER);
+ public static final PutObjectRequest.Builder fixedDataBuilder =
+ PutObjectRequest.builder().bucket(FIXED_DATA_CONTAINER);
+ public static final PutObjectRequest.Builder includeExcludeBuilder =
+ PutObjectRequest.builder().bucket(INCLUDE_EXCLUDE_CONTAINER);
+
public AwsS3ExternalDatasetTest(TestCaseContext tcCtx) {
this.tcCtx = tcCtx;
}
@@ -156,9 +153,9 @@
SUITE_TESTS = "testsuite_external_dataset_s3.xml";
ONLY_TESTS = "only_external_dataset.xml";
TEST_CONFIG_FILE_NAME = "src/main/resources/cc.conf";
- PREPARE_BUCKET = AwsS3ExternalDatasetTest::prepareS3Bucket;
- PREPARE_FIXED_DATA_BUCKET = AwsS3ExternalDatasetTest::prepareFixedDataBucket;
- PREPARE_MIXED_DATA_BUCKET = AwsS3ExternalDatasetTest::prepareMixedDataBucket;
+ PREPARE_BUCKET = ExternalDatasetTestUtils::preparePlaygroundContainer;
+ PREPARE_FIXED_DATA_BUCKET = ExternalDatasetTestUtils::prepareFixedDataContainer;
+ PREPARE_MIXED_DATA_BUCKET = ExternalDatasetTestUtils::prepareMixedDataContainer;
return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
}
@@ -197,312 +194,56 @@
builder.region(Region.of(MOCK_SERVER_REGION)).credentialsProvider(AnonymousCredentialsProvider.create())
.endpointOverride(endpoint);
client = builder.build();
+ client.createBucket(CreateBucketRequest.builder().bucket(PLAYGROUND_CONTAINER).build());
+ client.createBucket(CreateBucketRequest.builder().bucket(FIXED_DATA_CONTAINER).build());
+ client.createBucket(CreateBucketRequest.builder().bucket(INCLUDE_EXCLUDE_CONTAINER).build());
LOGGER.info("Client created successfully");
// Create the bucket and upload some json files
+ setDataPaths(JSON_DATA_PATH, CSV_DATA_PATH, TSV_DATA_PATH);
+ setUploaders(AwsS3ExternalDatasetTest::loadPlaygroundData, AwsS3ExternalDatasetTest::loadFixedData,
+ AwsS3ExternalDatasetTest::loadMixedData);
PREPARE_BUCKET.run();
PREPARE_FIXED_DATA_BUCKET.run();
PREPARE_MIXED_DATA_BUCKET.run();
}
- /**
- * Creates a bucket and fills it with some files for testing purpose.
- */
- private static void prepareS3Bucket() {
- LOGGER.info("creating bucket " + MOCK_SERVER_BUCKET);
- client.createBucket(CreateBucketRequest.builder().bucket(MOCK_SERVER_BUCKET).build());
- LOGGER.info("bucket created successfully");
-
- LOGGER.info("Adding JSON files to the bucket");
- loadJsonFiles();
- LOGGER.info("JSON Files added successfully");
-
- LOGGER.info("Adding CSV files to the bucket");
- loadCsvFiles();
- LOGGER.info("CSV Files added successfully");
-
- LOGGER.info("Adding TSV files to the bucket");
- loadTsvFiles();
- LOGGER.info("TSV Files added successfully");
-
- LOGGER.info("Loading " + OVER_1000_OBJECTS_COUNT + " into " + OVER_1000_OBJECTS_PATH);
- loadLargeNumberOfFiles();
- LOGGER.info("Added " + OVER_1000_OBJECTS_COUNT + " files into " + OVER_1000_OBJECTS_PATH + " successfully");
+ private static void loadPlaygroundData(String key, String content, boolean fromFile, boolean gzipped) {
+ client.putObject(playgroundBuilder.key(key).build(), getRequestBody(content, fromFile, gzipped));
}
- /**
- * This bucket is being filled by fixed data, a test is counting all records in this bucket. If this bucket is
- * changed, the test case will fail and its result will need to be updated each time
- */
- private static void prepareFixedDataBucket() {
- LOGGER.info("creating bucket " + FIXED_DATA_BUCKET);
- client.createBucket(CreateBucketRequest.builder().bucket(FIXED_DATA_BUCKET).build());
- LOGGER.info("bucket " + FIXED_DATA_BUCKET + " created successfully");
-
- LOGGER.info("Loading fixed data to " + FIXED_DATA_BUCKET);
-
- // Files data
- RequestBody requestBody = RequestBody.fromFile(Paths.get(JSON_DATA_PATH, "single-line", "20-records.json"));
- client.putObject(builder.bucket(FIXED_DATA_BUCKET).key("1.json").build(), requestBody);
- client.putObject(builder.bucket(FIXED_DATA_BUCKET).key("2.json").build(), requestBody);
- client.putObject(builder.bucket(FIXED_DATA_BUCKET).key("lvl1/3.json").build(), requestBody);
- client.putObject(builder.bucket(FIXED_DATA_BUCKET).key("lvl1/4.json").build(), requestBody);
- client.putObject(builder.bucket(FIXED_DATA_BUCKET).key("lvl1/lvl2/5.json").build(), requestBody);
+ private static void loadFixedData(String key, String content, boolean fromFile, boolean gzipped) {
+ client.putObject(fixedDataBuilder.key(key).build(), getRequestBody(content, fromFile, gzipped));
}
- private static void loadJsonFiles() {
- String dataBasePath = JSON_DATA_PATH;
- String definition = JSON_DEFINITION;
-
- // Normal format
- String definitionSegment = "json";
- loadData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
- loadData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
- loadData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
- loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
- false);
-
- definitionSegment = "json-array-of-objects";
- loadData(dataBasePath, "single-line", "array_of_objects.json", "json-data/", definitionSegment, false, false);
-
- // gz compressed format
- definitionSegment = "gz";
- loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
- loadGzData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
- loadGzData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
- loadGzData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
- false);
-
- // Mixed normal and gz compressed format
- definitionSegment = "mixed";
- loadData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
- loadData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
- loadData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
- loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
- false);
- loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
- loadGzData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
- loadGzData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
- loadGzData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
- false);
+ private static void loadMixedData(String key, String content, boolean fromFile, boolean gzipped) {
+ client.putObject(includeExcludeBuilder.key(key).build(), getRequestBody(content, fromFile, gzipped));
}
- private static void loadCsvFiles() {
- String dataBasePath = CSV_DATA_PATH;
- String definition = CSV_DEFINITION;
+ private static RequestBody getRequestBody(String content, boolean fromFile, boolean gzipped) {
+ RequestBody body;
- // Normal format
- String definitionSegment = "csv";
- loadData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
- loadData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
-
- // gz compressed format
- definitionSegment = "gz";
- loadGzData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
- loadGzData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
-
- // Mixed normal and gz compressed format
- definitionSegment = "mixed";
- loadData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
- loadData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
- loadGzData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
- loadGzData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
- }
-
- private static void loadTsvFiles() {
- String dataBasePath = TSV_DATA_PATH;
- String definition = TSV_DEFINITION;
-
- // Normal format
- String definitionSegment = "tsv";
- loadData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
- loadData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
-
- // gz compressed format
- definitionSegment = "gz";
- loadGzData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
- loadGzData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
-
- // Mixed normal and gz compressed format
- definitionSegment = "mixed";
- loadData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
- loadData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
- loadGzData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
- loadGzData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
- }
-
- private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
- String definitionSegment, boolean removeExtension) {
- loadData(fileBasePath, filePathSegment, filename, definition, definitionSegment, removeExtension, true);
- }
-
- private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
- String definitionSegment, boolean removeExtension, boolean copyToSubLevels) {
- // Files data
- Path filePath = Paths.get(fileBasePath, filePathSegment, filename);
- RequestBody requestBody = RequestBody.fromFile(filePath);
-
- // Keep or remove the file extension
- Assert.assertFalse("Files with no extension are not supported yet for external datasets", removeExtension);
- String finalFileName;
- if (removeExtension) {
- finalFileName = FilenameUtils.removeExtension(filename);
+ // Content is string
+ if (!fromFile) {
+ body = RequestBody.fromString(content);
} else {
- finalFileName = filename;
- }
-
- // Files base definition
- filePathSegment = filePathSegment.isEmpty() ? "" : filePathSegment + "/";
- definitionSegment = definitionSegment.isEmpty() ? "" : definitionSegment + "/";
- String basePath = definition + filePathSegment + definitionSegment;
-
- // Load the data
- client.putObject(builder.key(basePath + finalFileName).build(), requestBody);
- if (copyToSubLevels) {
- client.putObject(builder.key(basePath + "level1a/" + finalFileName).build(), requestBody);
- client.putObject(builder.key(basePath + "level1b/" + finalFileName).build(), requestBody);
- client.putObject(builder.key(basePath + "level1a/level2a/" + finalFileName).build(), requestBody);
- client.putObject(builder.key(basePath + "level1a/level2b/" + finalFileName).build(), requestBody);
- }
- }
-
- private static void loadGzData(String fileBasePath, String filePathSegment, String filename, String definition,
- String definitionSegment, boolean removeExtension) {
- try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
- GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) {
-
- // Files data
- Path filePath = Paths.get(fileBasePath, filePathSegment, filename);
-
- // Get the compressed data
- gzipOutputStream.write(Files.readAllBytes(filePath));
- gzipOutputStream.close(); // Need to close or data will be invalid
- byte[] gzipBytes = byteArrayOutputStream.toByteArray();
- RequestBody requestBody = RequestBody.fromBytes(gzipBytes);
-
- // Keep or remove the file extension
- Assert.assertFalse("Files with no extension are not supported yet for external datasets", removeExtension);
- String finalFileName;
- if (removeExtension) {
- finalFileName = FilenameUtils.removeExtension(filename);
+ // Content is a file path
+ if (!gzipped) {
+ body = RequestBody.fromFile(Paths.get(content));
} else {
- finalFileName = filename;
+ try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+ GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) {
+ gzipOutputStream.write(Files.readAllBytes(Paths.get(content)));
+ gzipOutputStream.close(); // Need to close or data will be invalid
+ byte[] gzipBytes = byteArrayOutputStream.toByteArray();
+ body = RequestBody.fromBytes(gzipBytes);
+ } catch (IOException ex) {
+ throw new IllegalArgumentException(ex.toString());
+ }
}
- finalFileName += ".gz";
-
- // Files base definition
- filePathSegment = filePathSegment.isEmpty() ? "" : filePathSegment + "/";
- definitionSegment = definitionSegment.isEmpty() ? "" : definitionSegment + "/";
- String basePath = definition + filePathSegment + definitionSegment;
-
- // Load the data
- client.putObject(builder.key(basePath + finalFileName).build(), requestBody);
- client.putObject(builder.key(basePath + "level1a/" + finalFileName).build(), requestBody);
- client.putObject(builder.key(basePath + "level1b/" + finalFileName).build(), requestBody);
- client.putObject(builder.key(basePath + "level1a/level2a/" + finalFileName).build(), requestBody);
- client.putObject(builder.key(basePath + "level1a/level2b/" + finalFileName).build(), requestBody);
- } catch (Exception ex) {
- LOGGER.error(ex.getMessage());
}
- }
- /**
- * Generates over 1000 objects and upload them to S3 mock server, 1 record per object
- */
- private static void loadLargeNumberOfFiles() {
- for (int i = 0; i < OVER_1000_OBJECTS_COUNT; i++) {
- RequestBody body = RequestBody.fromString("{\"id\":" + i + "}");
- client.putObject(builder.key(OVER_1000_OBJECTS_PATH + "/" + i + ".json").build(), body);
- }
- }
-
- /**
- * Loads a combination of different file formats in the same path
- */
- private static void prepareMixedDataBucket() {
- LOGGER.info("creating bucket " + INCLUDE_EXCLUDE_BUCKET);
- client.createBucket(CreateBucketRequest.builder().bucket(INCLUDE_EXCLUDE_BUCKET).build());
- LOGGER.info("bucket " + INCLUDE_EXCLUDE_BUCKET + " created successfully");
-
- // JSON
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/extension/" + "hello-world-2018.json").build(),
- RequestBody.fromString("{\"id\":" + 1 + "}"));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/extension/" + "hello-world-2019.json").build(),
- RequestBody.fromString("{\"id\":" + 2 + "}"));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/extension/" + "hello-world-2020.json").build(),
- RequestBody.fromString("{\"id\":" + 3 + "}"));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/EXTENSION/" + "goodbye-world-2018.json").build(),
- RequestBody.fromString("{\"id\":" + 4 + "}"));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/EXTENSION/" + "goodbye-world-2019.json").build(),
- RequestBody.fromString("{\"id\":" + 5 + "}"));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/EXTENSION/" + "goodbye-world-2020.json").build(),
- RequestBody.fromString("{\"id\":" + 6 + "}"));
-
- // CSV
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/extension/" + "hello-world-2018.csv").build(),
- RequestBody.fromString("7,\"good\""));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/extension/" + "hello-world-2019.csv").build(),
- RequestBody.fromString("8,\"good\""));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/extension/" + "hello-world-2020.csv").build(),
- RequestBody.fromString("{9,\"good\""));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/EXTENSION/" + "goodbye-world-2018.csv").build(),
- RequestBody.fromString("10,\"good\""));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/EXTENSION/" + "goodbye-world-2019.csv").build(),
- RequestBody.fromString("11,\"good\""));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/EXTENSION/" + "goodbye-world-2020.csv").build(),
- RequestBody.fromString("12,\"good\""));
-
- // TSV
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/extension/" + "hello-world-2018.tsv").build(),
- RequestBody.fromString("13\t\"good\""));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/extension/" + "hello-world-2019.tsv").build(),
- RequestBody.fromString("14\t\"good\""));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/extension/" + "hello-world-2020.tsv").build(),
- RequestBody.fromString("15\t\"good\""));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/EXTENSION/" + "goodbye-world-2018.tsv").build(),
- RequestBody.fromString("16\t\"good\""));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/EXTENSION/" + "goodbye-world-2019.tsv").build(),
- RequestBody.fromString("17\t\"good\""));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/EXTENSION/" + "goodbye-world-2020.tsv").build(),
- RequestBody.fromString("18\t\"good\""));
-
- // JSON no extension
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/no-extension/" + "hello-world-2018").build(),
- RequestBody.fromString("{\"id\":" + 1 + "}"));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/no-extension/" + "hello-world-2019").build(),
- RequestBody.fromString("{\"id\":" + 2 + "}"));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/no-extension/" + "hello-world-2020").build(),
- RequestBody.fromString("{\"id\":" + 3 + "}"));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/NO-EXTENSION/" + "goodbye-world-2018").build(),
- RequestBody.fromString("{\"id\":" + 4 + "}"));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/NO-EXTENSION/" + "goodbye-world-2019").build(),
- RequestBody.fromString("{\"id\":" + 5 + "}"));
- client.putObject(
- includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/NO-EXTENSION/" + "goodbye-world-2020").build(),
- RequestBody.fromString("{\"id\":" + 6 + "}"));
+ return body;
}
static class AwsTestExecutor extends TestExecutor {
@@ -588,4 +329,4 @@
}
LOGGER.info("Done creating bucket with data");
}
-}
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-1/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-1/test.000.ddl.sqlpp
index bc6a0c4..04fb1db 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-1/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-1/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("exclude"="*.json"),
("exclude1"="*.json")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-2/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-2/test.000.ddl.sqlpp
index a8b2c02..321d5dc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-2/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-2/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("exclude"="*.json"),
("exclude#"="*.json")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-3/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-3/test.000.ddl.sqlpp
index 5497e13..e26f01b 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-3/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-3/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("exclude"="*.json"),
("exclude#hello"="*.json")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/both/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/both/test.000.ddl.sqlpp
index 2540c6a..428ae5e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/both/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/both/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("include"="*.json"),
("exclude"="*.json")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-1/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-1/test.000.ddl.sqlpp
index e2883bf..e47fbd8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-1/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-1/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("exclude"="*.?sv")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-2/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-2/test.000.ddl.sqlpp
index 25ae5af..85664b8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-2/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-2/test.000.ddl.sqlpp
@@ -29,9 +29,9 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
-("exclude"="data/mixed/?sv*"),
-("exclude#1"="data/mixed/json/extension*"),
-("exclude#100"="data/mixed/json/EXTENSION*")
+("exclude"="mixed-data/reviews/?sv*"),
+("exclude#1"="mixed-data/reviews/json/extension*"),
+("exclude#100"="mixed-data/reviews/json/EXTENSION*")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-3/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-3/test.000.ddl.sqlpp
index edeabbd..b127693 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-3/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-3/test.000.ddl.sqlpp
@@ -29,8 +29,8 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
-("exclude"="data/mixed/?sv*"),
-("exclude#100"="data/mixed/json*201?*")
+("exclude"="mixed-data/reviews/?sv*"),
+("exclude#100"="mixed-data/reviews/json*201?*")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-4/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-4/test.000.ddl.sqlpp
index c2ad561..b3f0bcf 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-4/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-4/test.000.ddl.sqlpp
@@ -29,8 +29,8 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
-("exclude"="data/mixed/?sv*"),
-("exclude#1"="data/mixed/json*bye*")
+("exclude"="mixed-data/reviews/?sv*"),
+("exclude#1"="mixed-data/reviews/json*bye*")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-5/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-5/test.000.ddl.sqlpp
index bf4b1a0..ff12e07 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-5/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-5/test.000.ddl.sqlpp
@@ -29,8 +29,8 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
-("exclude"="data/mixed/?sv*"),
-("exclude#1"="data/mixed/json/extension/hello-world-2018.json")
+("exclude"="mixed-data/reviews/?sv*"),
+("exclude#1"="mixed-data/reviews/json/extension/hello-world-2018.json")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-6/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-6/test.000.ddl.sqlpp
index 3896068..d4804b5 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-6/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-6/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("exclude"="*.[abct][abcs][abcv]")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-all/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-all/test.000.ddl.sqlpp
index f0088c4..adef5d9 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-all/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-all/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("exclude"="*")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-1/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-1/test.000.ddl.sqlpp
index 3419631..9d31cd3 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-1/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-1/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("include"="*2018*.json")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-10/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-10/test.000.ddl.sqlpp
index 2aac2b2..f676230 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-10/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-10/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="csv"),
("header"=false),
("include"="*[abc][.*")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-11/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-11/test.000.ddl.sqlpp
index c54635f..acf651c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-11/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-11/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="csv"),
("header"=false),
("include"="*.[a-c][a-z][a-z**||\\\\&&--~~]")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-12/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-12/test.000.ddl.sqlpp
index 6996c56..30d3c63 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-12/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-12/test.000.ddl.sqlpp
@@ -31,7 +31,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="csv"),
("header"=false),
("include"="[][!][^]]]]*[![*a-zA--&&^$||0-9B$\\*&&]*&&[^a-b||0--9][[[")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-2/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-2/test.000.ddl.sqlpp
index d37f273..ac496ec 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-2/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-2/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("include"="*201?*.json")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-3/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-3/test.000.ddl.sqlpp
index cb42507..a367c04 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-3/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-3/test.000.ddl.sqlpp
@@ -31,7 +31,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="csv"),
("header"=false),
("include"="*201?*.csv")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-4/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-4/test.000.ddl.sqlpp
index 1356218..44ce34e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-4/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-4/test.000.ddl.sqlpp
@@ -31,7 +31,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="tsv"),
("header"=false),
("include"="*201?*.tsv")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-5/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-5/test.000.ddl.sqlpp
index 52facb3..ebaa926 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-5/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-5/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("include"="*bye*.json")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-6/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-6/test.000.ddl.sqlpp
index 89bbcee..baa137c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-6/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-6/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
-("include"="data/mixed/json/*EXTENSION*")
+("include"="mixed-data/reviews/json/*EXTENSION*")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-7/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-7/test.000.ddl.sqlpp
index de90340..7a569d8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-7/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-7/test.000.ddl.sqlpp
@@ -29,8 +29,8 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
-("include"="data/mixed/json/NO-EXTENSION*"),
-("include#0"="data/mixed/json/EXTENSION*")
+("include"="mixed-data/reviews/json/NO-EXTENSION*"),
+("include#0"="mixed-data/reviews/json/EXTENSION*")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-8/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-8/test.000.ddl.sqlpp
index 86a669d..75087f3 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-8/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-8/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="csv"),
("header"=false),
("include"="*.[!xyt][!xyz][!xyz]")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-9/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-9/test.000.ddl.sqlpp
index 0dc8472..ec8e59a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-9/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-9/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="csv"),
("header"=false),
("include"="*.[a-c][a-z][a-z]")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-all/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-all/test.000.ddl.sqlpp
index 0255e87..6ebbe1c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-all/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-all/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("include"="*")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/exclude-all-files/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/exclude-all-files/test.000.ddl.sqlpp
index 71130e2..0aceee0 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/exclude-all-files/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/exclude-all-files/test.000.ddl.sqlpp
@@ -30,7 +30,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("exclude"="*")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/include-no-files/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/include-no-files/test.000.ddl.sqlpp
index 9edc6e6..8b136cc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/include-no-files/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/include-no-files/test.000.ddl.sqlpp
@@ -30,7 +30,7 @@
CREATE EXTERNAL DATASET test(test) USING %adapter% (
%template%,
("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
("format"="json"),
("include"="*.notRealExtension")
);
\ No newline at end of file