Reuse/share code for external dataset tests (p1)

Change-Id: Ie5caebd98a3c42536654035d5bc3954eaca6e6d5
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/11564
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
new file mode 100644
index 0000000..3c129b4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/ExternalDatasetTestUtils.java
@@ -0,0 +1,347 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.test.external_dataset;
+
+import static org.apache.asterix.test.external_dataset.aws.AwsS3ExternalDatasetTest.*;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.asterix.testframework.context.TestCaseContext;
+import org.apache.commons.io.FilenameUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.junit.Assert;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+/**
+ * Shared helpers that fill external dataset test containers through pluggable {@link Uploader} callbacks.
+ */
+@RunWith(Parameterized.class)
+public class ExternalDatasetTestUtils {
+
+    protected static final Logger LOGGER = LogManager.getLogger();
+
+    // Base directory paths for data files
+    private static String JSON_DATA_PATH;
+    private static String CSV_DATA_PATH;
+    private static String TSV_DATA_PATH;
+
+    // IMPORTANT: The following values must be used in the AWS S3 test case
+    // Region, container and definitions
+    public static final String JSON_DEFINITION = "json-data/reviews/";
+    public static final String CSV_DEFINITION = "csv-data/reviews/";
+    public static final String TSV_DEFINITION = "tsv-data/reviews/";
+    public static final String MIXED_DEFINITION = "mixed-data/reviews/";
+
+    // This is used for a test to generate over 1000 number of files
+    public static final String OVER_1000_OBJECTS_PATH = "over-1000-objects";
+    public static final int OVER_1000_OBJECTS_COUNT = 2999;
+
+    private static Uploader playgroundDataLoader;
+    private static Uploader fixedDataLoader;
+    private static Uploader mixedDataLoader;
+
+    protected TestCaseContext tcCtx;
+
+    /** Callback that stores {@code content} under {@code key} in one container of the storage under test. */
+    public interface Uploader {
+        default void upload(String key, String content) {
+            upload(key, content, false, false);
+        }
+
+        /** If {@code fromFile} is set, {@code content} is a local file path instead of the payload itself. */
+        void upload(String key, String content, boolean fromFile, boolean gzipped);
+    }
+
+    public ExternalDatasetTestUtils(TestCaseContext tcCtx) {
+        this.tcCtx = tcCtx;
+    }
+
+    /** Sets the local base directories the JSON, CSV and TSV data files are read from. */
+    public static void setDataPaths(String jsonDataPath, String csvDataPath, String tsvDataPath) {
+        JSON_DATA_PATH = jsonDataPath;
+        CSV_DATA_PATH = csvDataPath;
+        TSV_DATA_PATH = tsvDataPath;
+    }
+
+    /** Registers the uploaders used by the playground, fixed-data and mixed-data prepare methods. */
+    public static void setUploaders(Uploader playgroundDataLoader, Uploader fixedDataLoader, Uploader mixedDataLoader) {
+        ExternalDatasetTestUtils.playgroundDataLoader = playgroundDataLoader;
+        ExternalDatasetTestUtils.fixedDataLoader = fixedDataLoader;
+        ExternalDatasetTestUtils.mixedDataLoader = mixedDataLoader;
+    }
+
+    /**
+     * Fills the playground container with some files for testing purpose; it only uploads, it creates no container.
+     */
+    public static void preparePlaygroundContainer() {
+        LOGGER.info("Adding JSON files to the bucket");
+        loadJsonFiles();
+        LOGGER.info("JSON Files added successfully");
+
+        LOGGER.info("Adding CSV files to the bucket");
+        loadCsvFiles();
+        LOGGER.info("CSV Files added successfully");
+
+        LOGGER.info("Adding TSV files to the bucket");
+        loadTsvFiles();
+        LOGGER.info("TSV Files added successfully");
+
+        LOGGER.info("Adding a big JSON file");
+        loadBigJson();
+        LOGGER.info("JSON file added successfully");
+
+        LOGGER.info("Loading " + OVER_1000_OBJECTS_COUNT + " files into " + OVER_1000_OBJECTS_PATH);
+        loadLargeNumberOfFiles();
+        LOGGER.info("Added " + OVER_1000_OBJECTS_COUNT + " files into " + OVER_1000_OBJECTS_PATH + " successfully");
+
+        LOGGER.info("Files added successfully");
+    }
+
+    /**
+     * This bucket is being filled by fixed data, a test is counting all records in this bucket. If this bucket is
+     * changed, the test case will fail and its result will need to be updated each time
+     */
+    public static void prepareFixedDataContainer() {
+        LOGGER.info("Loading fixed data to " + FIXED_DATA_CONTAINER);
+
+        // Files data
+        String path = Paths.get(JSON_DATA_PATH, "single-line", "20-records.json").toString();
+        fixedDataLoader.upload("1.json", path, true, false);
+        fixedDataLoader.upload("2.json", path, true, false);
+        fixedDataLoader.upload("lvl1/3.json", path, true, false);
+        fixedDataLoader.upload("lvl1/4.json", path, true, false);
+        fixedDataLoader.upload("lvl1/lvl2/5.json", path, true, false);
+    }
+
+    public static void loadJsonFiles() {
+        String dataBasePath = JSON_DATA_PATH;
+        String definition = JSON_DEFINITION;
+
+        // Normal format
+        String definitionSegment = "json";
+        loadData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
+        loadData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
+        loadData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
+        loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
+                false);
+
+        definitionSegment = "json-array-of-objects";
+        loadData(dataBasePath, "single-line", "array_of_objects.json", "json-data/", definitionSegment, false, false);
+
+        // gz compressed format
+        definitionSegment = "gz";
+        loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
+                false);
+
+        // Mixed normal and gz compressed format
+        definitionSegment = "mixed";
+        loadData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
+        loadData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
+        loadData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
+        loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
+                false);
+        loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
+                false);
+    }
+
+    private static void loadCsvFiles() {
+        String dataBasePath = CSV_DATA_PATH;
+        String definition = CSV_DEFINITION;
+
+        // Normal format
+        String definitionSegment = "csv";
+        loadData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
+        loadData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
+
+        // gz compressed format
+        definitionSegment = "gz";
+        loadGzData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
+
+        // Mixed normal and gz compressed format
+        definitionSegment = "mixed";
+        loadData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
+        loadData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
+    }
+
+    private static void loadTsvFiles() {
+        String dataBasePath = TSV_DATA_PATH;
+        String definition = TSV_DEFINITION;
+
+        // Normal format
+        String definitionSegment = "tsv";
+        loadData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
+        loadData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
+
+        // gz compressed format
+        definitionSegment = "gz";
+        loadGzData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
+
+        // Mixed normal and gz compressed format
+        definitionSegment = "mixed";
+        loadData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
+        loadData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
+        loadGzData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
+    }
+
+    private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
+            String definitionSegment, boolean removeExtension) {
+        loadData(fileBasePath, filePathSegment, filename, definition, definitionSegment, removeExtension, true);
+    }
+
+    /** Uploads one local data file to the playground, optionally duplicating it into four nested sub-levels. */
+    private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
+            String definitionSegment, boolean removeExtension, boolean copyToSubLevels) {
+        // Files data
+        Path filePath = Paths.get(fileBasePath, filePathSegment, filename);
+
+        // Keep or remove the file extension
+        Assert.assertFalse("Files with no extension are not supported yet for external datasets", removeExtension);
+        String finalFileName = removeExtension ? FilenameUtils.removeExtension(filename) : filename;
+
+        // Files base definition
+        filePathSegment = filePathSegment.isEmpty() ? "" : filePathSegment + "/";
+        definitionSegment = definitionSegment.isEmpty() ? "" : definitionSegment + "/";
+        String basePath = definition + filePathSegment + definitionSegment;
+
+        // Load the data
+        String path = filePath.toString();
+        playgroundDataLoader.upload(basePath + finalFileName, path, true, false);
+        if (copyToSubLevels) {
+            playgroundDataLoader.upload(basePath + "level1a/" + finalFileName, path, true, false);
+            playgroundDataLoader.upload(basePath + "level1b/" + finalFileName, path, true, false);
+            playgroundDataLoader.upload(basePath + "level1a/level2a/" + finalFileName, path, true, false);
+            playgroundDataLoader.upload(basePath + "level1a/level2b/" + finalFileName, path, true, false);
+        }
+    }
+
+    /** Same as loadData with sub-level copies, but keys get a ".gz" suffix and the gzipped flag is set. */
+    private static void loadGzData(String fileBasePath, String filePathSegment, String filename, String definition,
+            String definitionSegment, boolean removeExtension) {
+        // Keep or remove the file extension
+        Assert.assertFalse("Files with no extension are not supported yet for external datasets", removeExtension);
+        String finalFileName = removeExtension ? FilenameUtils.removeExtension(filename) : filename;
+        finalFileName += ".gz";
+
+        // Files base definition
+        filePathSegment = filePathSegment.isEmpty() ? "" : filePathSegment + "/";
+        definitionSegment = definitionSegment.isEmpty() ? "" : definitionSegment + "/";
+        String basePath = definition + filePathSegment + definitionSegment;
+
+        // Load the data
+        String path = Paths.get(fileBasePath, filePathSegment, filename).toString();
+        playgroundDataLoader.upload(basePath + finalFileName, path, true, true);
+        playgroundDataLoader.upload(basePath + "level1a/" + finalFileName, path, true, true);
+        playgroundDataLoader.upload(basePath + "level1b/" + finalFileName, path, true, true);
+        playgroundDataLoader.upload(basePath + "level1a/level2a/" + finalFileName, path, true, true);
+        playgroundDataLoader.upload(basePath + "level1a/level2b/" + finalFileName, path, true, true);
+    }
+
+    /** Writes a single record with a ~36 MiB string field to a local file and uploads it to the playground. */
+    private static void loadBigJson() {
+        String fileName = "big_record.json";
+        int bufferSize = 4 * 1024 * 1024;
+        int maxSize = bufferSize * 9;
+        Path filePath = Paths.get("target", "rttest", "tmp", fileName);
+        try {
+            if (Files.notExists(filePath)) {
+                Files.createDirectories(filePath.getParent());
+                Files.createFile(filePath);
+            }
+        } catch (IOException ex) {
+            throw new IllegalStateException("Failed to create file " + filePath, ex);
+        }
+
+        try (FileWriter writer = new FileWriter(filePath.toFile(), false);
+                BufferedWriter bw = new BufferedWriter(writer, bufferSize)) {
+            bw.append("{ \"large_field\": \"");
+            for (int i = 0; i < maxSize; i++) {
+                bw.append('A');
+            }
+            bw.append("\" }");
+        } catch (IOException e) {
+            throw new IllegalStateException(e);
+        }
+        String key = "big-json/" + fileName;
+        playgroundDataLoader.upload(key, filePath.toString(), true, false);
+    }
+
+    /**
+     * Generates over 1000 objects and uploads them through the playground uploader, 1 record per object
+     */
+    private static void loadLargeNumberOfFiles() {
+        for (int i = 0; i < OVER_1000_OBJECTS_COUNT; i++) {
+            playgroundDataLoader.upload(OVER_1000_OBJECTS_PATH + "/" + i + ".json", "{\"id\":" + i + "}");
+        }
+    }
+
+    /**
+     * Loads a combination of different file formats in the same path
+     */
+    public static void prepareMixedDataContainer() {
+        // JSON
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/extension/" + "hello-world-2018.json", "{\"id\":" + 1 + "}");
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/extension/" + "hello-world-2019.json", "{\"id\":" + 2 + "}");
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/extension/" + "hello-world-2020.json", "{\"id\":" + 3 + "}");
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/EXTENSION/" + "goodbye-world-2018.json", "{\"id\":" + 4 + "}");
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/EXTENSION/" + "goodbye-world-2019.json", "{\"id\":" + 5 + "}");
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/EXTENSION/" + "goodbye-world-2020.json", "{\"id\":" + 6 + "}");
+
+        // CSV
+        mixedDataLoader.upload(MIXED_DEFINITION + "csv/extension/" + "hello-world-2018.csv", "7,\"good\"");
+        mixedDataLoader.upload(MIXED_DEFINITION + "csv/extension/" + "hello-world-2019.csv", "8,\"good\"");
+        mixedDataLoader.upload(MIXED_DEFINITION + "csv/extension/" + "hello-world-2020.csv", "9,\"good\"");
+        mixedDataLoader.upload(MIXED_DEFINITION + "csv/EXTENSION/" + "goodbye-world-2018.csv", "10,\"good\"");
+        mixedDataLoader.upload(MIXED_DEFINITION + "csv/EXTENSION/" + "goodbye-world-2019.csv", "11,\"good\"");
+        mixedDataLoader.upload(MIXED_DEFINITION + "csv/EXTENSION/" + "goodbye-world-2020.csv", "12,\"good\"");
+
+        // TSV
+        mixedDataLoader.upload(MIXED_DEFINITION + "tsv/extension/" + "hello-world-2018.tsv", "13\t\"good\"");
+        mixedDataLoader.upload(MIXED_DEFINITION + "tsv/extension/" + "hello-world-2019.tsv", "14\t\"good\"");
+        mixedDataLoader.upload(MIXED_DEFINITION + "tsv/extension/" + "hello-world-2020.tsv", "15\t\"good\"");
+        mixedDataLoader.upload(MIXED_DEFINITION + "tsv/EXTENSION/" + "goodbye-world-2018.tsv", "16\t\"good\"");
+        mixedDataLoader.upload(MIXED_DEFINITION + "tsv/EXTENSION/" + "goodbye-world-2019.tsv", "17\t\"good\"");
+        mixedDataLoader.upload(MIXED_DEFINITION + "tsv/EXTENSION/" + "goodbye-world-2020.tsv", "18\t\"good\"");
+
+        // JSON no extension
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/no-extension/" + "hello-world-2018", "{\"id\":" + 1 + "}");
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/no-extension/" + "hello-world-2019", "{\"id\":" + 2 + "}");
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/no-extension/" + "hello-world-2020", "{\"id\":" + 3 + "}");
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/NO-EXTENSION/" + "goodbye-world-2018", "{\"id\":" + 4 + "}");
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/NO-EXTENSION/" + "goodbye-world-2019", "{\"id\":" + 5 + "}");
+        mixedDataLoader.upload(MIXED_DEFINITION + "json/NO-EXTENSION/" + "goodbye-world-2020", "{\"id\":" + 6 + "}");
+    }
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
index 387f7f6..6ecdc0e 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/aws/AwsS3ExternalDatasetTest.java
@@ -18,15 +18,17 @@
  */
 package org.apache.asterix.test.external_dataset.aws;
 
+import static org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.setDataPaths;
+import static org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils.setUploaders;
 import static org.apache.hyracks.util.file.FileUtil.joinPath;
 
 import java.io.ByteArrayOutputStream;
 import java.io.File;
+import java.io.IOException;
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.URI;
 import java.nio.file.Files;
-import java.nio.file.Path;
 import java.nio.file.Paths;
 import java.util.BitSet;
 import java.util.Collection;
@@ -39,6 +41,7 @@
 
 import org.apache.asterix.common.api.INcApplicationContext;
 import org.apache.asterix.test.common.TestExecutor;
+import org.apache.asterix.test.external_dataset.ExternalDatasetTestUtils;
 import org.apache.asterix.test.runtime.ExecutionTestUtil;
 import org.apache.asterix.test.runtime.LangExecutionUtil;
 import org.apache.asterix.testframework.context.TestCaseContext;
@@ -50,7 +53,6 @@
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.junit.AfterClass;
-import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.FixMethodOrder;
 import org.junit.Test;
@@ -91,7 +93,6 @@
     private static final String JSON_DATA_PATH = joinPath("data", "json");
     private static final String CSV_DATA_PATH = joinPath("data", "csv");
     private static final String TSV_DATA_PATH = joinPath("data", "tsv");
-    private static final String MIXED_DATA_PATH = joinPath("data", "mixed");
 
     // Service endpoint
     private static final int MOCK_SERVER_PORT = 8001;
@@ -99,31 +100,27 @@
 
     // Region, bucket and definitions
     private static final String MOCK_SERVER_REGION = "us-west-2";
-    private static final String MOCK_SERVER_BUCKET = "playground";
-    private static final String FIXED_DATA_BUCKET = "fixed-data"; // Do not use, has fixed data
-    private static final String INCLUDE_EXCLUDE_BUCKET = "include-exclude"; // include & exclude bucket
-    private static final String JSON_DEFINITION = "json-data/reviews/"; // data resides here
-    private static final String CSV_DEFINITION = "csv-data/reviews/"; // data resides here
-    private static final String TSV_DEFINITION = "tsv-data/reviews/"; // data resides here
-
-    // This is used for a test to generate over 1000 number of files
-    private static final String OVER_1000_OBJECTS_PATH = "over-1000-objects";
-    private static final int OVER_1000_OBJECTS_COUNT = 2999;
 
     private static final Set<String> fileNames = new HashSet<>();
     private static final CreateBucketRequest.Builder CREATE_BUCKET_BUILDER = CreateBucketRequest.builder();
     private static final DeleteBucketRequest.Builder DELETE_BUCKET_BUILDER = DeleteBucketRequest.builder();
     private static final PutObjectRequest.Builder PUT_OBJECT_BUILDER = PutObjectRequest.builder();
 
-    // IMPORTANT: The following values must be used in the AWS S3 test case
     private static S3Mock s3MockServer;
     private static S3Client client;
-    private static final PutObjectRequest.Builder builder = PutObjectRequest.builder().bucket(MOCK_SERVER_BUCKET);
-    private static final PutObjectRequest.Builder includeExcludeBuilder =
-            PutObjectRequest.builder().bucket(INCLUDE_EXCLUDE_BUCKET);
 
     protected TestCaseContext tcCtx;
 
+    public static final String PLAYGROUND_CONTAINER = "playground";
+    public static final String FIXED_DATA_CONTAINER = "fixed-data"; // Do not use, has fixed data
+    public static final String INCLUDE_EXCLUDE_CONTAINER = "include-exclude";
+    public static final PutObjectRequest.Builder playgroundBuilder =
+            PutObjectRequest.builder().bucket(PLAYGROUND_CONTAINER);
+    public static final PutObjectRequest.Builder fixedDataBuilder =
+            PutObjectRequest.builder().bucket(FIXED_DATA_CONTAINER);
+    public static final PutObjectRequest.Builder includeExcludeBuilder =
+            PutObjectRequest.builder().bucket(INCLUDE_EXCLUDE_CONTAINER);
+
     public AwsS3ExternalDatasetTest(TestCaseContext tcCtx) {
         this.tcCtx = tcCtx;
     }
@@ -156,9 +153,9 @@
         SUITE_TESTS = "testsuite_external_dataset_s3.xml";
         ONLY_TESTS = "only_external_dataset.xml";
         TEST_CONFIG_FILE_NAME = "src/main/resources/cc.conf";
-        PREPARE_BUCKET = AwsS3ExternalDatasetTest::prepareS3Bucket;
-        PREPARE_FIXED_DATA_BUCKET = AwsS3ExternalDatasetTest::prepareFixedDataBucket;
-        PREPARE_MIXED_DATA_BUCKET = AwsS3ExternalDatasetTest::prepareMixedDataBucket;
+        PREPARE_BUCKET = ExternalDatasetTestUtils::preparePlaygroundContainer;
+        PREPARE_FIXED_DATA_BUCKET = ExternalDatasetTestUtils::prepareFixedDataContainer;
+        PREPARE_MIXED_DATA_BUCKET = ExternalDatasetTestUtils::prepareMixedDataContainer;
         return LangExecutionUtil.tests(ONLY_TESTS, SUITE_TESTS);
     }
 
@@ -197,312 +194,56 @@
         builder.region(Region.of(MOCK_SERVER_REGION)).credentialsProvider(AnonymousCredentialsProvider.create())
                 .endpointOverride(endpoint);
         client = builder.build();
+        client.createBucket(CreateBucketRequest.builder().bucket(PLAYGROUND_CONTAINER).build());
+        client.createBucket(CreateBucketRequest.builder().bucket(FIXED_DATA_CONTAINER).build());
+        client.createBucket(CreateBucketRequest.builder().bucket(INCLUDE_EXCLUDE_CONTAINER).build());
         LOGGER.info("Client created successfully");
 
         // Create the bucket and upload some json files
+        setDataPaths(JSON_DATA_PATH, CSV_DATA_PATH, TSV_DATA_PATH);
+        setUploaders(AwsS3ExternalDatasetTest::loadPlaygroundData, AwsS3ExternalDatasetTest::loadFixedData,
+                AwsS3ExternalDatasetTest::loadMixedData);
         PREPARE_BUCKET.run();
         PREPARE_FIXED_DATA_BUCKET.run();
         PREPARE_MIXED_DATA_BUCKET.run();
     }
 
-    /**
-     * Creates a bucket and fills it with some files for testing purpose.
-     */
-    private static void prepareS3Bucket() {
-        LOGGER.info("creating bucket " + MOCK_SERVER_BUCKET);
-        client.createBucket(CreateBucketRequest.builder().bucket(MOCK_SERVER_BUCKET).build());
-        LOGGER.info("bucket created successfully");
-
-        LOGGER.info("Adding JSON files to the bucket");
-        loadJsonFiles();
-        LOGGER.info("JSON Files added successfully");
-
-        LOGGER.info("Adding CSV files to the bucket");
-        loadCsvFiles();
-        LOGGER.info("CSV Files added successfully");
-
-        LOGGER.info("Adding TSV files to the bucket");
-        loadTsvFiles();
-        LOGGER.info("TSV Files added successfully");
-
-        LOGGER.info("Loading " + OVER_1000_OBJECTS_COUNT + " into " + OVER_1000_OBJECTS_PATH);
-        loadLargeNumberOfFiles();
-        LOGGER.info("Added " + OVER_1000_OBJECTS_COUNT + " files into " + OVER_1000_OBJECTS_PATH + " successfully");
+    private static void loadPlaygroundData(String key, String content, boolean fromFile, boolean gzipped) {
+        client.putObject(playgroundBuilder.key(key).build(), getRequestBody(content, fromFile, gzipped));
     }
 
-    /**
-     * This bucket is being filled by fixed data, a test is counting all records in this bucket. If this bucket is
-     * changed, the test case will fail and its result will need to be updated each time
-     */
-    private static void prepareFixedDataBucket() {
-        LOGGER.info("creating bucket " + FIXED_DATA_BUCKET);
-        client.createBucket(CreateBucketRequest.builder().bucket(FIXED_DATA_BUCKET).build());
-        LOGGER.info("bucket " + FIXED_DATA_BUCKET + " created successfully");
-
-        LOGGER.info("Loading fixed data to " + FIXED_DATA_BUCKET);
-
-        // Files data
-        RequestBody requestBody = RequestBody.fromFile(Paths.get(JSON_DATA_PATH, "single-line", "20-records.json"));
-        client.putObject(builder.bucket(FIXED_DATA_BUCKET).key("1.json").build(), requestBody);
-        client.putObject(builder.bucket(FIXED_DATA_BUCKET).key("2.json").build(), requestBody);
-        client.putObject(builder.bucket(FIXED_DATA_BUCKET).key("lvl1/3.json").build(), requestBody);
-        client.putObject(builder.bucket(FIXED_DATA_BUCKET).key("lvl1/4.json").build(), requestBody);
-        client.putObject(builder.bucket(FIXED_DATA_BUCKET).key("lvl1/lvl2/5.json").build(), requestBody);
+    private static void loadFixedData(String key, String content, boolean fromFile, boolean gzipped) {
+        client.putObject(fixedDataBuilder.key(key).build(), getRequestBody(content, fromFile, gzipped));
     }
 
-    private static void loadJsonFiles() {
-        String dataBasePath = JSON_DATA_PATH;
-        String definition = JSON_DEFINITION;
-
-        // Normal format
-        String definitionSegment = "json";
-        loadData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
-        loadData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
-        loadData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
-        loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
-                false);
-
-        definitionSegment = "json-array-of-objects";
-        loadData(dataBasePath, "single-line", "array_of_objects.json", "json-data/", definitionSegment, false, false);
-
-        // gz compressed format
-        definitionSegment = "gz";
-        loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
-                false);
-
-        // Mixed normal and gz compressed format
-        definitionSegment = "mixed";
-        loadData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
-        loadData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
-        loadData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
-        loadData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
-                false);
-        loadGzData(dataBasePath, "single-line", "20-records.json", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "multi-lines", "20-records.json", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "multi-lines-with-arrays", "5-records.json", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "multi-lines-with-nested-objects", "5-records.json", definition, definitionSegment,
-                false);
+    private static void loadMixedData(String key, String content, boolean fromFile, boolean gzipped) {
+        client.putObject(includeExcludeBuilder.key(key).build(), getRequestBody(content, fromFile, gzipped));
     }
 
-    private static void loadCsvFiles() {
-        String dataBasePath = CSV_DATA_PATH;
-        String definition = CSV_DEFINITION;
+    private static RequestBody getRequestBody(String content, boolean fromFile, boolean gzipped) {
+        RequestBody body;
 
-        // Normal format
-        String definitionSegment = "csv";
-        loadData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
-        loadData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
-
-        // gz compressed format
-        definitionSegment = "gz";
-        loadGzData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
-
-        // Mixed normal and gz compressed format
-        definitionSegment = "mixed";
-        loadData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
-        loadData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "", "01.csv", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "", "02.csv", definition, definitionSegment, false);
-    }
-
-    private static void loadTsvFiles() {
-        String dataBasePath = TSV_DATA_PATH;
-        String definition = TSV_DEFINITION;
-
-        // Normal format
-        String definitionSegment = "tsv";
-        loadData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
-        loadData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
-
-        // gz compressed format
-        definitionSegment = "gz";
-        loadGzData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
-
-        // Mixed normal and gz compressed format
-        definitionSegment = "mixed";
-        loadData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
-        loadData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "", "01.tsv", definition, definitionSegment, false);
-        loadGzData(dataBasePath, "", "02.tsv", definition, definitionSegment, false);
-    }
-
-    private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
-            String definitionSegment, boolean removeExtension) {
-        loadData(fileBasePath, filePathSegment, filename, definition, definitionSegment, removeExtension, true);
-    }
-
-    private static void loadData(String fileBasePath, String filePathSegment, String filename, String definition,
-            String definitionSegment, boolean removeExtension, boolean copyToSubLevels) {
-        // Files data
-        Path filePath = Paths.get(fileBasePath, filePathSegment, filename);
-        RequestBody requestBody = RequestBody.fromFile(filePath);
-
-        // Keep or remove the file extension
-        Assert.assertFalse("Files with no extension are not supported yet for external datasets", removeExtension);
-        String finalFileName;
-        if (removeExtension) {
-            finalFileName = FilenameUtils.removeExtension(filename);
+        // Content is string
+        if (!fromFile) {
+            body = RequestBody.fromString(content);
         } else {
-            finalFileName = filename;
-        }
-
-        // Files base definition
-        filePathSegment = filePathSegment.isEmpty() ? "" : filePathSegment + "/";
-        definitionSegment = definitionSegment.isEmpty() ? "" : definitionSegment + "/";
-        String basePath = definition + filePathSegment + definitionSegment;
-
-        // Load the data
-        client.putObject(builder.key(basePath + finalFileName).build(), requestBody);
-        if (copyToSubLevels) {
-            client.putObject(builder.key(basePath + "level1a/" + finalFileName).build(), requestBody);
-            client.putObject(builder.key(basePath + "level1b/" + finalFileName).build(), requestBody);
-            client.putObject(builder.key(basePath + "level1a/level2a/" + finalFileName).build(), requestBody);
-            client.putObject(builder.key(basePath + "level1a/level2b/" + finalFileName).build(), requestBody);
-        }
-    }
-
-    private static void loadGzData(String fileBasePath, String filePathSegment, String filename, String definition,
-            String definitionSegment, boolean removeExtension) {
-        try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
-                GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) {
-
-            // Files data
-            Path filePath = Paths.get(fileBasePath, filePathSegment, filename);
-
-            // Get the compressed data
-            gzipOutputStream.write(Files.readAllBytes(filePath));
-            gzipOutputStream.close(); // Need to close or data will be invalid
-            byte[] gzipBytes = byteArrayOutputStream.toByteArray();
-            RequestBody requestBody = RequestBody.fromBytes(gzipBytes);
-
-            // Keep or remove the file extension
-            Assert.assertFalse("Files with no extension are not supported yet for external datasets", removeExtension);
-            String finalFileName;
-            if (removeExtension) {
-                finalFileName = FilenameUtils.removeExtension(filename);
+            // Content is a file path
+            if (!gzipped) {
+                body = RequestBody.fromFile(Paths.get(content));
             } else {
-                finalFileName = filename;
+                try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+                        GZIPOutputStream gzipOutputStream = new GZIPOutputStream(byteArrayOutputStream)) {
+                    gzipOutputStream.write(Files.readAllBytes(Paths.get(content)));
+                    gzipOutputStream.close(); // Close before reading the bytes, otherwise the gzip data is invalid
+                    byte[] gzipBytes = byteArrayOutputStream.toByteArray();
+                    body = RequestBody.fromBytes(gzipBytes);
+                } catch (IOException ex) {
+                    throw new IllegalArgumentException(ex.toString(), ex);
+                }
             }
-            finalFileName += ".gz";
-
-            // Files base definition
-            filePathSegment = filePathSegment.isEmpty() ? "" : filePathSegment + "/";
-            definitionSegment = definitionSegment.isEmpty() ? "" : definitionSegment + "/";
-            String basePath = definition + filePathSegment + definitionSegment;
-
-            // Load the data
-            client.putObject(builder.key(basePath + finalFileName).build(), requestBody);
-            client.putObject(builder.key(basePath + "level1a/" + finalFileName).build(), requestBody);
-            client.putObject(builder.key(basePath + "level1b/" + finalFileName).build(), requestBody);
-            client.putObject(builder.key(basePath + "level1a/level2a/" + finalFileName).build(), requestBody);
-            client.putObject(builder.key(basePath + "level1a/level2b/" + finalFileName).build(), requestBody);
-        } catch (Exception ex) {
-            LOGGER.error(ex.getMessage());
         }
-    }
 
-    /**
-     * Generates over 1000 objects and upload them to S3 mock server, 1 record per object
-     */
-    private static void loadLargeNumberOfFiles() {
-        for (int i = 0; i < OVER_1000_OBJECTS_COUNT; i++) {
-            RequestBody body = RequestBody.fromString("{\"id\":" + i + "}");
-            client.putObject(builder.key(OVER_1000_OBJECTS_PATH + "/" + i + ".json").build(), body);
-        }
-    }
-
-    /**
-     * Loads a combination of different file formats in the same path
-     */
-    private static void prepareMixedDataBucket() {
-        LOGGER.info("creating bucket " + INCLUDE_EXCLUDE_BUCKET);
-        client.createBucket(CreateBucketRequest.builder().bucket(INCLUDE_EXCLUDE_BUCKET).build());
-        LOGGER.info("bucket " + INCLUDE_EXCLUDE_BUCKET + " created successfully");
-
-        // JSON
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/extension/" + "hello-world-2018.json").build(),
-                RequestBody.fromString("{\"id\":" + 1 + "}"));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/extension/" + "hello-world-2019.json").build(),
-                RequestBody.fromString("{\"id\":" + 2 + "}"));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/extension/" + "hello-world-2020.json").build(),
-                RequestBody.fromString("{\"id\":" + 3 + "}"));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/EXTENSION/" + "goodbye-world-2018.json").build(),
-                RequestBody.fromString("{\"id\":" + 4 + "}"));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/EXTENSION/" + "goodbye-world-2019.json").build(),
-                RequestBody.fromString("{\"id\":" + 5 + "}"));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/EXTENSION/" + "goodbye-world-2020.json").build(),
-                RequestBody.fromString("{\"id\":" + 6 + "}"));
-
-        // CSV
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/extension/" + "hello-world-2018.csv").build(),
-                RequestBody.fromString("7,\"good\""));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/extension/" + "hello-world-2019.csv").build(),
-                RequestBody.fromString("8,\"good\""));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/extension/" + "hello-world-2020.csv").build(),
-                RequestBody.fromString("{9,\"good\""));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/EXTENSION/" + "goodbye-world-2018.csv").build(),
-                RequestBody.fromString("10,\"good\""));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/EXTENSION/" + "goodbye-world-2019.csv").build(),
-                RequestBody.fromString("11,\"good\""));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/csv/EXTENSION/" + "goodbye-world-2020.csv").build(),
-                RequestBody.fromString("12,\"good\""));
-
-        // TSV
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/extension/" + "hello-world-2018.tsv").build(),
-                RequestBody.fromString("13\t\"good\""));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/extension/" + "hello-world-2019.tsv").build(),
-                RequestBody.fromString("14\t\"good\""));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/extension/" + "hello-world-2020.tsv").build(),
-                RequestBody.fromString("15\t\"good\""));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/EXTENSION/" + "goodbye-world-2018.tsv").build(),
-                RequestBody.fromString("16\t\"good\""));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/EXTENSION/" + "goodbye-world-2019.tsv").build(),
-                RequestBody.fromString("17\t\"good\""));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/tsv/EXTENSION/" + "goodbye-world-2020.tsv").build(),
-                RequestBody.fromString("18\t\"good\""));
-
-        // JSON no extension
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/no-extension/" + "hello-world-2018").build(),
-                RequestBody.fromString("{\"id\":" + 1 + "}"));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/no-extension/" + "hello-world-2019").build(),
-                RequestBody.fromString("{\"id\":" + 2 + "}"));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/no-extension/" + "hello-world-2020").build(),
-                RequestBody.fromString("{\"id\":" + 3 + "}"));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/NO-EXTENSION/" + "goodbye-world-2018").build(),
-                RequestBody.fromString("{\"id\":" + 4 + "}"));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/NO-EXTENSION/" + "goodbye-world-2019").build(),
-                RequestBody.fromString("{\"id\":" + 5 + "}"));
-        client.putObject(
-                includeExcludeBuilder.key(MIXED_DATA_PATH + "/json/NO-EXTENSION/" + "goodbye-world-2020").build(),
-                RequestBody.fromString("{\"id\":" + 6 + "}"));
+        return body;
     }
 
     static class AwsTestExecutor extends TestExecutor {
@@ -588,4 +329,4 @@
         }
         LOGGER.info("Done creating bucket with data");
     }
-}
+}
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-1/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-1/test.000.ddl.sqlpp
index bc6a0c4..04fb1db 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-1/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-1/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("exclude"="*.json"),
 ("exclude1"="*.json")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-2/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-2/test.000.ddl.sqlpp
index a8b2c02..321d5dc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-2/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-2/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("exclude"="*.json"),
 ("exclude#"="*.json")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-3/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-3/test.000.ddl.sqlpp
index 5497e13..e26f01b 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-3/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/bad-name-3/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("exclude"="*.json"),
 ("exclude#hello"="*.json")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/both/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/both/test.000.ddl.sqlpp
index 2540c6a..428ae5e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/both/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/both/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("include"="*.json"),
 ("exclude"="*.json")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-1/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-1/test.000.ddl.sqlpp
index e2883bf..e47fbd8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-1/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-1/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("exclude"="*.?sv")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-2/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-2/test.000.ddl.sqlpp
index 25ae5af..85664b8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-2/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-2/test.000.ddl.sqlpp
@@ -29,9 +29,9 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
-("exclude"="data/mixed/?sv*"),
-("exclude#1"="data/mixed/json/extension*"),
-("exclude#100"="data/mixed/json/EXTENSION*")
+("exclude"="mixed-data/reviews/?sv*"),
+("exclude#1"="mixed-data/reviews/json/extension*"),
+("exclude#100"="mixed-data/reviews/json/EXTENSION*")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-3/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-3/test.000.ddl.sqlpp
index edeabbd..b127693 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-3/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-3/test.000.ddl.sqlpp
@@ -29,8 +29,8 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
-("exclude"="data/mixed/?sv*"),
-("exclude#100"="data/mixed/json*201?*")
+("exclude"="mixed-data/reviews/?sv*"),
+("exclude#100"="mixed-data/reviews/json*201?*")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-4/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-4/test.000.ddl.sqlpp
index c2ad561..b3f0bcf 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-4/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-4/test.000.ddl.sqlpp
@@ -29,8 +29,8 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
-("exclude"="data/mixed/?sv*"),
-("exclude#1"="data/mixed/json*bye*")
+("exclude"="mixed-data/reviews/?sv*"),
+("exclude#1"="mixed-data/reviews/json*bye*")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-5/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-5/test.000.ddl.sqlpp
index bf4b1a0..ff12e07 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-5/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-5/test.000.ddl.sqlpp
@@ -29,8 +29,8 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
-("exclude"="data/mixed/?sv*"),
-("exclude#1"="data/mixed/json/extension/hello-world-2018.json")
+("exclude"="mixed-data/reviews/?sv*"),
+("exclude#1"="mixed-data/reviews/json/extension/hello-world-2018.json")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-6/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-6/test.000.ddl.sqlpp
index 3896068..d4804b5 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-6/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-6/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("exclude"="*.[abct][abcs][abcv]")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-all/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-all/test.000.ddl.sqlpp
index f0088c4..adef5d9 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-all/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/exclude-all/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("exclude"="*")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-1/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-1/test.000.ddl.sqlpp
index 3419631..9d31cd3 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-1/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-1/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("include"="*2018*.json")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-10/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-10/test.000.ddl.sqlpp
index 2aac2b2..f676230 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-10/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-10/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="csv"),
 ("header"=false),
 ("include"="*[abc][.*")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-11/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-11/test.000.ddl.sqlpp
index c54635f..acf651c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-11/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-11/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="csv"),
 ("header"=false),
 ("include"="*.[a-c][a-z][a-z**||\\\\&&--~~]")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-12/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-12/test.000.ddl.sqlpp
index 6996c56..30d3c63 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-12/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-12/test.000.ddl.sqlpp
@@ -31,7 +31,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="csv"),
 ("header"=false),
 ("include"="[][!][^]]]]*[![*a-zA--&&^$||0-9B$\\*&&]*&&[^a-b||0--9][[[")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-2/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-2/test.000.ddl.sqlpp
index d37f273..ac496ec 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-2/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-2/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("include"="*201?*.json")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-3/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-3/test.000.ddl.sqlpp
index cb42507..a367c04 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-3/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-3/test.000.ddl.sqlpp
@@ -31,7 +31,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="csv"),
 ("header"=false),
 ("include"="*201?*.csv")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-4/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-4/test.000.ddl.sqlpp
index 1356218..44ce34e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-4/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-4/test.000.ddl.sqlpp
@@ -31,7 +31,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="tsv"),
 ("header"=false),
 ("include"="*201?*.tsv")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-5/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-5/test.000.ddl.sqlpp
index 52facb3..ebaa926 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-5/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-5/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("include"="*bye*.json")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-6/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-6/test.000.ddl.sqlpp
index 89bbcee..baa137c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-6/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-6/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
-("include"="data/mixed/json/*EXTENSION*")
+("include"="mixed-data/reviews/json/*EXTENSION*")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-7/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-7/test.000.ddl.sqlpp
index de90340..7a569d8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-7/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-7/test.000.ddl.sqlpp
@@ -29,8 +29,8 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
-("include"="data/mixed/json/NO-EXTENSION*"),
-("include#0"="data/mixed/json/EXTENSION*")
+("include"="mixed-data/reviews/json/NO-EXTENSION*"),
+("include#0"="mixed-data/reviews/json/EXTENSION*")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-8/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-8/test.000.ddl.sqlpp
index 86a669d..75087f3 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-8/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-8/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="csv"),
 ("header"=false),
 ("include"="*.[!xyt][!xyz][!xyz]")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-9/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-9/test.000.ddl.sqlpp
index 0dc8472..ec8e59a 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-9/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-9/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="csv"),
 ("header"=false),
 ("include"="*.[a-c][a-z][a-z]")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-all/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-all/test.000.ddl.sqlpp
index 0255e87..6ebbe1c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-all/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/include-exclude/include-all/test.000.ddl.sqlpp
@@ -29,7 +29,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("include"="*")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/exclude-all-files/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/exclude-all-files/test.000.ddl.sqlpp
index 71130e2..0aceee0 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/exclude-all-files/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/exclude-all-files/test.000.ddl.sqlpp
@@ -30,7 +30,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("exclude"="*")
 );
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/include-no-files/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/include-no-files/test.000.ddl.sqlpp
index 9edc6e6..8b136cc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/include-no-files/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/no-files-returned/include-no-files/test.000.ddl.sqlpp
@@ -30,7 +30,7 @@
 CREATE EXTERNAL DATASET test(test) USING %adapter% (
 %template%,
 ("container"="include-exclude"),
-("definition"="data/mixed/"),
+("definition"="mixed-data/reviews/"),
 ("format"="json"),
 ("include"="*.notRealExtension")
 );
\ No newline at end of file