[ASTERIXDB-2649][FUN] TPC-DS datasource function, generate proper data types
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Changed the TPC-DS datasource function to generate
the TPC-DS data with proper data types according to
the TPC-DS schema.
- Updated the TPC-DS test cases.
Change-Id: I20f6b8d043906ad62652d098e09ab70eb1d78b1b
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/3604
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/TPCDSDataGeneratorReader.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/TPCDSDataGeneratorReader.java
index 3d08c01..72bf46f 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/TPCDSDataGeneratorReader.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/TPCDSDataGeneratorReader.java
@@ -47,6 +47,14 @@
private final FunctionIdentifier functionIdentifier;
+ // Table name will be added to each generated record
+ private final static String TABLE_NAME_FIELD_NAME = "table_name";
+
+ // The generated values are returned as a list: index 0 holds all the values for the parent record and, if a
+ // child record is created, index 1 holds all the values for the child record
+ private static final int PARENT_VALUES_INDEX = 0;
+ private static final int CHILD_VALUES_INDEX = 1;
+
// Table members
private final List<Table> selectedTables;
private final StringBuilder builder = new StringBuilder();
@@ -90,6 +98,44 @@
}
}
+ /**
+ * Gets the table matching the provided string table name; throws an exception if no matching table is found.
+ *
+ * @param tableName String table name to search for.
+ * @return Table if found, throws an exception otherwise.
+ */
+ private List<Table> getTableFromStringTableName(String tableName) throws HyracksDataException {
+
+ // Get all the tables
+ if (generateAllTables) {
+ // Remove the DBGEN_VERSION table and all children tables, parent tables will generate them
+ return Table.getBaseTables().stream()
+ .filter(table -> !table.equals(Table.DBGEN_VERSION) && !table.isChild())
+ .collect(Collectors.toList());
+ }
+
+ // Search for the table
+ List<Table> matchedTables = Table.getBaseTables().stream()
+ .filter(table -> tableName.equalsIgnoreCase(table.getName())).collect(Collectors.toList());
+
+ // Ensure the table was found
+ if (matchedTables.isEmpty()) {
+ throw new RuntimeDataException(ErrorCode.TPCDS_INVALID_TABLE_NAME, getFunctionIdentifier().getName(),
+ tableName);
+ }
+
+ return matchedTables;
+ }
+
+ /**
+ * Gets the function identifier
+ *
+ * @return function identifier
+ */
+ private FunctionIdentifier getFunctionIdentifier() {
+ return functionIdentifier;
+ }
+
@Override
public boolean hasNext() {
@@ -144,21 +190,8 @@
// Clear the builder (This is faster than re-creating the builder each iteration)
builder.setLength(0);
- builder.append("{\"tableName\":\"");
- builder.append(currentTable.toString());
- builder.append("\"");
-
- // Build the record data
- for (int counter = 0; counter < values.get(0).size(); counter++) {
- builder.append(",\"");
- builder.append(currentTable.getColumns()[counter].getName());
- builder.append("\":\"");
- builder.append(values.get(0).get(counter));
- builder.append("\"");
- }
-
- // Close the record
- builder.append("}");
+ // Construct the record
+ constructRecord(values.get(PARENT_VALUES_INDEX), currentTable);
// Reference to the parent row to be returned, before resetting the builder again
String parentRow = builder.toString();
@@ -168,21 +201,9 @@
// are done
if (generateAllTables && values.size() > 1) {
builder.setLength(0);
- builder.append("{\"tableName\":\"");
- builder.append(currentTable.getChild().toString());
- builder.append("\"");
- // Build the record data
- for (int counter = 0; counter < values.get(1).size(); counter++) {
- builder.append(",\"");
- builder.append(currentTable.getChild().getColumns()[counter].getName());
- builder.append("\":\"");
- builder.append(values.get(0).get(counter));
- builder.append("\"");
- }
-
- // Close the record
- builder.append("}");
+ // Construct the record
+ constructRecord(values.get(CHILD_VALUES_INDEX), currentTable.getChild());
// Add it to the children rows list
childRow = builder.toString();
@@ -193,40 +214,53 @@
}
/**
- * Gets the table matching the provided string table name, throws an exception if no table is returned.
+ * Constructs the record with the appropriate data types.
*
- * @param tableName String table name to search for.
- * @return Table if found, throws an exception otherwise.
+ * @param values list containing all the generated values for all columns in a string format.
+ * @param table Table the record is being constructed for
*/
- private List<Table> getTableFromStringTableName(String tableName) throws HyracksDataException {
+ private void constructRecord(List<String> values, Table table) {
+ // Add the table name to the record
+ builder.append("{\"").append(TABLE_NAME_FIELD_NAME).append("\":\"").append(table.getName()).append("\"");
- // Get all the tables
- if (generateAllTables) {
- // Remove the DBGEN_VERSION table and all children tables, parent tables will generate them
- return Table.getBaseTables().stream()
- .filter(table -> !table.equals(Table.DBGEN_VERSION) && !table.isChild())
- .collect(Collectors.toList());
+ // Build the record data
+ for (int counter = 0; counter < values.size(); counter++) {
+
+ // If the value is null, no need to check for the column type
+ if (values.get(counter) == null) {
+ builder.append(",\"");
+ builder.append(table.getColumns()[counter].getName());
+ builder.append("\":");
+ builder.append(values.get(counter));
+ continue;
+ }
+
+ String fieldName = table.getColumns()[counter].getName();
+ String stringValue = values.get(counter);
+
+ // Convert the value to the appropriate type based on the column type
+ switch (table.getColumns()[counter].getType().getBase()) {
+ case INTEGER:
+ builder.append(",\"").append(fieldName).append("\":").append(Integer.valueOf(stringValue));
+ break;
+ case DECIMAL:
+ builder.append(",\"").append(fieldName).append("\":").append(Double.valueOf(stringValue));
+ break;
+ // IDENTIFIER type could be any value, so we're taking it as a string
+ // DATE and TIME are not supported; they are stored as strings and can be modified with date functions
+ // CHAR and VARCHAR are handled as strings
+ // any other type (default case) is handled as a string value
+ case IDENTIFIER:
+ case DATE:
+ case TIME:
+ case CHAR:
+ case VARCHAR:
+ default:
+ builder.append(",\"").append(fieldName).append("\":\"").append(stringValue).append("\"");
+ break;
+ }
}
- // Search for the table
- List<Table> matchedTables = Table.getBaseTables().stream()
- .filter(table -> tableName.equalsIgnoreCase(table.getName())).collect(Collectors.toList());
-
- // Ensure the table was found
- if (matchedTables.size() != 1) {
- throw new RuntimeDataException(ErrorCode.TPCDS_INVALID_TABLE_NAME, getFunctionIdentifier().getName(),
- tableName);
- }
-
- return matchedTables;
- }
-
- /**
- * Gets the function identifier
- *
- * @return function identifier
- */
- private FunctionIdentifier getFunctionIdentifier() {
- return functionIdentifier;
+ builder.append("}");
}
}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.query.sqlpp
index 303c1c7..99151b8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.query.sqlpp
@@ -19,7 +19,7 @@
set `import-private-functions` `true`;
-select d.tableName, count(*) as count
+select d.table_name, count(*) as count
from tpcds_datagen(1) as d
-group by d.tableName
-order by d.tableName;
\ No newline at end of file
+group by d.table_name
+order by d.table_name;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.adm
index 4040b6b..b01e2cc 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/tpcds/datagen_sf_1_all_tables_2/datagen_sf_1_all_tables_2.1.adm
@@ -1,24 +1,24 @@
-{ "count": 6, "tableName": "CALL_CENTER" }
-{ "count": 11718, "tableName": "CATALOG_PAGE" }
-{ "count": 144067, "tableName": "CATALOG_RETURNS" }
-{ "count": 1441548, "tableName": "CATALOG_SALES" }
-{ "count": 100000, "tableName": "CUSTOMER" }
-{ "count": 50000, "tableName": "CUSTOMER_ADDRESS" }
-{ "count": 1920800, "tableName": "CUSTOMER_DEMOGRAPHICS" }
-{ "count": 73049, "tableName": "DATE_DIM" }
-{ "count": 7200, "tableName": "HOUSEHOLD_DEMOGRAPHICS" }
-{ "count": 20, "tableName": "INCOME_BAND" }
-{ "count": 11745000, "tableName": "INVENTORY" }
-{ "count": 18000, "tableName": "ITEM" }
-{ "count": 300, "tableName": "PROMOTION" }
-{ "count": 35, "tableName": "REASON" }
-{ "count": 20, "tableName": "SHIP_MODE" }
-{ "count": 12, "tableName": "STORE" }
-{ "count": 287514, "tableName": "STORE_RETURNS" }
-{ "count": 2880404, "tableName": "STORE_SALES" }
-{ "count": 86400, "tableName": "TIME_DIM" }
-{ "count": 5, "tableName": "WAREHOUSE" }
-{ "count": 60, "tableName": "WEB_PAGE" }
-{ "count": 71763, "tableName": "WEB_RETURNS" }
-{ "count": 719384, "tableName": "WEB_SALES" }
-{ "count": 30, "tableName": "WEB_SITE" }
\ No newline at end of file
+{ "count": 6, "table_name": "call_center" }
+{ "count": 11718, "table_name": "catalog_page" }
+{ "count": 144067, "table_name": "catalog_returns" }
+{ "count": 1441548, "table_name": "catalog_sales" }
+{ "count": 100000, "table_name": "customer" }
+{ "count": 50000, "table_name": "customer_address" }
+{ "count": 1920800, "table_name": "customer_demographics" }
+{ "count": 73049, "table_name": "date_dim" }
+{ "count": 7200, "table_name": "household_demographics" }
+{ "count": 20, "table_name": "income_band" }
+{ "count": 11745000, "table_name": "inventory" }
+{ "count": 18000, "table_name": "item" }
+{ "count": 300, "table_name": "promotion" }
+{ "count": 35, "table_name": "reason" }
+{ "count": 20, "table_name": "ship_mode" }
+{ "count": 12, "table_name": "store" }
+{ "count": 287514, "table_name": "store_returns" }
+{ "count": 2880404, "table_name": "store_sales" }
+{ "count": 86400, "table_name": "time_dim" }
+{ "count": 5, "table_name": "warehouse" }
+{ "count": 60, "table_name": "web_page" }
+{ "count": 71763, "table_name": "web_returns" }
+{ "count": 719384, "table_name": "web_sales" }
+{ "count": 30, "table_name": "web_site" }
\ No newline at end of file