[ASTERIXDB-3153][OTH] Make the default storage format configurable
- user model changes: yes
- storage format changes: no
- interface changes: no
Details:
Currently, columnar datasets must be declared explicitly using
the WITH clause. We should extend AsterixDB's capability
to configure the default storage format (either row or column).
Change-Id: I173dd026528aa4d35dbdddcf1de4a55249c19caf
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17447
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Wail Alkowaileet <wael.y.k@gmail.com>
Reviewed-by: Murtadha Al Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index 15a8238..3fa7423 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -748,8 +748,8 @@
boolean itemTypeAdded = false, metaItemTypeAdded = false;
StorageProperties storageProperties = metadataProvider.getStorageProperties();
- DatasetFormatInfo datasetFormatInfo = dd.getDatasetFormatInfo(storageProperties.getColumnMaxTupleCount(),
- storageProperties.getColumnFreeSpaceTolerance());
+ DatasetFormatInfo datasetFormatInfo = dd.getDatasetFormatInfo(storageProperties.getStorageFormat(),
+ storageProperties.getColumnMaxTupleCount(), storageProperties.getColumnFreeSpaceTolerance());
try {
// Check if the dataverse exists
Dataverse dv = MetadataManager.INSTANCE.getDataverse(mdTxnCtx, dataverseName);
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java
index 5b99fa0..073da97 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java
@@ -63,7 +63,8 @@
STORAGE_GLOBAL_CLEANUP(BOOLEAN, true),
STORAGE_GLOBAL_CLEANUP_TIMEOUT(POSITIVE_INTEGER, (int) TimeUnit.MINUTES.toSeconds(10)),
STORAGE_COLUMN_MAX_TUPLE_COUNT(NONNEGATIVE_INTEGER, 15000),
- STORAGE_COLUMN_FREE_SPACE_TOLERANCE(DOUBLE, 0.15);
+ STORAGE_COLUMN_FREE_SPACE_TOLERANCE(DOUBLE, 0.15),
+ STORAGE_FORMAT(STRING, "row");
private final IOptionType interpreter;
private final Object defaultValue;
@@ -136,6 +137,8 @@
case STORAGE_COLUMN_FREE_SPACE_TOLERANCE:
return "The percentage of the maximum tolerable empty space for a physical mega leaf page (e.g.,"
+ " 0.15 means a physical page with 15% or less empty space is tolerable)";
+ case STORAGE_FORMAT:
+ return "The default storage format (either row or column)";
default:
throw new IllegalStateException("NYI: " + this);
}
@@ -280,4 +283,8 @@
public float getColumnFreeSpaceTolerance() {
return (float) accessor.getDouble(Option.STORAGE_COLUMN_FREE_SPACE_TOLERANCE);
}
+
+ /**
+  * Reads the {@link Option#STORAGE_FORMAT} option.
+  *
+  * @return the configured default storage format (either row or column)
+  */
+ public String getStorageFormat() {
+     final String defaultFormat = accessor.getString(Option.STORAGE_FORMAT);
+     return defaultFormat;
+ }
}
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
index 8f48db0..b8d1bfd 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
@@ -134,14 +134,21 @@
.getOptionalString(DatasetDeclParametersUtil.STORAGE_BLOCK_COMPRESSION_SCHEME_PARAMETER_NAME);
}
- public DatasetFormatInfo getDatasetFormatInfo(int defaultMaxTupleCount, float defaultFreeSpaceTolerance) {
- final AdmObjectNode datasetFormatNode =
- (AdmObjectNode) withObjectNode.get(DatasetDeclParametersUtil.DATASET_FORMAT_PARAMETER_NAME);
- if (datasetType != DatasetType.INTERNAL || datasetFormatNode == null) {
- return DatasetFormatInfo.DEFAULT;
+ public DatasetFormatInfo getDatasetFormatInfo(String defaultFormat, int defaultMaxTupleCount,
+ float defaultFreeSpaceTolerance) {
+ if (datasetType != DatasetType.INTERNAL) {
+ return DatasetFormatInfo.SYSTEM_DEFAULT;
}
- DatasetConfig.DatasetFormat datasetFormat = DatasetConfig.DatasetFormat.getFormat(
- datasetFormatNode.getOptionalString(DatasetDeclParametersUtil.DATASET_FORMAT_FORMAT_PARAMETER_NAME));
+
+ AdmObjectNode datasetFormatNode = (AdmObjectNode) withObjectNode
+ .getOrDefault(DatasetDeclParametersUtil.DATASET_FORMAT_PARAMETER_NAME, AdmObjectNode.EMPTY);
+ DatasetConfig.DatasetFormat datasetFormat = DatasetConfig.DatasetFormat.getFormat(datasetFormatNode
+ .getOptionalString(DatasetDeclParametersUtil.DATASET_FORMAT_FORMAT_PARAMETER_NAME, defaultFormat));
+
+ if (datasetFormat == DatasetConfig.DatasetFormat.ROW) {
+ return DatasetFormatInfo.SYSTEM_DEFAULT;
+ }
+
int maxTupleCount = datasetFormatNode.getOptionalInt(
DatasetDeclParametersUtil.DATASET_FORMAT_MAX_TUPLE_COUNT_PARAMETER_NAME, defaultMaxTupleCount);
float freeSpaceTolerance = datasetFormatNode.getOptionalFloat(
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java
index 38951a4..86d9233 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java
@@ -20,11 +20,15 @@
import java.io.Serializable;
+import org.apache.asterix.common.config.DatasetConfig;
import org.apache.asterix.common.config.DatasetConfig.DatasetFormat;
public class DatasetFormatInfo implements Serializable {
private static final long serialVersionUID = 7656132322813253435L;
- public static final DatasetFormatInfo DEFAULT = new DatasetFormatInfo();
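+ /**
+ * System's default format: used for non-{@link DatasetConfig.DatasetType#INTERNAL} datasets and for
+ * {@link DatasetConfig.DatasetType#INTERNAL} datasets whose format resolves to {@link DatasetFormat#ROW}
+ */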
+ public static final DatasetFormatInfo SYSTEM_DEFAULT = new DatasetFormatInfo();
private final DatasetFormat format;
private final int maxTupleCount;
private final float freeSpaceTolerance;
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
index a35be40..c0f2ddd 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
@@ -164,7 +164,7 @@
DatasetType datasetType, int datasetId, int pendingOp) {
this(dataverseName, datasetName, recordTypeDataverseName, recordTypeName, /*metaTypeDataverseName*/null,
/*metaTypeName*/null, nodeGroupName, compactionPolicy, compactionPolicyProperties, datasetDetails,
- hints, datasetType, datasetId, pendingOp, CompressionManager.NONE, DatasetFormatInfo.DEFAULT);
+ hints, datasetType, datasetId, pendingOp, CompressionManager.NONE, DatasetFormatInfo.SYSTEM_DEFAULT);
}
public Dataset(DataverseName dataverseName, String datasetName, DataverseName itemTypeDataverseName,
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
index eafa331..790faa5 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
@@ -433,7 +433,7 @@
int datasetFormatIndex =
datasetType.getFieldIndex(MetadataRecordTypes.DATASET_ARECORD_DATASET_FORMAT_FIELD_NAME);
if (datasetFormatIndex < 0) {
- return DatasetFormatInfo.DEFAULT;
+ return DatasetFormatInfo.SYSTEM_DEFAULT;
}
ARecordType datasetFormatType = (ARecordType) datasetType.getFieldTypes()[datasetFormatIndex];
@@ -676,7 +676,7 @@
private void writeDatasetFormatInfo(Dataset dataset) throws HyracksDataException {
DatasetFormatInfo info = dataset.getDatasetFormatInfo();
- if (DatasetFormatInfo.DEFAULT == info) {
+ if (DatasetFormatInfo.SYSTEM_DEFAULT == info) {
return;
}
diff --git a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java
index 292ff16..b6f9df7 100644
--- a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java
+++ b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java
@@ -57,7 +57,7 @@
DataverseName.createSinglePartName("foo"), "LogType", DataverseName.createSinglePartName("CB"),
"MetaType", "DEFAULT_NG_ALL_NODES", "prefix", compactionPolicyProperties, details,
Collections.emptyMap(), DatasetType.INTERNAL, 115, 0, CompressionManager.NONE,
- DatasetFormatInfo.DEFAULT);
+ DatasetFormatInfo.SYSTEM_DEFAULT);
DatasetTupleTranslator dtTranslator = new DatasetTupleTranslator(true);
ITupleReference tuple = dtTranslator.getTupleFromMetadataEntity(dataset);
Dataset deserializedDataset = dtTranslator.getMetadataEntityFromTuple(tuple);
diff --git a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java
index 77c64d7..9f54478 100644
--- a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java
+++ b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java
@@ -70,7 +70,7 @@
DataverseName dvCB = DataverseName.createSinglePartName("CB");
Dataset dataset = new Dataset(dvTest, "d1", dvFoo, "LogType", dvCB, "MetaType", "DEFAULT_NG_ALL_NODES",
"prefix", compactionPolicyProperties, details, Collections.emptyMap(), DatasetType.INTERNAL, 115, 0,
- CompressionManager.NONE, DatasetFormatInfo.DEFAULT);
+ CompressionManager.NONE, DatasetFormatInfo.SYSTEM_DEFAULT);
Index index = new Index(dvTest, "d1", "i1", IndexType.BTREE,
Collections.singletonList(Collections.singletonList("row_id")),
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java
index 966b9ba..bcabb18 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java
@@ -68,6 +68,15 @@
return children.get(fieldName);
}
+ /**
+  * Looks up the child node stored under {@code fieldName}, falling back to a caller-supplied node.
+  *
+  * @param fieldName    the name of the field to look up
+  * @param defaultValue the node to hand back when the lookup yields {@code null}
+  * @return the result of {@link #get(String)}, or {@code defaultValue} if that result is {@code null}
+  */
+ public IAdmNode getOrDefault(String fieldName, IAdmNode defaultValue) {
+     final IAdmNode child = get(fieldName);
+     if (child == null) {
+         return defaultValue;
+     }
+     return child;
+ }
+
+ /**
+  * Looks up the child node stored under {@code fieldName}, falling back to {@link #EMPTY} so that
+  * callers can chain further lookups without a null check.
+  *
+  * @param fieldName the name of the field to look up
+  * @return the child node, or {@link #EMPTY} when {@link #get(String)} returns {@code null}
+  */
+ public IAdmNode getOrEmpty(String fieldName) {
+     // Previously this returned children.get(fieldName) directly, i.e. null for a missing field,
+     // which made it indistinguishable from get() and contradicted the method name.
+     return getOrDefault(fieldName, EMPTY);
+ }
+
public Set<String> getFieldNames() {
return children.keySet();
}
@@ -162,6 +171,11 @@
return ((AdmStringNode) node).get();
}
+ /**
+  * Variant of {@link #getOptionalString(String)} that substitutes a fallback for a {@code null} result.
+  *
+  * @param field        the name of the field to read
+  * @param defaultValue the value to return when the single-argument lookup yields {@code null}
+  * @return the string from {@link #getOptionalString(String)}, or {@code defaultValue} if it is {@code null}
+  */
+ public String getOptionalString(String field, String defaultValue) {
+     final String found = getOptionalString(field);
+     if (found == null) {
+         return defaultValue;
+     }
+     return found;
+ }
+
public int getOptionalInt(String field, int defaultValue) {
final IAdmNode node = get(field);
if (node == null) {