[ASTERIXDB-3153][OTH] Make the default storage format configurable

- user model changes: yes
- storage format changes: no
- interface changes: no

Details:
Currently, columnar datasets must be declared explicitly using
the WITH clause. This change adds a storage configuration option
for the default storage format (either row or column; the default
is row), which applies when a dataset does not specify a format.

Change-Id: I173dd026528aa4d35dbdddcf1de4a55249c19caf
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17447
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Wail Alkowaileet <wael.y.k@gmail.com>
Reviewed-by: Murtadha Al Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index 15a8238..3fa7423 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -748,8 +748,8 @@
         boolean itemTypeAdded = false, metaItemTypeAdded = false;
 
         StorageProperties storageProperties = metadataProvider.getStorageProperties();
-        DatasetFormatInfo datasetFormatInfo = dd.getDatasetFormatInfo(storageProperties.getColumnMaxTupleCount(),
-                storageProperties.getColumnFreeSpaceTolerance());
+        DatasetFormatInfo datasetFormatInfo = dd.getDatasetFormatInfo(storageProperties.getStorageFormat(),
+                storageProperties.getColumnMaxTupleCount(), storageProperties.getColumnFreeSpaceTolerance());
         try {
             // Check if the dataverse exists
             Dataverse dv = MetadataManager.INSTANCE.getDataverse(mdTxnCtx, dataverseName);
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java
index 5b99fa0..073da97 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/StorageProperties.java
@@ -63,7 +63,8 @@
         STORAGE_GLOBAL_CLEANUP(BOOLEAN, true),
         STORAGE_GLOBAL_CLEANUP_TIMEOUT(POSITIVE_INTEGER, (int) TimeUnit.MINUTES.toSeconds(10)),
         STORAGE_COLUMN_MAX_TUPLE_COUNT(NONNEGATIVE_INTEGER, 15000),
-        STORAGE_COLUMN_FREE_SPACE_TOLERANCE(DOUBLE, 0.15);
+        STORAGE_COLUMN_FREE_SPACE_TOLERANCE(DOUBLE, 0.15),
+        STORAGE_FORMAT(STRING, "row");
 
         private final IOptionType interpreter;
         private final Object defaultValue;
@@ -136,6 +137,8 @@
                 case STORAGE_COLUMN_FREE_SPACE_TOLERANCE:
                     return "The percentage of the maximum tolerable empty space for a physical mega leaf page (e.g.,"
                             + " 0.15 means a physical page with 15% or less empty space is tolerable)";
+                case STORAGE_FORMAT:
+                    return "The default storage format (either row or column)";
                 default:
                     throw new IllegalStateException("NYI: " + this);
             }
@@ -280,4 +283,8 @@
     public float getColumnFreeSpaceTolerance() {
         return (float) accessor.getDouble(Option.STORAGE_COLUMN_FREE_SPACE_TOLERANCE);
     }
+
+    public String getStorageFormat() {
+        return accessor.getString(Option.STORAGE_FORMAT); // STORAGE_FORMAT option; declared default is "row"
+    }
 }
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
index 8f48db0..b8d1bfd 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
@@ -134,14 +134,21 @@
                 .getOptionalString(DatasetDeclParametersUtil.STORAGE_BLOCK_COMPRESSION_SCHEME_PARAMETER_NAME);
     }
 
-    public DatasetFormatInfo getDatasetFormatInfo(int defaultMaxTupleCount, float defaultFreeSpaceTolerance) {
-        final AdmObjectNode datasetFormatNode =
-                (AdmObjectNode) withObjectNode.get(DatasetDeclParametersUtil.DATASET_FORMAT_PARAMETER_NAME);
-        if (datasetType != DatasetType.INTERNAL || datasetFormatNode == null) {
-            return DatasetFormatInfo.DEFAULT;
+    public DatasetFormatInfo getDatasetFormatInfo(String defaultFormat, int defaultMaxTupleCount,
+            float defaultFreeSpaceTolerance) {
+        if (datasetType != DatasetType.INTERNAL) {
+            return DatasetFormatInfo.SYSTEM_DEFAULT;
         }
-        DatasetConfig.DatasetFormat datasetFormat = DatasetConfig.DatasetFormat.getFormat(
-                datasetFormatNode.getOptionalString(DatasetDeclParametersUtil.DATASET_FORMAT_FORMAT_PARAMETER_NAME));
+
+        AdmObjectNode datasetFormatNode = (AdmObjectNode) withObjectNode
+                .getOrDefault(DatasetDeclParametersUtil.DATASET_FORMAT_PARAMETER_NAME, AdmObjectNode.EMPTY);
+        DatasetConfig.DatasetFormat datasetFormat = DatasetConfig.DatasetFormat.getFormat(datasetFormatNode
+                .getOptionalString(DatasetDeclParametersUtil.DATASET_FORMAT_FORMAT_PARAMETER_NAME, defaultFormat));
+
+        if (datasetFormat == DatasetConfig.DatasetFormat.ROW) {
+            return DatasetFormatInfo.SYSTEM_DEFAULT;
+        }
+
         int maxTupleCount = datasetFormatNode.getOptionalInt(
                 DatasetDeclParametersUtil.DATASET_FORMAT_MAX_TUPLE_COUNT_PARAMETER_NAME, defaultMaxTupleCount);
         float freeSpaceTolerance = datasetFormatNode.getOptionalFloat(
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java
index 38951a4..86d9233 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/dataset/DatasetFormatInfo.java
@@ -20,11 +20,15 @@
 
 import java.io.Serializable;
 
+import org.apache.asterix.common.config.DatasetConfig;
 import org.apache.asterix.common.config.DatasetConfig.DatasetFormat;
 
 public class DatasetFormatInfo implements Serializable {
     private static final long serialVersionUID = 7656132322813253435L;
-    public static final DatasetFormatInfo DEFAULT = new DatasetFormatInfo();
+    /**
+     * System's default format, used for row-format and non-{@link DatasetConfig.DatasetType#INTERNAL} datasets
+     */
+    public static final DatasetFormatInfo SYSTEM_DEFAULT = new DatasetFormatInfo();
     private final DatasetFormat format;
     private final int maxTupleCount;
     private final float freeSpaceTolerance;
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
index a35be40..c0f2ddd 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entities/Dataset.java
@@ -164,7 +164,7 @@
             DatasetType datasetType, int datasetId, int pendingOp) {
         this(dataverseName, datasetName, recordTypeDataverseName, recordTypeName, /*metaTypeDataverseName*/null,
                 /*metaTypeName*/null, nodeGroupName, compactionPolicy, compactionPolicyProperties, datasetDetails,
-                hints, datasetType, datasetId, pendingOp, CompressionManager.NONE, DatasetFormatInfo.DEFAULT);
+                hints, datasetType, datasetId, pendingOp, CompressionManager.NONE, DatasetFormatInfo.SYSTEM_DEFAULT);
     }
 
     public Dataset(DataverseName dataverseName, String datasetName, DataverseName itemTypeDataverseName,
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
index eafa331..790faa5 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslator.java
@@ -433,7 +433,7 @@
         int datasetFormatIndex =
                 datasetType.getFieldIndex(MetadataRecordTypes.DATASET_ARECORD_DATASET_FORMAT_FIELD_NAME);
         if (datasetFormatIndex < 0) {
-            return DatasetFormatInfo.DEFAULT;
+            return DatasetFormatInfo.SYSTEM_DEFAULT;
         }
 
         ARecordType datasetFormatType = (ARecordType) datasetType.getFieldTypes()[datasetFormatIndex];
@@ -676,7 +676,7 @@
 
     private void writeDatasetFormatInfo(Dataset dataset) throws HyracksDataException {
         DatasetFormatInfo info = dataset.getDatasetFormatInfo();
-        if (DatasetFormatInfo.DEFAULT == info) {
+        if (DatasetFormatInfo.SYSTEM_DEFAULT == info) {
             return;
         }
 
diff --git a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java
index 292ff16..b6f9df7 100644
--- a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java
+++ b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/DatasetTupleTranslatorTest.java
@@ -57,7 +57,7 @@
                     DataverseName.createSinglePartName("foo"), "LogType", DataverseName.createSinglePartName("CB"),
                     "MetaType", "DEFAULT_NG_ALL_NODES", "prefix", compactionPolicyProperties, details,
                     Collections.emptyMap(), DatasetType.INTERNAL, 115, 0, CompressionManager.NONE,
-                    DatasetFormatInfo.DEFAULT);
+                    DatasetFormatInfo.SYSTEM_DEFAULT);
             DatasetTupleTranslator dtTranslator = new DatasetTupleTranslator(true);
             ITupleReference tuple = dtTranslator.getTupleFromMetadataEntity(dataset);
             Dataset deserializedDataset = dtTranslator.getMetadataEntityFromTuple(tuple);
diff --git a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java
index 77c64d7..9f54478 100644
--- a/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java
+++ b/asterixdb/asterix-metadata/src/test/java/org/apache/asterix/metadata/entitytupletranslators/IndexTupleTranslatorTest.java
@@ -70,7 +70,7 @@
             DataverseName dvCB = DataverseName.createSinglePartName("CB");
             Dataset dataset = new Dataset(dvTest, "d1", dvFoo, "LogType", dvCB, "MetaType", "DEFAULT_NG_ALL_NODES",
                     "prefix", compactionPolicyProperties, details, Collections.emptyMap(), DatasetType.INTERNAL, 115, 0,
-                    CompressionManager.NONE, DatasetFormatInfo.DEFAULT);
+                    CompressionManager.NONE, DatasetFormatInfo.SYSTEM_DEFAULT);
 
             Index index = new Index(dvTest, "d1", "i1", IndexType.BTREE,
                     Collections.singletonList(Collections.singletonList("row_id")),
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java
index 966b9ba..bcabb18 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/object/base/AdmObjectNode.java
@@ -68,6 +68,15 @@
         return children.get(fieldName);
     }
 
+    public IAdmNode getOrDefault(String fieldName, IAdmNode defaultValue) {
+        final IAdmNode found = get(fieldName); // null when the field is absent
+        return found == null ? defaultValue : found;
+    }
+
+    public IAdmNode getOrEmpty(String fieldName) {
+        return getOrDefault(fieldName, EMPTY); // absent fields map to the shared EMPTY node instead of null
+    }
+
     public Set<String> getFieldNames() {
         return children.keySet();
     }
@@ -162,6 +171,11 @@
         return ((AdmStringNode) node).get();
     }
 
+    public String getOptionalString(String field, String defaultValue) {
+        final String found = getOptionalString(field); // single-argument overload; null means "not set"
+        return found == null ? defaultValue : found;
+    }
+
     public int getOptionalInt(String field, int defaultValue) {
         final IAdmNode node = get(field);
         if (node == null) {