[ASTERIXDB-3601][STO][CONF] adding config for selecting pagezero writer
- user model changes: no
- storage format changes: no
- interface changes: no
Ext-ref: MB-66306
Change-Id: Ia03eb46690a2f2cc924a00d1856e7551caf41ebd
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19989
Reviewed-by: Peeyush Gupta <peeyush.gupta@couchbase.com>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java
index 46dae1b..a1fcfe3 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnTupleWriter.java
@@ -146,26 +146,39 @@
@Override
public int getPageZeroWriterOccupiedSpace(int maxColumnsInPageZerothSegment, boolean includeCurrentTupleColumns,
- boolean adaptive) {
- int totalNumberOfColumns = getAbsoluteNumberOfColumns(includeCurrentTupleColumns);
- totalNumberOfColumns = Math.min(totalNumberOfColumns, maxColumnsInPageZerothSegment);
+ IColumnPageZeroWriter.ColumnPageZeroWriterType writerType) {
+ int spaceOccupiedByDefaultWriter;
+ int spaceOccupiedBySparseWriter;
- int spaceOccupiedByDefaultWriter = DefaultColumnMultiPageZeroWriter.EXTENDED_HEADER_SIZE + totalNumberOfColumns
- * (DefaultColumnPageZeroWriter.COLUMN_OFFSET_SIZE + DefaultColumnPageZeroWriter.FILTER_SIZE);
-
- if (!adaptive) {
- // go for default multipage writer
+ if (writerType == IColumnPageZeroWriter.ColumnPageZeroWriterType.DEFAULT) {
+ // go for default multi-page writer
+ spaceOccupiedByDefaultWriter =
+ getSpaceOccupiedByDefaultWriter(maxColumnsInPageZerothSegment, includeCurrentTupleColumns);
return spaceOccupiedByDefaultWriter;
+ } else if (writerType == IColumnPageZeroWriter.ColumnPageZeroWriterType.SPARSE) {
+ // Maximum space occupied by the columns = maxColumnsInPageZerothSegment * (offset + filter size)
+ spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment);
+ return spaceOccupiedBySparseWriter;
}
- // Maximum space occupied by the columns = maxColumnsInPageZerothSegment * (offset + filter size)
- int spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment);
+ spaceOccupiedByDefaultWriter =
+ getSpaceOccupiedByDefaultWriter(maxColumnsInPageZerothSegment, includeCurrentTupleColumns);
+ spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment);
pageZeroWriterFlavorSelector.switchPageZeroWriterIfNeeded(spaceOccupiedByDefaultWriter,
- spaceOccupiedBySparseWriter, adaptive);
+ spaceOccupiedBySparseWriter);
return Math.min(spaceOccupiedBySparseWriter, spaceOccupiedByDefaultWriter);
}
+ private int getSpaceOccupiedByDefaultWriter(int maxColumnsInPageZerothSegment, boolean includeCurrentTupleColumns) {
+ int spaceOccupiedByDefaultWriter;
+ int totalNumberOfColumns = getAbsoluteNumberOfColumns(includeCurrentTupleColumns);
+ totalNumberOfColumns = Math.min(totalNumberOfColumns, maxColumnsInPageZerothSegment);
+ spaceOccupiedByDefaultWriter = DefaultColumnMultiPageZeroWriter.EXTENDED_HEADER_SIZE + totalNumberOfColumns
+ * (DefaultColumnPageZeroWriter.COLUMN_OFFSET_SIZE + DefaultColumnPageZeroWriter.FILTER_SIZE);
+ return spaceOccupiedByDefaultWriter;
+ }
+
private int getSpaceOccupiedBySparseWriter(int maxColumnsInPageZerothSegment) {
int presentColumns = transformerForCurrentTuple.getNumberOfVisitedColumnsInBatch();
int numberOfPagesRequired = (int) Math.ceil(
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java
index 293f764..d31e1d3 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/merge/MergeColumnTupleWriter.java
@@ -156,25 +156,38 @@
@Override
public int getPageZeroWriterOccupiedSpace(int maxColumnsInPageZerothSegment, boolean includeCurrentTupleColumns,
- boolean adaptive) {
- int totalNumberOfColumns = getAbsoluteNumberOfColumns(includeCurrentTupleColumns);
- totalNumberOfColumns = Math.min(totalNumberOfColumns, maxColumnsInPageZerothSegment);
+ IColumnPageZeroWriter.ColumnPageZeroWriterType writerType) {
+ int spaceOccupiedByDefaultWriter;
+ int spaceOccupiedBySparseWriter;
- int spaceOccupiedByDefaultWriter = DefaultColumnMultiPageZeroWriter.EXTENDED_HEADER_SIZE + totalNumberOfColumns
- * (DefaultColumnPageZeroWriter.COLUMN_OFFSET_SIZE + DefaultColumnPageZeroWriter.FILTER_SIZE);
-
- if (!adaptive) {
- // go for default multipage writer
+ if (writerType == IColumnPageZeroWriter.ColumnPageZeroWriterType.DEFAULT) {
+ // go for default multi-page writer
+ spaceOccupiedByDefaultWriter =
+ getSpaceOccupiedByDefaultWriter(maxColumnsInPageZerothSegment, includeCurrentTupleColumns);
return spaceOccupiedByDefaultWriter;
+ } else if (writerType == IColumnPageZeroWriter.ColumnPageZeroWriterType.SPARSE) {
+ // Maximum space occupied by the columns = maxColumnsInPageZerothSegment * (offset + filter size)
+ spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment);
+ return spaceOccupiedBySparseWriter;
}
- // space occupied by the sparse writer
- int spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment);
+ spaceOccupiedBySparseWriter = getSpaceOccupiedBySparseWriter(maxColumnsInPageZerothSegment);
+ spaceOccupiedByDefaultWriter =
+ getSpaceOccupiedByDefaultWriter(maxColumnsInPageZerothSegment, includeCurrentTupleColumns);
pageZeroWriterFlavorSelector.switchPageZeroWriterIfNeeded(spaceOccupiedByDefaultWriter,
- spaceOccupiedBySparseWriter, adaptive);
+ spaceOccupiedBySparseWriter);
return Math.min(spaceOccupiedBySparseWriter, spaceOccupiedByDefaultWriter);
}
+ private int getSpaceOccupiedByDefaultWriter(int maxColumnsInPageZerothSegment, boolean includeCurrentTupleColumns) {
+ int spaceOccupiedByDefaultWriter;
+ int totalNumberOfColumns = getAbsoluteNumberOfColumns(includeCurrentTupleColumns);
+ totalNumberOfColumns = Math.min(totalNumberOfColumns, maxColumnsInPageZerothSegment);
+ spaceOccupiedByDefaultWriter = DefaultColumnMultiPageZeroWriter.EXTENDED_HEADER_SIZE + totalNumberOfColumns
+ * (DefaultColumnPageZeroWriter.COLUMN_OFFSET_SIZE + DefaultColumnPageZeroWriter.FILTER_SIZE);
+ return spaceOccupiedByDefaultWriter;
+ }
+
private int getSpaceOccupiedBySparseWriter(int maxColumnsInPageZerothSegment) {
int presentColumns = presentColumnsIndexes.cardinality();
int numberOfPagesRequired = (int) Math.ceil(
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/PageZeroWriterFlavorSelector.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/PageZeroWriterFlavorSelector.java
index c4f9bd5..06e441a 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/PageZeroWriterFlavorSelector.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/zero/PageZeroWriterFlavorSelector.java
@@ -50,7 +50,7 @@
*/
public class PageZeroWriterFlavorSelector implements IColumnPageZeroWriterFlavorSelector {
// Flag indicating which writer type is currently selected (DEFAULT_WRITER_FLAG=default, SPARSE_WRITER_FLAG=sparse)
- protected byte writerFlag = MULTI_PAGE_DEFAULT_WRITER_FLAG;
+ protected byte writerFlag = IColumnPageZeroWriter.ColumnPageZeroWriterType.ADAPTIVE.getWriterFlag();
// Cache of writer instances to avoid repeated object creation
private final Byte2ObjectArrayMap<IColumnPageZeroWriter> writers;
@@ -72,13 +72,7 @@
* @param spaceOccupiedBySparseWriter Space in bytes required by the sparse writer
*/
@Override
- public void switchPageZeroWriterIfNeeded(int spaceOccupiedByDefaultWriter, int spaceOccupiedBySparseWriter,
- boolean adaptive) {
- if (!adaptive) {
- // If not adaptive, always use the default writer
- writerFlag = MULTI_PAGE_DEFAULT_WRITER_FLAG;
- return;
- }
+ public void switchPageZeroWriterIfNeeded(int spaceOccupiedByDefaultWriter, int spaceOccupiedBySparseWriter) {
if (spaceOccupiedByDefaultWriter <= spaceOccupiedBySparseWriter) {
// Default writer is more space-efficient (or equal), use it
writerFlag = MULTI_PAGE_DEFAULT_WRITER_FLAG;
@@ -88,10 +82,15 @@
}
}
+ @Override
+ public void setPageZeroWriterFlag(byte writerFlag) {
+ this.writerFlag = writerFlag;
+ }
+
/**
* Returns the currently selected page zero writer instance.
* Writers are cached to avoid repeated object creation.
- *
+ *
* @return the selected writer instance
* @throws IllegalStateException if an unsupported writer flag is encountered
*/
@@ -107,11 +106,16 @@
};
}
+ @Override
+ public byte getWriterFlag() {
+ return writerFlag;
+ }
+
/**
* Creates a page zero reader instance based on the provided flag.
* This method is used during deserialization to create the appropriate reader
* for the writer type that was used during serialization.
- *
+ *
* @param flag The flag code identifying the writer type (DEFAULT_WRITER_FLAG=default, SPARSE_WRITER_FLAG=sparse)
* @return the appropriate reader instance
* @throws IllegalStateException if an unsupported reader flag is encountered
diff --git a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java
index faa2a87..6e85d29 100644
--- a/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java
+++ b/asterixdb/asterix-column/src/test/java/org/apache/asterix/column/test/bytes/AbstractBytesTest.java
@@ -62,6 +62,7 @@
import org.apache.hyracks.storage.am.lsm.btree.column.api.AbstractColumnTupleWriter;
import org.apache.hyracks.storage.am.lsm.btree.column.api.IColumnWriteMultiPageOp;
import org.apache.hyracks.storage.am.lsm.btree.column.cloud.buffercache.write.DefaultColumnWriteContext;
+import org.apache.hyracks.storage.am.lsm.btree.column.impls.btree.IColumnPageZeroWriter;
import org.apache.hyracks.util.StorageUtil;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@@ -202,7 +203,8 @@
//Reserved for the number of pages
int requiredFreeSpace = HEADER_SIZE;
//Columns' Offsets
- requiredFreeSpace += columnWriter.getPageZeroWriterOccupiedSpace(100, true, false);
+ requiredFreeSpace += columnWriter.getPageZeroWriterOccupiedSpace(100, true,
+ IColumnPageZeroWriter.ColumnPageZeroWriterType.DEFAULT);
//Occupied space from previous writes
requiredFreeSpace += columnWriter.getPrimaryKeysEstimatedSize();
//New tuple required space
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
index 0e7de33..9a8d9ac 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
@@ -108,7 +108,7 @@
.joinPath(appConfig.getString(ControllerConfig.Option.DEFAULT_DIR), "passwd"),
ControllerConfig.Option.DEFAULT_DIR.cmdline() + "/passwd"),
STORAGE_MAX_COLUMNS_IN_ZEROTH_SEGMENT(INTEGER_BYTE_UNIT, 5000),
- ADAPTIVE_PAGE_ZERO_WRITER_SELECTION(BOOLEAN, false);
+ STORAGE_PAGE_ZERO_WRITER(STRING, "default");
private final IOptionType parser;
private final String defaultValueDescription;
@@ -261,9 +261,9 @@
return "The maximum time to wait for the tasks to be aborted";
case STORAGE_MAX_COLUMNS_IN_ZEROTH_SEGMENT:
return "The maximum number of columns in zero segment (default: 5000).";
- case ADAPTIVE_PAGE_ZERO_WRITER_SELECTION:
- return "The config to choose between writers dynamically (default: false, i.e. use the "
- + "default writer for all segments).";
+ case STORAGE_PAGE_ZERO_WRITER:
+ return "The config to choose between writers for page zero. (Possible values: default, sparse, adaptive), "
+ + "(default value: default)";
default:
throw new IllegalStateException("Not yet implemented: " + this);
}
@@ -647,7 +647,7 @@
return appConfig.getInt(Option.STORAGE_MAX_COLUMNS_IN_ZEROTH_SEGMENT);
}
- public boolean isAdaptivePageZeroWriterSelection() {
- return appConfig.getBoolean(Option.ADAPTIVE_PAGE_ZERO_WRITER_SELECTION);
+ public String getStoragePageZeroWriter() {
+ return appConfig.getString(Option.STORAGE_PAGE_ZERO_WRITER);
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleWriter.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleWriter.java
index 705df6a..f5cef05 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleWriter.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/api/AbstractColumnTupleWriter.java
@@ -65,7 +65,7 @@
* @return the size needed to store columns' offsets
*/
public abstract int getPageZeroWriterOccupiedSpace(int maxColumnsInPageZerothSegment,
- boolean includeCurrentTupleColumns, boolean adaptive);
+ boolean includeCurrentTupleColumns, IColumnPageZeroWriter.ColumnPageZeroWriterType adaptive);
/**
* @return maximum number of tuples to be stored per page (i.e., page0)
@@ -106,4 +106,10 @@
* @return
*/
public abstract IColumnPageZeroWriterFlavorSelector getColumnPageZeroWriterFlavorSelector();
+
+ public void setWriterType(IColumnPageZeroWriter.ColumnPageZeroWriterType pageZeroWriterType) {
+ if (pageZeroWriterType != IColumnPageZeroWriter.ColumnPageZeroWriterType.ADAPTIVE) {
+ getColumnPageZeroWriterFlavorSelector().setPageZeroWriterFlag(pageZeroWriterType.getWriterFlag());
+ }
+ }
}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java
index 6826f3b..8439373 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/ColumnBTreeBulkloader.java
@@ -57,7 +57,7 @@
private final ISplitKey lowKey;
private final IColumnWriteContext columnWriteContext;
private final int maxColumnsInPageZerothSegment;
- private final boolean adaptiveWriter;
+ private final IColumnPageZeroWriter.ColumnPageZeroWriterType pageZeroWriterType;
private boolean setLowKey;
private int tupleCount;
@@ -87,7 +87,8 @@
// Writer config
maxColumnsInPageZerothSegment = storageConfig.getStorageMaxColumnsInZerothSegment();
- adaptiveWriter = storageConfig.isAdaptivePageZeroWriterSelection();
+ pageZeroWriterType = IColumnPageZeroWriter.ColumnPageZeroWriterType
+ .valueOf(storageConfig.getStoragePageZeroWriter().toUpperCase());
// For logging. Starts with 1 for page0
numberOfPagesInCurrentLeafNode = 1;
@@ -123,6 +124,8 @@
private boolean isFull(ITupleReference tuple) throws HyracksDataException {
if (tupleCount == 0) {
columnWriter.updateColumnMetadataForCurrentTuple(tuple);
+ // this is for non-adaptive case.
+ columnWriter.setWriterType(pageZeroWriterType);
return false;
} else if (tupleCount >= columnWriter.getMaxNumberOfTuples()) {
//We reached the maximum number of tuples
@@ -131,7 +134,7 @@
//Columns' Offsets
columnWriter.updateColumnMetadataForCurrentTuple(tuple);
int requiredFreeSpace =
- columnWriter.getPageZeroWriterOccupiedSpace(maxColumnsInPageZerothSegment, true, adaptiveWriter);
+ columnWriter.getPageZeroWriterOccupiedSpace(maxColumnsInPageZerothSegment, true, pageZeroWriterType);
//Occupied space from previous writes
requiredFreeSpace += columnWriter.getPrimaryKeysEstimatedSize();
//min and max tuples' sizes
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroWriter.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroWriter.java
index 3795c47..77fcee0 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroWriter.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroWriter.java
@@ -48,6 +48,22 @@
int MIN_COLUMN_SPACE = 4 + 16; // offset + filter size
+ enum ColumnPageZeroWriterType {
+ DEFAULT((byte) 2), // multi-page default page zero
+ SPARSE((byte) 3), // multi-page sparse page zero
+ ADAPTIVE((byte) -1); // adaptive writer that switches between default and sparse based on space efficiency
+
+ private final byte writerFlag;
+
+ ColumnPageZeroWriterType(byte writerFlag) {
+ this.writerFlag = writerFlag;
+ }
+
+ public byte getWriterFlag() {
+ return writerFlag;
+ }
+ }
+
/**
* Initializes the writer with page zero buffer and column information.
*
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroWriterFlavorSelector.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroWriterFlavorSelector.java
index fa8e3f6..0dc4d4d 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroWriterFlavorSelector.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-btree-column/src/main/java/org/apache/hyracks/storage/am/lsm/btree/column/impls/btree/IColumnPageZeroWriterFlavorSelector.java
@@ -34,16 +34,17 @@
/**
* Evaluates and switches the page zero writer based on space efficiency.
- *
+ * <p>
* This method compares the space requirements of both writer implementations
* and selects the one that uses less space. The decision is made dynamically
* for each batch of data to optimize storage utilization.
- *
+ *
* @param spaceOccupiedByDefaultWriter Space in bytes required by the default writer
- * @param spaceOccupiedBySparseWriter Space in bytes required by the sparse writer
+ * @param spaceOccupiedBySparseWriter Space in bytes required by the sparse writer
*/
- void switchPageZeroWriterIfNeeded(int spaceOccupiedByDefaultWriter, int spaceOccupiedBySparseWriter,
- boolean adaptive);
+ void switchPageZeroWriterIfNeeded(int spaceOccupiedByDefaultWriter, int spaceOccupiedBySparseWriter);
+
+ byte getWriterFlag();
/**
* Creates the appropriate page zero reader for the given writer type.
@@ -58,6 +59,8 @@
*/
IColumnPageZeroReader createPageZeroReader(byte flag, int capacity);
+ void setPageZeroWriterFlag(byte writerFlag);
+
/**
* Returns the currently selected page zero writer instance.
*