Fix for issue 534.
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/file/DatasetOperations.java b/asterix-app/src/main/java/edu/uci/ics/asterix/file/DatasetOperations.java
index fde3165..1f87ae0 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/file/DatasetOperations.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/file/DatasetOperations.java
@@ -33,7 +33,6 @@
import edu.uci.ics.asterix.formats.base.IDataFormat;
import edu.uci.ics.asterix.metadata.MetadataManager;
import edu.uci.ics.asterix.metadata.MetadataTransactionContext;
-import edu.uci.ics.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint;
import edu.uci.ics.asterix.metadata.declared.AqlMetadataProvider;
import edu.uci.ics.asterix.metadata.entities.Dataset;
import edu.uci.ics.asterix.metadata.entities.Dataverse;
@@ -256,14 +255,7 @@
}
LOGGER.info("LOAD into File Splits: " + sb.toString());
- String numElementsHintString = dataset.getHints().get("CARDINALITY");
- long numElementsHint;
- if (numElementsHintString == null) {
- numElementsHint = DatasetCardinalityHint.DEFAULT;
- } else {
- numElementsHint = Long.parseLong(dataset.getHints().get("CARDINALITY"));
- }
-
+ long numElementsHint = metadataProvider.getCardinalityPerPartitionHint(dataset);
AsterixStorageProperties storageProperties = AsterixAppContextInfo.getInstance().getStorageProperties();
TreeIndexBulkLoadOperatorDescriptor btreeBulkLoad = new TreeIndexBulkLoadOperatorDescriptor(spec,
AsterixRuntimeComponentsProvider.NOINDEX_PROVIDER, AsterixRuntimeComponentsProvider.NOINDEX_PROVIDER,
diff --git a/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryIndexCreator.java b/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryIndexCreator.java
index 46f5b1a..fda9b45 100644
--- a/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryIndexCreator.java
+++ b/asterix-app/src/main/java/edu/uci/ics/asterix/file/SecondaryIndexCreator.java
@@ -29,7 +29,6 @@
import edu.uci.ics.asterix.formats.nontagged.AqlSerializerDeserializerProvider;
import edu.uci.ics.asterix.formats.nontagged.AqlTypeTraitProvider;
import edu.uci.ics.asterix.metadata.MetadataException;
-import edu.uci.ics.asterix.metadata.dataset.hints.DatasetHints.DatasetCardinalityHint;
import edu.uci.ics.asterix.metadata.declared.AqlMetadataProvider;
import edu.uci.ics.asterix.metadata.entities.Dataset;
import edu.uci.ics.asterix.metadata.entities.Index;
@@ -180,13 +179,7 @@
// Must be called in this order.
setPrimaryRecDescAndComparators();
setSecondaryRecDescAndComparators(createIndexStmt, metadataProvider);
-
- String numElementsHintString = dataset.getHints().get("CARDINALITY");
- if (numElementsHintString == null) {
- numElementsHint = DatasetCardinalityHint.DEFAULT;
- } else {
- numElementsHint = Long.parseLong(dataset.getHints().get("CARDINALITY"));
- }
+ numElementsHint = metadataProvider.getCardinalityPerPartitionHint(dataset);
}
protected void setPrimaryRecDescAndComparators() throws AlgebricksException {
diff --git a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/declared/AqlMetadataProvider.java b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/declared/AqlMetadataProvider.java
index 51ab213..a78b540 100644
--- a/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/declared/AqlMetadataProvider.java
+++ b/asterix-metadata/src/main/java/edu/uci/ics/asterix/metadata/declared/AqlMetadataProvider.java
@@ -788,13 +788,7 @@
dataSource.getId().getDataverseName(), datasetName, indexName);
IAsterixApplicationContextInfo appContext = (IAsterixApplicationContextInfo) context.getAppContext();
- String numElementsHintString = dataset.getHints().get("CARDINALITY");
- long numElementsHint;
- if (numElementsHintString == null) {
- numElementsHint = DatasetCardinalityHint.DEFAULT;
- } else {
- numElementsHint = Long.parseLong(dataset.getHints().get("CARDINALITY"));
- }
+ long numElementsHint = getCardinalityPerPartitionHint(dataset);
// TODO
// figure out the right behavior of the bulkload and then give the
@@ -1326,6 +1320,40 @@
}
}
+    /**
+     * Estimates how many elements each partition of a dataset will hold, for sizing per-partition bloom filters.
+     * The dataset's "CARDINALITY" hint (or the default when absent) is divided evenly across the IO devices of
+     * the node-group nodes that appear in stores, i.e. a uniform data distribution is assumed.
+     *
+     * @param dataset
+     *            the dataset whose hints are read; its details are cast to InternalDatasetDetails
+     * @return the cardinality hint per partition, or the undivided hint when no partition is counted
+     * @throws MetadataException
+     *             as declared; presumably raised by the node group lookup
+     * @throws AlgebricksException
+     *             if the node group yields no node-name list
+     */
+    public long getCardinalityPerPartitionHint(Dataset dataset) throws MetadataException, AlgebricksException {
+        String numElementsHintString = dataset.getHints().get("CARDINALITY");
+        long numElementsHint = numElementsHintString == null ? DatasetCardinalityHint.DEFAULT
+                : Long.parseLong(numElementsHintString);
+
+        InternalDatasetDetails datasetDetails = (InternalDatasetDetails) dataset.getDatasetDetails();
+        List<String> nodeGroup = MetadataManager.INSTANCE.getNodegroup(mdTxnCtx, datasetDetails.getNodeGroupName())
+                .getNodeNames();
+        if (nodeGroup == null) {
+            throw new AlgebricksException("Couldn't find node group " + datasetDetails.getNodeGroupName());
+        }
+        int numPartitions = 0;
+        for (String nd : nodeGroup) {
+            if (stores.get(nd) != null) { // nodes absent from stores are not counted
+                numPartitions += AsterixClusterProperties.INSTANCE.getNumberOfIODevices(nd);
+            }
+        }
+        // Avoid an integer divide-by-zero when no node contributed partitions; fall back to the undivided hint.
+        return numPartitions > 0 ? numElementsHint / numPartitions : numElementsHint;
+    }
+
@Override
public IFunctionInfo lookupFunction(FunctionIdentifier fid) {
return AsterixBuiltinFunctions.lookupFunction(fid);