[ASTERIXDB-2841][*DB][STO] Encode multipart dataverse names as subdirs on disk
- Multipart dataverse names are expressed on disk as a directory tree
- The first part is expressed normally; each subsequent part has a caret (^)
prepended
Change-Id: Idcfc45eb7f39153349a13d2baecb784244bdf177
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/10324
Reviewed-by: Michael Blow <mblow@apache.org>
Tested-by: Michael Blow <mblow@apache.org>
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java
index bf72c19..e520271 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/DatasetCopyIdentifier.java
@@ -71,8 +71,8 @@
}
public boolean isMatch(ResourceReference resourceReference) {
- return resourceReference.getDataverse().equals(dataverse.getCanonicalForm())
- && resourceReference.getDataset().equals(dataset) && resourceReference.getRebalance().equals(rebalance);
+ return resourceReference.getDataverse().equals(dataverse) && resourceReference.getDataset().equals(dataset)
+ && resourceReference.getRebalance().equals(rebalance);
}
@Override
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java
index 7791926..0e78152 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/storage/ResourceReference.java
@@ -21,24 +21,34 @@
import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import org.apache.asterix.common.metadata.DataverseName;
import org.apache.asterix.common.utils.StorageConstants;
+import org.apache.asterix.common.utils.StoragePathUtil;
+import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hyracks.storage.am.lsm.common.impls.IndexComponentFileReference;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
public class ResourceReference {
+ private static final Logger LOGGER = LogManager.getLogger();
protected final String root;
protected final String partition;
- protected final String dataverse; // == DataverseName.getCanonicalForm()
+ protected final DataverseName dataverse;
protected final String dataset;
protected final String rebalance;
protected final String index;
protected final String name;
- private volatile Path relativePath;
+ private final Path relativePath;
protected ResourceReference(String path) {
// format: root/partition/dataverse/dataset/rebalanceCount/index/fileName
+ // format: root/partition/dataverse_p1[/^dataverse_p2[/^dataverse_p3...]]/dataset/rebalanceCount/index/fileName
final String[] tokens = StringUtils.split(path, File.separatorChar);
if (tokens.length < 6) {
throw new IllegalStateException("Unrecognized path structure: " + path);
@@ -48,9 +58,40 @@
index = tokens[--offset];
rebalance = tokens[--offset];
dataset = tokens[--offset];
- dataverse = tokens[--offset]; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
- partition = tokens[--offset];
- root = tokens[--offset];
+ List<String> dvParts = new ArrayList<>();
+ String dvPart = tokens[--offset];
+ while (dvPart.charAt(0) == StoragePathUtil.DATAVERSE_CONTINUATION_MARKER) {
+ dvParts.add(dvPart.substring(1));
+ dvPart = tokens[--offset];
+ }
+ String probablyPartition = tokens[--offset];
+ if (dvParts.isEmpty()) {
+ // root/partition/dataverse/dataset/rebalanceCount/index/fileName
+ dataverse = DataverseName.createSinglePartName(dvPart);
+ partition = probablyPartition;
+ root = tokens[--offset];
+ } else if (probablyPartition.startsWith(StorageConstants.PARTITION_DIR_PREFIX)) {
+ // root/partition/dataverse_p1/^dataverse_p2/.../^dataverse_pn/dataset/rebalanceCount/index/fileName
+ dvParts.add(dvPart);
+ Collections.reverse(dvParts);
+ dataverse = DataverseName.create(dvParts);
+ partition = probablyPartition;
+ root = tokens[--offset];
+ } else if (dvPart.startsWith(StorageConstants.PARTITION_DIR_PREFIX)) {
+ // root/partition/dataverse/dataset/rebalanceCount/index/fileName (where dataverse starts with ^)
+ if (dvParts.size() != 1) {
+ throw new IllegalArgumentException("unable to parse path: '" + path + "'!");
+ }
+ dataverse =
+ DataverseName.createSinglePartName(StoragePathUtil.DATAVERSE_CONTINUATION_MARKER + dvParts.get(0));
+ LOGGER.info("legacy dataverse starting with ^ found: '{}'; this is not supported for new dataverses",
+ dataverse);
+ partition = dvPart;
+ root = probablyPartition;
+ } else {
+ throw new IllegalArgumentException("unable to parse path: '" + path + "'!");
+ }
+ relativePath = Paths.get(root, ArrayUtils.subarray(tokens, offset + 1, tokens.length - 1));
}
public static ResourceReference ofIndex(String indexPath) {
@@ -65,7 +106,7 @@
return partition;
}
- public String getDataverse() { //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
+ public DataverseName getDataverse() {
return dataverse;
}
@@ -86,19 +127,15 @@
}
public Path getRelativePath() {
- if (relativePath == null) {
- relativePath = Paths.get(root, partition, dataverse, dataset, rebalance, index);
- }
return relativePath;
}
public ResourceReference getDatasetReference() {
- return ResourceReference
- .ofIndex(Paths.get(root, partition, dataverse, dataset, rebalance, dataset).toFile().getPath());
+ return ResourceReference.ofIndex(relativePath.getParent().resolve(dataset).toFile().getPath());
}
public Path getFileRelativePath() {
- return Paths.get(root, partition, dataverse, dataset, rebalance, index, name);
+ return relativePath.resolve(name);
}
public int getPartitionNum() {
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java
index 587b8b3..32a226e 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/utils/StoragePathUtil.java
@@ -21,6 +21,7 @@
import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;
+import java.util.Iterator;
import org.apache.asterix.common.cluster.ClusterPartition;
import org.apache.asterix.common.metadata.DataverseName;
@@ -40,6 +41,7 @@
public class StoragePathUtil {
private static final Logger LOGGER = LogManager.getLogger();
+ public static final char DATAVERSE_CONTINUATION_MARKER = '^';
private StoragePathUtil() {
}
@@ -66,11 +68,21 @@
public static String prepareDataverseIndexName(DataverseName dataverseName, String datasetName, String idxName,
long rebalanceCount) {
- return prepareDataverseIndexName(dataverseName, prepareFullIndexName(datasetName, idxName, rebalanceCount));
+ return prepareDataverseComponentName(dataverseName, prepareFullIndexName(datasetName, idxName, rebalanceCount));
}
- public static String prepareDataverseIndexName(DataverseName dataverseName, String fullIndexName) {
- return dataverseName.getCanonicalForm() + File.separator + fullIndexName; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
+ public static String prepareDataverseName(DataverseName dataverseName) {
+ Iterator<String> dvParts = dataverseName.getParts().iterator();
+ StringBuilder builder = new StringBuilder();
+ builder.append(dvParts.next());
+ while (dvParts.hasNext()) {
+ builder.append(File.separatorChar).append(DATAVERSE_CONTINUATION_MARKER).append(dvParts.next());
+ }
+ return builder.toString();
+ }
+
+ public static String prepareDataverseComponentName(DataverseName dataverseName, String component) {
+ return prepareDataverseName(dataverseName) + File.separatorChar + component;
}
private static String prepareFullIndexName(String datasetName, String idxName, long rebalanceCount) {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java
index 2110dee..7f3d911 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/FeedUtils.java
@@ -86,10 +86,10 @@
public static FileSplit splitsForAdapter(DataverseName dataverseName, String feedName, String nodeName,
ClusterPartition partition) {
- String relPathFile = dataverseName.getCanonicalForm() + File.separator + feedName; //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
+ String relPathFile = StoragePathUtil.prepareDataverseComponentName(dataverseName, feedName);
String storagePartitionPath = StoragePathUtil.prepareStoragePartitionPath(partition.getPartitionId());
// Note: feed adapter instances in a single node share the feed logger
- // format: 'storage dir name'/partition_#/dataverse/feed/node
+ // format: 'storage dir name'/partition_#/dataverse_part1[/^dataverse_part2[...]]/feed/node
File f = new File(storagePartitionPath + File.separator + relPathFile + File.separator + nodeName);
return StoragePathUtil.getFileSplitForClusterPartition(partition, f.getPath());
}
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
index b93674c..c85f661 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/SplitsAndConstraintsUtil.java
@@ -48,11 +48,10 @@
DataverseName dataverseName) {
List<FileSplit> splits = new ArrayList<>();
// get all partitions
- ClusterPartition[] clusterPartition = clusterStateManager.getClusterPartitons();
- for (int j = 0; j < clusterPartition.length; j++) {
- File f = new File(StoragePathUtil.prepareStoragePartitionPath(clusterPartition[j].getPartitionId()),
- dataverseName.getCanonicalForm()); //TODO(MULTI_PART_DATAVERSE_NAME):REVISIT
- splits.add(StoragePathUtil.getFileSplitForClusterPartition(clusterPartition[j], f.getPath()));
+ for (ClusterPartition clusterPartition : clusterStateManager.getClusterPartitons()) {
+ File f = new File(StoragePathUtil.prepareStoragePartitionPath(clusterPartition.getPartitionId()),
+ StoragePathUtil.prepareDataverseName(dataverseName));
+ splits.add(StoragePathUtil.getFileSplitForClusterPartition(clusterPartition, f.getPath()));
}
return splits.toArray(new FileSplit[] {});
}