[NO ISSUE]: Avoid calling cloud to return list of all cached/uncached files
- user model changes: no
- storage format changes: no
- interface changes: yes
Details:
- Listing all file keys from the cloud can take a long time
when there is a huge number of files. Instead, cached files
are now listed from the local disk, and uncached files are
obtained from the cacher's uncached files tracker.
Ext-ref: MB-62555
Change-Id: I9f1dfa38a1aae986de7adbac283d126260606065
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18443
Reviewed-by: Wail Alkowaileet <wael.y.k@gmail.com>
Tested-by: Hussain Towaileb <hussainht@gmail.com>
Integration-Tests: Hussain Towaileb <hussainht@gmail.com>
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
index 033f135..4913f83 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
@@ -49,6 +49,7 @@
import org.apache.hyracks.api.io.FileReference;
import org.apache.hyracks.api.io.IFileHandle;
import org.apache.hyracks.api.io.IIOBulkOperation;
+import org.apache.hyracks.api.io.IODeviceHandle;
import org.apache.hyracks.api.util.IoUtil;
import org.apache.hyracks.cloud.io.ICloudIOManager;
import org.apache.hyracks.cloud.io.request.ICloudBeforeRetryRequest;
@@ -61,6 +62,8 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
public abstract class AbstractCloudIOManager extends IOManager implements IPartitionBootstrapper, ICloudIOManager {
private static final Logger LOGGER = LogManager.getLogger();
@@ -169,6 +172,8 @@
protected abstract void downloadPartitions(boolean metadataNode, int metadataPartition) throws HyracksDataException;
+ protected abstract Set<UncachedFileReference> getUncachedFiles();
+
/*
* ******************************************************************
* ICloudIOManager functions
@@ -362,13 +367,59 @@
}
/**
- * Returns a list of all stored objects (sorted ASC by path) in the cloud and their sizes
+ * Returns a list of all stored objects in the cloud and their sizes, sorted ASC by path (case-insensitive). Cached
+ * files are retrieved by listing the local disk, while uncached files are retrieved from the uncached files tracker.
*
* @param objectMapper to create the result {@link JsonNode}
* @return {@link JsonNode} with stored objects' information
*/
public final JsonNode listAsJson(ObjectMapper objectMapper) {
- return cloudClient.listAsJson(objectMapper, bucket);
+ ArrayNode objectsInfo = objectMapper.createArrayNode();
+ List<CloudFile> allFiles = new ArrayList<>();
+ try {
+ // get cached files (read from disk)
+ for (IODeviceHandle deviceHandle : getIODevices()) {
+ FileReference storageRoot = deviceHandle.createFileRef(STORAGE_ROOT_DIR_NAME);
+
+ Set<FileReference> deviceFiles;
+ try {
+ deviceFiles = localIoManager.list(storageRoot, IoUtil.NO_OP_FILTER);
+ } catch (Throwable th) {
+ LOGGER.warn("Failed to get local storage files for root {}", storageRoot.getRelativePath(), th);
+ continue;
+ }
+
+ for (FileReference fileReference : deviceFiles) {
+ try {
+ allFiles.add(CloudFile.of(fileReference.getRelativePath(), fileReference.getFile().length()));
+ } catch (Throwable th) {
+ LOGGER.warn("Encountered issue for local storage file {}", fileReference.getRelativePath(), th);
+ }
+ }
+ }
+
+ // get uncached files from uncached files tracker
+ for (UncachedFileReference uncachedFile : getUncachedFiles()) {
+ allFiles.add(CloudFile.of(uncachedFile.getRelativePath(), uncachedFile.getSize()));
+ }
+
+ // combine all and sort
+ allFiles.sort((x, y) -> String.CASE_INSENSITIVE_ORDER.compare(x.getPath(), y.getPath()));
+
+ for (CloudFile file : allFiles) {
+ ObjectNode objectInfo = objectsInfo.addObject();
+ objectInfo.put("path", file.getPath());
+ objectInfo.put("size", file.getSize());
+ }
+
+ return objectsInfo;
+ } catch (Throwable th) {
+ LOGGER.warn("Failed to retrieve list of all cloud files", th);
+ objectsInfo.removeAll();
+ ObjectNode objectInfo = objectsInfo.addObject();
+ objectInfo.put("error", "Failed to retrieve list of all cloud files. " + th.getMessage());
+ return objectsInfo;
+ }
}
/**
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
index 764d436..1cb6077 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
@@ -67,6 +67,11 @@
}
@Override
+ protected Set<UncachedFileReference> getUncachedFiles() {
+ return Collections.emptySet();
+ }
+
+ @Override
protected void onOpen(CloudFileHandle fileHandle) {
// NoOp
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
index 35b7255..1c5efd9 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
@@ -137,6 +137,11 @@
}
}
+ @Override
+ protected Set<UncachedFileReference> getUncachedFiles() {
+ return accessor.getUncachedFiles();
+ }
+
private ILazyAccessor createAccessor(ParallelCacher cacher, boolean canReplaceAccessor) {
if (canReplaceAccessor) {
return new ReplaceableCloudAccessor(cloudClient, bucket, localIoManager, partitions, replacer, cacher);
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
index aebeab2..486074d 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
@@ -23,6 +23,7 @@
import java.util.Collection;
import java.util.Set;
+import org.apache.asterix.cloud.UncachedFileReference;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
@@ -38,6 +39,13 @@
/**
* Returns a list of all uncached files
*
+ * @return all uncached files
+ */
+ Set<UncachedFileReference> getUncachedFiles();
+
+ /**
+ * Returns the set of uncached files in the specified directory
+ *
* @param dir directory to list
* @param filter file name filter
* @return set of uncached files
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
index c953de5..b1947ec 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
@@ -23,6 +23,7 @@
import java.util.Collections;
import java.util.Set;
+import org.apache.asterix.cloud.UncachedFileReference;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
@@ -35,6 +36,11 @@
}
@Override
+ public Set<UncachedFileReference> getUncachedFiles() {
+ return Collections.emptySet();
+ }
+
+ @Override
public Set<FileReference> getUncachedFiles(FileReference dir, FilenameFilter filter) {
return Collections.emptySet();
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
index 56619c8..24f5694 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
@@ -88,6 +88,14 @@
}
@Override
+ public synchronized Set<UncachedFileReference> getUncachedFiles() {
+ Set<UncachedFileReference> uncached = new HashSet<>();
+ uncached.addAll(uncachedDataFiles.values());
+ uncached.addAll(uncachedMetadataFiles.values());
+ return uncached;
+ }
+
+ @Override
public Set<FileReference> getUncachedFiles(FileReference dir, FilenameFilter filter) {
if (dir.getRelativePath().endsWith(StorageConstants.STORAGE_ROOT_DIR_NAME)) {
return uncachedDataFiles.keySet().stream()
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
index e6c0692..fc1d98a 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
@@ -22,6 +22,7 @@
import java.util.Set;
import org.apache.asterix.cloud.CloudFileHandle;
+import org.apache.asterix.cloud.UncachedFileReference;
import org.apache.asterix.cloud.bulk.IBulkOperationCallBack;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
@@ -111,4 +112,11 @@
* @param directory to evict
*/
void doEvict(FileReference directory) throws HyracksDataException;
+
+ /**
+ * Returns all uncached files
+ *
+ * @return all uncached files
+ */
+ Set<UncachedFileReference> getUncachedFiles();
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
index ae32402..c843074 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
@@ -19,9 +19,11 @@
package org.apache.asterix.cloud.lazy.accessor;
import java.io.FilenameFilter;
+import java.util.Collections;
import java.util.Set;
import org.apache.asterix.cloud.CloudFileHandle;
+import org.apache.asterix.cloud.UncachedFileReference;
import org.apache.asterix.cloud.bulk.IBulkOperationCallBack;
import org.apache.asterix.cloud.bulk.NoOpDeleteBulkCallBack;
import org.apache.asterix.cloud.clients.ICloudClient;
@@ -84,4 +86,9 @@
cloudClient.write(bucket, fileReference.getRelativePath(), bytes);
localIoManager.overwrite(fileReference, bytes);
}
+
+ @Override
+ public Set<UncachedFileReference> getUncachedFiles() {
+ return Collections.emptySet();
+ }
}
diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
index 1a440e7..91046a1 100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
@@ -23,6 +23,7 @@
import java.util.Set;
import org.apache.asterix.cloud.CloudFileHandle;
+import org.apache.asterix.cloud.UncachedFileReference;
import org.apache.asterix.cloud.bulk.IBulkOperationCallBack;
import org.apache.asterix.cloud.clients.CloudFile;
import org.apache.asterix.cloud.clients.ICloudClient;
@@ -185,4 +186,9 @@
return dir.getRelativePath().startsWith(StorageConstants.METADATA_TXN_NOWAL_DIR_NAME)
|| dir.getName().equals(StorageConstants.GLOBAL_TXN_DIR_NAME);
}
+
+ @Override
+ public Set<UncachedFileReference> getUncachedFiles() {
+ return cacher.getUncachedFiles();
+ }
}