[ASTERIXDB-3609][API][RT] Library archive limits
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
Library archives might be highly compressed or have
a very high number of entries. We should check for
these conditions when extracting them and make sure
they conform to reasonable limits for the use cases
we expect.
Ext-ref: MB-66704
Change-Id: I524cbfb6685bd3c1e326ad2db48482e5aba1ce6d
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19771
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ian Maxon <imaxon@apache.org>
Reviewed-by: Peeyush Gupta <peeyush.gupta@couchbase.com>
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
index 715e626..38ad7ac 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
@@ -91,7 +91,9 @@
libraryManager.download(targetFileRef, authToken, libraryURI);
Path outputDirPath = libraryManager.getStorageDir().getFile().toPath().toAbsolutePath().normalize();
FileReference outPath = appContext.getIoManager().resolveAbsolutePath(outputDirPath.toString());
- libraryManager.unzip(targetFileRef, outPath);
+ //we have to bypass limits here on the unzip, because this archive is all libraries, so potentially limit*N
+ //and knowing N here would be difficult since metadata is not available.
+ libraryManager.unzip(targetFileRef, outPath, false);
} catch (IOException e) {
LOGGER.error("Unable to retrieve UDFs from " + libraryURI.toString() + " before timeout");
throw HyracksDataException.create(e);
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
index db87c60..5c88811 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
@@ -73,8 +73,10 @@
void unzip(FileReference sourceFile, FileReference outputDir) throws IOException;
- void writeAndForce(FileReference outputFile, InputStream dataStream, byte[] copyBuffer, IIOManager localIoManager)
- throws IOException;
+ void unzip(FileReference sourceFile, FileReference outputDir, boolean limited) throws IOException;
+
+ long writeAndForce(FileReference outputFile, InputStream dataStream, byte[] copyBuffer, IIOManager localIoManager,
+ boolean limited) throws IOException;
void setUploadClient(Function<ILibraryManager, CloseableHttpClient> f);
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
index f15e96f..706b1f9 100755
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
@@ -111,6 +111,7 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
+import com.google.common.io.ByteStreams;
public class ExternalLibraryManager implements ILibraryManager, ILifeCycleComponent {
@@ -152,6 +153,9 @@
private final INamespacePathResolver namespacePathResolver;
private final boolean sslEnabled;
private final boolean cloudMode;
+ private final long maxFileSize;
+ private final long maxTotalSize;
+ private final int maxEntries;
private Function<ILibraryManager, CloseableHttpClient> uploadClientSupp;
public ExternalLibraryManager(NodeControllerService ncs, IPersistedResourceRegistry reg, FileReference appDir,
@@ -171,6 +175,9 @@
this.ioManager = ioManager;
uploadClientSupp = ExternalLibraryManager::defaultHttpClient;
cloudMode = ncs.getConfiguration().isCloudDeployment();
+ maxFileSize = ncs.getConfiguration().getLibraryMaxFileSize();
+ maxTotalSize = ncs.getConfiguration().getLibraryMaxExtractedSize();
+ maxEntries = ncs.getConfiguration().getLibraryMaxArchiveEntries();
}
public void initialize(boolean resetStorageData) throws HyracksDataException {
@@ -655,6 +662,11 @@
@Override
public void unzip(FileReference sourceFile, FileReference outputDir) throws IOException {
+ unzip(sourceFile, outputDir, true);
+ }
+
+ @Override
+ public void unzip(FileReference sourceFile, FileReference outputDir, boolean limited) throws IOException {
boolean logTraceEnabled = LOGGER.isTraceEnabled();
IIOManager localIoManager = ioManager;
if (ncs.getConfiguration().isCloudDeployment()) {
@@ -665,7 +677,18 @@
try (ZipFile zipFile = new ZipFile(sourceFile.getFile())) {
Enumeration<? extends ZipEntry> entries = zipFile.entries();
byte[] writeBuf = new byte[4096];
+ int numEntries = 0;
+ long totalSize = 0;
while (entries.hasMoreElements()) {
+ if (limited && numEntries >= maxEntries) {
+ throw new IOException(
+ "Library archive contains more files and directories than configuration permits");
+ }
+ //may exceed the total allowable size by the maximum size of one file, because we can't know how
+ //big the file is until we actually attempt to write it.
+ if (limited && totalSize > maxTotalSize) {
+ throw new IOException("Library archive extracted size exceeds maximum configured allowable size");
+ }
ZipEntry entry = entries.nextElement();
if (entry.isDirectory()) {
continue;
@@ -685,8 +708,9 @@
if (logTraceEnabled) {
LOGGER.trace("Extracting file {}", entryOutputFileRef);
}
- writeAndForce(entryOutputFileRef, in, writeBuf, localIoManager);
+ totalSize += writeAndForce(entryOutputFileRef, in, writeBuf, localIoManager, limited);
}
+ numEntries++;
}
}
for (Path newDir : newDirs) {
@@ -695,16 +719,23 @@
}
@Override
- public void writeAndForce(FileReference outputFile, InputStream dataStream, byte[] copyBuffer,
- IIOManager localIoManager) throws IOException {
+ public long writeAndForce(FileReference outputFile, InputStream dataStream, byte[] copyBuffer,
+ IIOManager localIoManager, boolean limited) throws IOException {
+ long written;
outputFile.getFile().createNewFile();
IFileHandle fHandle = localIoManager.open(outputFile, IIOManager.FileReadWriteMode.READ_WRITE,
IIOManager.FileSyncMode.METADATA_ASYNC_DATA_ASYNC);
WritableByteChannel outChannel = localIoManager.newWritableChannel(fHandle);
try (OutputStream outputStream = Channels.newOutputStream(outChannel)) {
- IOUtils.copyLarge(dataStream, outputStream, copyBuffer);
+ InputStream limitedStream = ByteStreams.limit(dataStream, maxFileSize);
+ written = ByteStreams.copy(limitedStream, outputStream);
+ //Check if after writing the limited stream, there's still data to be written from the entry
+ if (limited && dataStream.available() > 0) {
+ throw new IOException("Library contains file exceeding maximum configured allowable size");
+ }
outputStream.flush();
localIoManager.sync(fHandle, true);
+ return written;
} finally {
localIoManager.close(fHandle);
}
@@ -750,9 +781,9 @@
}
try {
if (ncs.getConfiguration().isCloudDeployment()) {
- writeAndForce(outputFile, is, copyBuf, ((ICloudIOManager) ioManager).getLocalIOManager());
+ writeAndForce(outputFile, is, copyBuf, ((ICloudIOManager) ioManager).getLocalIOManager(), true);
} else {
- writeAndForce(outputFile, is, copyBuf, ioManager);
+ writeAndForce(outputFile, is, copyBuf, ioManager, true);
}
} finally {
is.close();
@@ -768,7 +799,7 @@
boolean cloud, byte[] copyBuf) throws IOException {
byte[] bytes = libraryManager.serializeLibraryDescriptor(desc);
libraryManager.writeAndForce(descFile, new ByteArrayInputStream(bytes), copyBuf,
- libraryManager.getCloudIOManager());
+ libraryManager.getCloudIOManager(), true);
}
}
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
index 44b2fdc..90bc499 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
@@ -24,6 +24,7 @@
import static org.apache.hyracks.control.common.config.OptionTypes.LONG;
import static org.apache.hyracks.control.common.config.OptionTypes.NONNEGATIVE_INTEGER;
import static org.apache.hyracks.control.common.config.OptionTypes.POSITIVE_INTEGER;
+import static org.apache.hyracks.control.common.config.OptionTypes.POSITIVE_LONG_BYTE_UNIT;
import static org.apache.hyracks.control.common.config.OptionTypes.STRING;
import static org.apache.hyracks.control.common.config.OptionTypes.STRING_ARRAY;
@@ -102,6 +103,9 @@
PYTHON_ARGS(STRING_ARRAY, (String[]) null),
PYTHON_ENV(STRING_ARRAY, (String[]) null),
PYTHON_DS_PATH(STRING, (String) null),
+ LIBRARY_MAX_FILE_SIZE(POSITIVE_LONG_BYTE_UNIT, 250L * 1024 * 1024), //250MB
+ LIBRARY_MAX_EXTRACTED_SIZE(POSITIVE_LONG_BYTE_UNIT, 1000L * 1024 * 1024), //1GB
+ LIBRARY_MAX_ARCHIVE_ENTRIES(INTEGER, 4096),
CREDENTIAL_FILE(
OptionTypes.STRING,
(Function<IApplicationConfig, String>) appConfig -> FileUtil
@@ -253,6 +257,12 @@
return "List of environment variables to set when invoking the Python interpreter for Python UDFs. E.g. FOO=1";
case PYTHON_DS_PATH:
return "Path to systemd socket for fenced Python UDFs. Requires JDK17+, *nix operating system, and ";
+ case LIBRARY_MAX_FILE_SIZE:
+ return "Maximum file size for any one given file in a zip archive";
+ case LIBRARY_MAX_EXTRACTED_SIZE:
+ return "Maximum overall extracted size for a library";
+ case LIBRARY_MAX_ARCHIVE_ENTRIES:
+ return "Maximum number of files and directories allowed within a library";
case CREDENTIAL_FILE:
return "Path to HTTP basic credentials";
case ABORT_TASKS_TIMEOUT:
@@ -636,4 +646,16 @@
return appConfig.getInt(Option.ABORT_TASKS_TIMEOUT);
}
+ public long getLibraryMaxFileSize() {
+ return appConfig.getLong(Option.LIBRARY_MAX_FILE_SIZE);
+ }
+
+ public long getLibraryMaxExtractedSize() {
+ return appConfig.getLong(Option.LIBRARY_MAX_EXTRACTED_SIZE);
+ }
+
+ public int getLibraryMaxArchiveEntries() {
+ return appConfig.getInt(Option.LIBRARY_MAX_ARCHIVE_ENTRIES);
+ }
+
}