[ASTERIXDB-3609][API][RT] Library archive limits

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
Library archives might be highly compressed or have
a very high number of entries. We should check for
these conditions when extracting them and make sure
they conform to reasonable limits for the use cases
we expect.

Ext-ref: MB-66704
Change-Id: I524cbfb6685bd3c1e326ad2db48482e5aba1ce6d
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19771
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ian Maxon <imaxon@apache.org>
Reviewed-by: Peeyush Gupta <peeyush.gupta@couchbase.com>
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
index 715e626..38ad7ac 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
@@ -91,7 +91,9 @@
             libraryManager.download(targetFileRef, authToken, libraryURI);
             Path outputDirPath = libraryManager.getStorageDir().getFile().toPath().toAbsolutePath().normalize();
             FileReference outPath = appContext.getIoManager().resolveAbsolutePath(outputDirPath.toString());
-            libraryManager.unzip(targetFileRef, outPath);
+            //we have to bypass limits here on the unzip, because this archive is all libraries, so potentially limit*N
+            //and knowing N here would be difficult since metadata is not available.
+            libraryManager.unzip(targetFileRef, outPath, false);
         } catch (IOException e) {
             LOGGER.error("Unable to retrieve UDFs from " + libraryURI.toString() + " before timeout");
             throw HyracksDataException.create(e);
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
index db87c60..5c88811 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
@@ -73,8 +73,10 @@
 
     void unzip(FileReference sourceFile, FileReference outputDir) throws IOException;
 
-    void writeAndForce(FileReference outputFile, InputStream dataStream, byte[] copyBuffer, IIOManager localIoManager)
-            throws IOException;
+    void unzip(FileReference sourceFile, FileReference outputDir, boolean limited) throws IOException;
+
+    long writeAndForce(FileReference outputFile, InputStream dataStream, byte[] copyBuffer, IIOManager localIoManager,
+            boolean limited) throws IOException;
 
     void setUploadClient(Function<ILibraryManager, CloseableHttpClient> f);
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
index f15e96f..706b1f9 100755
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
@@ -111,6 +111,7 @@
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.SerializationFeature;
+import com.google.common.io.ByteStreams;
 
 public class ExternalLibraryManager implements ILibraryManager, ILifeCycleComponent {
 
@@ -152,6 +153,9 @@
     private final INamespacePathResolver namespacePathResolver;
     private final boolean sslEnabled;
     private final boolean cloudMode;
+    private final long maxFileSize;
+    private final long maxTotalSize;
+    private final int maxEntries;
     private Function<ILibraryManager, CloseableHttpClient> uploadClientSupp;
 
     public ExternalLibraryManager(NodeControllerService ncs, IPersistedResourceRegistry reg, FileReference appDir,
@@ -171,6 +175,9 @@
         this.ioManager = ioManager;
         uploadClientSupp = ExternalLibraryManager::defaultHttpClient;
         cloudMode = ncs.getConfiguration().isCloudDeployment();
+        maxFileSize = ncs.getConfiguration().getLibraryMaxFileSize();
+        maxTotalSize = ncs.getConfiguration().getLibraryMaxExtractedSize();
+        maxEntries = ncs.getConfiguration().getLibraryMaxArchiveEntries();
     }
 
     public void initialize(boolean resetStorageData) throws HyracksDataException {
@@ -655,6 +662,11 @@
 
     @Override
     public void unzip(FileReference sourceFile, FileReference outputDir) throws IOException {
+        unzip(sourceFile, outputDir, true);
+    }
+
+    @Override
+    public void unzip(FileReference sourceFile, FileReference outputDir, boolean limited) throws IOException {
         boolean logTraceEnabled = LOGGER.isTraceEnabled();
         IIOManager localIoManager = ioManager;
         if (ncs.getConfiguration().isCloudDeployment()) {
@@ -665,7 +677,18 @@
         try (ZipFile zipFile = new ZipFile(sourceFile.getFile())) {
             Enumeration<? extends ZipEntry> entries = zipFile.entries();
             byte[] writeBuf = new byte[4096];
+            int numEntries = 0;
+            long totalSize = 0;
             while (entries.hasMoreElements()) {
+                if (limited && numEntries >= maxEntries) {
+                    throw new IOException(
+                            "Library archive contains more files and directories than configuration permits");
+                }
+                //may exceed the total allowable size by the maximum size of one file, because we can't know how
+                //big the file is until we actually attempt to write it.
+                if (limited && totalSize > maxTotalSize) {
+                    throw new IOException("Library archive extracted size exceeds maximum configured allowable size");
+                }
                 ZipEntry entry = entries.nextElement();
                 if (entry.isDirectory()) {
                     continue;
@@ -685,8 +708,9 @@
                     if (logTraceEnabled) {
                         LOGGER.trace("Extracting file {}", entryOutputFileRef);
                     }
-                    writeAndForce(entryOutputFileRef, in, writeBuf, localIoManager);
+                    totalSize += writeAndForce(entryOutputFileRef, in, writeBuf, localIoManager, limited);
                 }
+                numEntries++;
             }
         }
         for (Path newDir : newDirs) {
@@ -695,16 +719,23 @@
     }
 
     @Override
-    public void writeAndForce(FileReference outputFile, InputStream dataStream, byte[] copyBuffer,
-            IIOManager localIoManager) throws IOException {
+    public long writeAndForce(FileReference outputFile, InputStream dataStream, byte[] copyBuffer,
+            IIOManager localIoManager, boolean limited) throws IOException {
+        long written;
         outputFile.getFile().createNewFile();
         IFileHandle fHandle = localIoManager.open(outputFile, IIOManager.FileReadWriteMode.READ_WRITE,
                 IIOManager.FileSyncMode.METADATA_ASYNC_DATA_ASYNC);
         WritableByteChannel outChannel = localIoManager.newWritableChannel(fHandle);
         try (OutputStream outputStream = Channels.newOutputStream(outChannel)) {
-            IOUtils.copyLarge(dataStream, outputStream, copyBuffer);
+            InputStream limitedStream = ByteStreams.limit(dataStream, maxFileSize);
+            written = ByteStreams.copy(limitedStream, outputStream);
+            //Check if after writing the limited stream, there's still data to be written from the entry
+            if (limited && dataStream.available() > 0) {
+                throw new IOException("Library contains file exceeding maximum configured allowable size");
+            }
             outputStream.flush();
             localIoManager.sync(fHandle, true);
+            return written;
         } finally {
             localIoManager.close(fHandle);
         }
@@ -750,9 +781,9 @@
         }
         try {
             if (ncs.getConfiguration().isCloudDeployment()) {
-                writeAndForce(outputFile, is, copyBuf, ((ICloudIOManager) ioManager).getLocalIOManager());
+                writeAndForce(outputFile, is, copyBuf, ((ICloudIOManager) ioManager).getLocalIOManager(), true);
             } else {
-                writeAndForce(outputFile, is, copyBuf, ioManager);
+                writeAndForce(outputFile, is, copyBuf, ioManager, true);
             }
         } finally {
             is.close();
@@ -768,7 +799,7 @@
             boolean cloud, byte[] copyBuf) throws IOException {
         byte[] bytes = libraryManager.serializeLibraryDescriptor(desc);
         libraryManager.writeAndForce(descFile, new ByteArrayInputStream(bytes), copyBuf,
-                libraryManager.getCloudIOManager());
+                libraryManager.getCloudIOManager(), true);
     }
 
 }
diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
index 44b2fdc..90bc499 100644
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
@@ -24,6 +24,7 @@
 import static org.apache.hyracks.control.common.config.OptionTypes.LONG;
 import static org.apache.hyracks.control.common.config.OptionTypes.NONNEGATIVE_INTEGER;
 import static org.apache.hyracks.control.common.config.OptionTypes.POSITIVE_INTEGER;
+import static org.apache.hyracks.control.common.config.OptionTypes.POSITIVE_LONG_BYTE_UNIT;
 import static org.apache.hyracks.control.common.config.OptionTypes.STRING;
 import static org.apache.hyracks.control.common.config.OptionTypes.STRING_ARRAY;
 
@@ -102,6 +103,9 @@
         PYTHON_ARGS(STRING_ARRAY, (String[]) null),
         PYTHON_ENV(STRING_ARRAY, (String[]) null),
         PYTHON_DS_PATH(STRING, (String) null),
+        LIBRARY_MAX_FILE_SIZE(POSITIVE_LONG_BYTE_UNIT, 250L * 1024 * 1024), //250MB
+        LIBRARY_MAX_EXTRACTED_SIZE(POSITIVE_LONG_BYTE_UNIT, 1000L * 1024 * 1024), //1GB
+        LIBRARY_MAX_ARCHIVE_ENTRIES(INTEGER, 4096),
         CREDENTIAL_FILE(
                 OptionTypes.STRING,
                 (Function<IApplicationConfig, String>) appConfig -> FileUtil
@@ -253,6 +257,12 @@
                     return "List of environment variables to set when invoking the Python interpreter for Python UDFs. E.g. FOO=1";
                 case PYTHON_DS_PATH:
                     return "Path to systemd socket for fenced Python UDFs. Requires JDK17+, *nix operating system, and ";
+                case LIBRARY_MAX_FILE_SIZE:
+                    return "Maximum file size for any one given file in a zip archive";
+                case LIBRARY_MAX_EXTRACTED_SIZE:
+                    return "Maximum overall extracted size for a library";
+                case LIBRARY_MAX_ARCHIVE_ENTRIES:
+                    return "Maximum number of files and directories allowed within a library";
                 case CREDENTIAL_FILE:
                     return "Path to HTTP basic credentials";
                 case ABORT_TASKS_TIMEOUT:
@@ -636,4 +646,16 @@
         return appConfig.getInt(Option.ABORT_TASKS_TIMEOUT);
     }
 
+    public long getLibraryMaxFileSize() {
+        return appConfig.getLong(Option.LIBRARY_MAX_FILE_SIZE);
+    }
+
+    public long getLibraryMaxExtractedSize() {
+        return appConfig.getLong(Option.LIBRARY_MAX_EXTRACTED_SIZE);
+    }
+
+    public int getLibraryMaxArchiveEntries() {
+        return appConfig.getInt(Option.LIBRARY_MAX_ARCHIVE_ENTRIES);
+    }
+
 }