[NO ISSUE][EXT] Disable Hadoop FileSystem Cache

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
Hadoop FileSystem can be cached along with any credentials
(e.g., S3 tokens). We want to avoid caching the credentials
to avoid any risks that could be caused by caching them.

Change-Id: Icc36ddf013eadff0fe1cca7c2e52fcd5f2bbbb5b
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/13145
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Wael Alkowaileet <wael.y.k@gmail.com>
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 243b87c..f7d9de2 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -63,6 +63,8 @@
     public static final String KEY_HADOOP_ASTERIX_WARNINGS_ENABLED = "org.apache.asterix.warnings.enabled";
     //Base64 encoded warnings issued from Hadoop
     public static final String KEY_HADOOP_ASTERIX_WARNINGS_LIST = "org.apache.asterix.warnings.list";
+    //Disable caching FileSystem for Hadoop
+    public static final String KEY_HADOOP_DISABLE_FS_CACHE_TEMPLATE = "fs.%s.impl.disable.cache";
     //Base64 encoded function call information
     public static final String KEY_HADOOP_ASTERIX_FUNCTION_CALL_INFORMATION = "org.apache.asterix.function.info";
     public static final String KEY_SOURCE_DATATYPE = "type-name";
@@ -84,7 +86,6 @@
     public static final String KEY_ESCAPE = "escape";
     public static final String KEY_PARSER = "parser";
     public static final String KEY_DATASET_RECORD = "dataset-record";
-    public static final String KEY_HIVE_SERDE = "hive-serde";
     public static final String KEY_RSS_URL = "url";
     public static final String KEY_INTERVAL = "interval";
     public static final String KEY_IS_FEED = "is-feed";
@@ -178,7 +179,6 @@
     /**
      * supported builtin record formats
      */
-    public static final String FORMAT_HIVE = "hive";
     public static final String FORMAT_BINARY = "binary";
     public static final String FORMAT_ADM = "adm";
     public static final String FORMAT_JSON_LOWER_CASE = "json";
@@ -198,7 +198,6 @@
 
     static {
         Set<String> formats = new HashSet<>(14);
-        formats.add(FORMAT_HIVE);
         formats.add(FORMAT_BINARY);
         formats.add(FORMAT_ADM);
         formats.add(FORMAT_JSON_LOWER_CASE);
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index dd44436..dcd8f9a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -31,6 +31,7 @@
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_CREDENTIAL_PROVIDER_KEY;
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_PATH_STYLE_ACCESS;
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_S3_CONNECTION_POOL_SIZE;
+import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_S3_PROTOCOL;
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_SECRET_ACCESS_KEY;
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_SESSION_TOKEN;
 import static org.apache.asterix.external.util.ExternalDataConstants.AwsS3.HADOOP_TEMP_ACCESS;
@@ -927,6 +928,9 @@
             String sessionToken = configuration.get(ExternalDataConstants.AwsS3.SESSION_TOKEN_FIELD_NAME);
             String serviceEndpoint = configuration.get(ExternalDataConstants.AwsS3.SERVICE_END_POINT_FIELD_NAME);
 
+            //Disable caching S3 FileSystem
+            HDFSUtils.disableHadoopFileSystemCache(conf, HADOOP_S3_PROTOCOL);
+
             /*
              * Authentication Methods:
              * 1- Anonymous: no accessKeyId and no secretAccessKey
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
index e774b40..9f65cd7 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
@@ -336,4 +336,16 @@
             conf.unset(ExternalDataConstants.KEY_HADOOP_ASTERIX_WARNINGS_LIST);
         }
     }
+
+    /**
+     * Hadoop can cache FileSystem instance if reading the same file. This method allows for disabling the cache
+     *
+     * @param conf     Hadoop configuration
+     * @param protocol fs scheme (or protocol). e.g., s3a
+     */
+    public static void disableHadoopFileSystemCache(Configuration conf, String protocol) {
+        //Disable fs cache
+        conf.set(String.format(ExternalDataConstants.KEY_HADOOP_DISABLE_FS_CACHE_TEMPLATE, protocol),
+                ExternalDataConstants.TRUE);
+    }
 }