[ASTERIXDB-3049][EXT]: Handle external failures for Parquet at compilation stage

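Surface external failures for Parquet sources as compile-time errors
instead of raw SDK/IO stack traces:

- HDFSDataSourceFactory: rethrow the IOException raised while
  configuring the record reader as a
  CompilationException(EXTERNAL_SOURCE_ERROR).
- AwsS3ParquetReaderFactory: catch AWS SDK v1/v2 exceptions thrown
  while building the Hadoop S3 job configuration (directly, or as the
  root cause of an AlgebricksException) and rethrow them as
  EXTERNAL_SOURCE_ERROR with the underlying message.
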
Change-Id: I1581c193de73fb220f05aa994c0d8b66bfff60fa
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/16803
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Tested-by: Hussain Towaileb <hussainht@gmail.com>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Hussain Towaileb <hussainht@gmail.com>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
index 8ea9ed4..419782f 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
@@ -25,7 +25,8 @@
 import java.util.Map;
 
 import org.apache.asterix.common.api.IApplicationContext;
-import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.common.exceptions.CompilationException;
+import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.external.api.AsterixInputStream;
 import org.apache.asterix.external.api.IExternalIndexer;
 import org.apache.asterix.external.api.IIndexibleExternalDataSource;
@@ -131,7 +132,7 @@
                 this.recordClass = char[].class;
             }
         } catch (IOException e) {
-            throw new AsterixException(e);
+            throw new CompilationException(ErrorCode.EXTERNAL_SOURCE_ERROR, e);
         }
     }
 
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
index 803e657..4e3d1ec 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/parquet/AwsS3ParquetReaderFactory.java
@@ -18,12 +18,16 @@
  */
 package org.apache.asterix.external.input.record.reader.aws.parquet;
 
+import static org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString;
+
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
 import org.apache.asterix.common.exceptions.CompilationException;
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.common.exceptions.RuntimeDataException;
 import org.apache.asterix.external.input.HDFSDataSourceFactory;
 import org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory.IncludeExcludeMatcher;
 import org.apache.asterix.external.util.ExternalDataConstants;
@@ -33,7 +37,11 @@
 import org.apache.hyracks.api.application.IServiceContext;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.exceptions.IWarningCollector;
+import org.apache.hyracks.api.util.ExceptionUtils;
 
+import com.amazonaws.SdkBaseException;
+
+import software.amazon.awssdk.core.exception.SdkException;
 import software.amazon.awssdk.services.s3.model.S3Object;
 
 public class AwsS3ParquetReaderFactory extends HDFSDataSourceFactory {
@@ -50,10 +58,20 @@
         putS3ConfToHadoopConf(configuration, path);
 
         //Configure Hadoop S3 input splits
-        JobConf conf = createHdfsConf(serviceCtx, configuration);
-        int numberOfPartitions = getPartitionConstraint().getLocations().length;
-        ExternalDataUtils.AwsS3.configureAwsS3HdfsJobConf(conf, configuration, numberOfPartitions);
-        configureHdfsConf(conf, configuration);
+        try {
+            JobConf conf = createHdfsConf(serviceCtx, configuration);
+            int numberOfPartitions = getPartitionConstraint().getLocations().length;
+            ExternalDataUtils.AwsS3.configureAwsS3HdfsJobConf(conf, configuration, numberOfPartitions);
+            configureHdfsConf(conf, configuration);
+        } catch (SdkException | SdkBaseException ex) {
+            throw new RuntimeDataException(ErrorCode.EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex));
+        } catch (AlgebricksException ex) {
+            Throwable root = ExceptionUtils.getRootCause(ex);
+            if (root instanceof SdkException || root instanceof SdkBaseException) {
+                throw new RuntimeDataException(ErrorCode.EXTERNAL_SOURCE_ERROR, getMessageOrToString(root));
+            }
+            throw ex;
+        }
     }
 
     @Override
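
For reference outside the patch itself, below is a minimal standalone sketch of the translation pattern the second hunk applies: an S3-touching configuration step is run, AWS SDK v1 (SdkBaseException) or v2 (SdkException) failures are rethrown as an error-coded exception carrying only the human-readable message, and SDK failures that arrive wrapped in an AlgebricksException are unwrapped via getRootCause first. The helper class name S3FailureTranslator and the ConfigStep functional interface are illustrative only; the exception types, error code, and utility calls are the ones used in the hunk above.

```java
import static org.apache.hyracks.api.util.ExceptionUtils.getMessageOrToString;

import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.common.exceptions.RuntimeDataException;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.util.ExceptionUtils;

import com.amazonaws.SdkBaseException;

import software.amazon.awssdk.core.exception.SdkException;

final class S3FailureTranslator {

    private S3FailureTranslator() {
    }

    /** Runs an S3-touching configuration step and normalizes SDK failures. */
    static void runTranslated(ConfigStep step) throws AlgebricksException, HyracksDataException {
        try {
            step.run();
        } catch (SdkException | SdkBaseException ex) {
            // SDK v2 or v1 failure thrown directly by the configuration step
            throw new RuntimeDataException(ErrorCode.EXTERNAL_SOURCE_ERROR, getMessageOrToString(ex));
        } catch (AlgebricksException ex) {
            // SDK failure wrapped by intermediate layers: unwrap and report its message
            Throwable root = ExceptionUtils.getRootCause(ex);
            if (root instanceof SdkException || root instanceof SdkBaseException) {
                throw new RuntimeDataException(ErrorCode.EXTERNAL_SOURCE_ERROR, getMessageOrToString(root));
            }
            throw ex;
        }
    }

    /** Illustrative functional interface standing in for the wrapped configuration calls. */
    @FunctionalInterface
    interface ConfigStep {
        void run() throws AlgebricksException, HyracksDataException;
    }
}
```

The root-cause check covers the case where Hadoop or the HDFS configuration path wraps the SDK failure in an AlgebricksException before it reaches the catch block, so the user still sees the external-source error message rather than the wrapper's stack trace.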