[ASTERIXDB-3519][EXT]: Support reading avro files from HDFS
- user model changes: support reading avro records
- storage format changes: no
- interface changes: no
details:
- support reading Avro files from HDFS
Ext-ref: MB-63117
Change-Id: I7da0b293479df04213c7301391c644c57665eda7
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19166
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
Tested-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
index 606c781..76ae449 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/array-access/array-access.01.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
index 65a2b38..ca20804 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-map/avro-map.01.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*avro_type.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
index 65a2b38..ca20804 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-nested-records/avro-nested-records.01.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*avro_type.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
index 65a2b38..ca20804 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-primitives/avro-primitives.01.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*avro_type.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
index 65a2b38..ca20804 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/avro-types/avro-union/avro-union.01.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*avro_type.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
index 5e30b26..74b7f17 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/field-access/field-access.01.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_age.avro"),
("format" = "avro")
);
@@ -43,8 +43,8 @@
CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_age.avro"),
("format" = "avro")
);
@@ -52,8 +52,8 @@
CREATE EXTERNAL DATASET AvroDataset3(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_name_comment.avro"),
("format" = "avro")
);
@@ -61,8 +61,8 @@
CREATE EXTERNAL DATASET AvroDataset4(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
index ce5eb8a..9445a2c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/heterogeneous-access/heterogeneous-access.1.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*heterogeneous*"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
index a3a6d0d..ac678fb 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-avro-files/test.000.ddl.sqlpp
@@ -28,7 +28,7 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="json-data/reviews/single-line/json"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%json-data/reviews/single-line/json"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
index c6ee970..4f4e322 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/invalid-type/invalid-type.1.ddl.sqlpp
@@ -35,8 +35,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_age.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
index 3c8e934..74664c5 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/missing-fields/missing-fields.1.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
index 67e38d2..92dca50 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/multi-file-multi-schema/multi-file-multi-schema.1.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_age.avro"),
("include#1"="*id_name.avro"),
("format" = "avro")
@@ -44,8 +44,8 @@
CREATE EXTERNAL DATASET AvroDataset2(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_age.avro"),
("include#1"="*id_age-string.avro"),
("format" = "avro")
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
index 507d6fa..950bfba 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/no-files/no-files.1.ddl.sqlpp
@@ -36,7 +36,7 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
+ %additional_Properties%,
("definition"="NOT_A_DEFINITION"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
index c7f127b..6f64c80 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/object-concat/object-concat.1.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
index 6d77dab..fb2d3db 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-all-fields/select-all-fields.1.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
-("container"="playground"),
-("definition"="avro-data/reviews"),
+%additional_Properties%,
+("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
index 052b127..285d19c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/select-count-one-field/select-count-one-field.1.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
index 17433a5..d36da08 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/string-standard-utf8/string-standard-utf8.1.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*id_name_comment.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
index c8d7d21..d6d9dec 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/avro/type-mismatch/type-mismatch.1.ddl.sqlpp
@@ -34,8 +34,8 @@
CREATE EXTERNAL DATASET AvroDataset(AvroType) USING %adapter%
(
%template%,
- ("container"="playground"),
- ("definition"="avro-data/reviews"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/reviews"),
("include"="*dummy_tweet.avro"),
("format" = "avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp
index 654a84a..dd5c56d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-flat/embed-flat.000.ddl.sqlpp
@@ -30,8 +30,8 @@
CREATE EXTERNAL DATASET Name(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/name/{name:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/name/{name:string}"),
("embed-filter-values" = "true"),
("format"="avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp
index be3d97f..b835823 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-multiple-values/embed-multiple-values.000.ddl.sqlpp
@@ -26,8 +26,8 @@
CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
("embed-filter-values" = "true"),
("format"="avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp
index b1b1b76..c947f70 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-one-value/embed-one-value.000.ddl.sqlpp
@@ -27,16 +27,16 @@
CREATE EXTERNAL DATASET Department(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/department/{department:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/department/{department:string}"),
("embed-filter-values" = "true"),
("format"="avro")
);
CREATE EXTERNAL DATASET LastName(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/last-name/{name.last:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/last-name/{name.last:string}"),
("embed-filter-values" = "true"),
("format"="avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp
index 2c5b352..531e398 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/embed-with-nested-values/embed-flat.000.ddl.sqlpp
@@ -30,8 +30,8 @@
CREATE EXTERNAL DATASET Name(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/nested-value/{name:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/nested-value/{name:string}"),
("embed-filter-values" = "true"),
("format"="avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp
index 5fcfff5..173d1d0 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/one-field/one-field.000.ddl.sqlpp
@@ -27,16 +27,16 @@
CREATE EXTERNAL DATASET Department(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/department/{department:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/department/{department:string}"),
("embed-filter-values" = "false"),
("format"="avro")
);
CREATE EXTERNAL DATASET LastName(TestType) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/last-name/{name.last:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/last-name/{name.last:string}"),
("embed-filter-values" = "false"),
("format"="avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp
index 4c418b4..49cc3e6 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/query/test.000.ddl.sqlpp
@@ -27,24 +27,24 @@
CREATE EXTERNAL DATASET company(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/car/{company:string}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}"),
("embed-filter-values" = "false"),
("format"="avro")
);
CREATE EXTERNAL DATASET customer(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/car/{company:string}/customer/{customer_id:int}"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}/customer/{customer_id:int}"),
("embed-filter-values" = "false"),
("format"="avro")
);
CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
("embed-filter-values" = "false"),
("format"="avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp
index 13316e8..073965b 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/using-limit/test.000.ddl.sqlpp
@@ -26,8 +26,8 @@
CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
("embed-filter-values" = "false"),
("format"="avro")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp
index 3c2b513..495acc8 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/dynamic-prefixes/avro/views/test.000.ddl.sqlpp
@@ -26,8 +26,8 @@
CREATE EXTERNAL DATASET maintenance(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/car/{company:string}/customer/{customer_id:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
("embed-filter-values" = "true"),
("format"="avro")
);
@@ -39,8 +39,8 @@
CREATE EXTERNAL DATASET maintenance2(test) USING %adapter% (
%template%,
- ("container"="playground"),
- ("definition"="avro-data/external-filter/embed/car/{company:string}/customer/{customerId:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
+ %additional_Properties%,
+ ("definition"="%path_prefix%avro-data/external-filter/embed/car/{company:string}/customer/{customerId:int}/maintenance-report/year-{year:int}-month-{month:int}-day-{day:int}-date"),
("embed-filter-values" = "true"),
("format"="avro")
);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index c8a6785..ff1b325 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -558,72 +558,96 @@
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/avro-types/avro-map">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/avro-types/avro-map</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/avro-types/avro-nested-records">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/avro-types/avro-nested-records</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/avro-types/avro-primitives">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/avro-types/avro-primitives</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/avro-types/avro-union">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/avro-types/avro-union</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/type-mismatch">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/type-mismatch</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/string-standard-utf8">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/string-standard-utf8</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/select-all-fields">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/select-all-fields</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/select-count-one-field">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/select-count-one-field</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/array-access">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/array-access</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/field-access">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/field-access</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/heterogeneous-access">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/heterogeneous-access</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/invalid-avro-files">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">none</output-dir>
<expected-error>External source error. org.apache.avro.InvalidAvroMagicException: Not an Avro data file.</expected-error>
<source-location>false</source-location>
@@ -632,6 +656,8 @@
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/invalid-type">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">none</output-dir>
<expected-error>ASX3123: Type 'AvroType' contains declared fields, which is not supported for 'avro' format</expected-error>
</compilation-unit>
@@ -639,24 +665,32 @@
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/missing-fields">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/missing-fields</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/multi-file-multi-schema">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/multi-file-multi-schema</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/no-files">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/no-files</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset">
<compilation-unit name="common/avro/object-concat">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">common/avro/object-concat</output-dir>
</compilation-unit>
</test-case>
@@ -891,48 +925,64 @@
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="one-field">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">one-field</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="query">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">query</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="embed-one-value">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">embed-one-value</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="embed-multiple-values">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">embed-multiple-values</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="embed-flat">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">embed-flat</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="embed-with-nested-values">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">embed-with-nested-values</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="using-limit">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">using-limit</output-dir>
</compilation-unit>
</test-case>
<test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
<compilation-unit name="views">
<placeholder name="adapter" value="S3" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additional_Properties" value='("container" = "playground")' />
<output-dir compare="Text">views</output-dir>
</compilation-unit>
</test-case>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
index 6851433..b178efd 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
@@ -359,4 +359,212 @@
<!-- </compilation-unit>-->
<!-- </test-case>-->
</test-group>
+ <test-group name="avro">
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-map">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/avro-types/avro-map</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-nested-records">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/avro-types/avro-nested-records</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-primitives">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/avro-types/avro-primitives</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/avro-types/avro-union">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/avro-types/avro-union</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/type-mismatch">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/type-mismatch</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/string-standard-utf8">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/string-standard-utf8</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/select-all-fields">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/select-all-fields</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/select-count-one-field">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/select-count-one-field</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/array-access">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/array-access</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/field-access">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/field-access</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/heterogeneous-access">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/heterogeneous-access</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/invalid-avro-files">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">none</output-dir>
+ <expected-error>External source error. org.apache.avro.InvalidAvroMagicException: Not an Avro data file</expected-error>
+ <source-location>false</source-location>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/invalid-type">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">none</output-dir>
+ <expected-error>ASX3123: Type 'AvroType' contains declared fields, which is not supported for 'avro' format</expected-error>
+ <source-location>false</source-location>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/missing-fields">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/missing-fields</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/multi-file-multi-schema">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/multi-file-multi-schema</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/no-files">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/no-files</output-dir>
+ <expected-error>ASX1114: The provided external dataset configuration returned no files from the external source</expected-error>
+ <source-location>false</source-location>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset">
+ <compilation-unit name="common/avro/object-concat">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">common/avro/object-concat</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="one-field">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">one-field</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="query">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">query</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="embed-one-value">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">embed-one-value</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="embed-multiple-values">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">embed-multiple-values</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="embed-flat">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">embed-flat</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="embed-with-nested-values">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">embed-with-nested-values</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="using-limit">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">using-limit</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="external-dataset/common/dynamic-prefixes/avro">
+ <compilation-unit name="views">
+ <placeholder name="adapter" value="hdfs" />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additional_Properties" value='("input-format" = "avro-input-format")' />
+ <output-dir compare="Text">views</output-dir>
+ </compilation-unit>
+ </test-case>
+ </test-group>
</test-suite>
diff --git a/asterixdb/asterix-external-data/pom.xml b/asterixdb/asterix-external-data/pom.xml
index 27c3ac1..e1cdd3a 100644
--- a/asterixdb/asterix-external-data/pom.xml
+++ b/asterixdb/asterix-external-data/pom.xml
@@ -598,6 +598,11 @@
<artifactId>delta-standalone_2.12</artifactId>
<version>3.0.0</version>
</dependency>
+ <dependency>
+ <groupId>org.apache.avro</groupId>
+ <artifactId>avro-mapred</artifactId>
+ <version>1.12.0</version>
+ </dependency>
</dependencies>
<!-- apply patch for HADOOP-17225 to workaround CVE-2019-10172 -->
<repositories>
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
index 934ba1d..8af7342 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
@@ -45,6 +45,7 @@
import org.apache.asterix.external.input.filter.embedder.IExternalFilterValueEmbedder;
import org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory;
import org.apache.asterix.external.input.record.reader.hdfs.HDFSRecordReader;
+import org.apache.asterix.external.input.record.reader.hdfs.avro.AvroFileRecordReader;
import org.apache.asterix.external.input.record.reader.hdfs.parquet.ParquetFileRecordReader;
import org.apache.asterix.external.input.record.reader.stream.StreamRecordReader;
import org.apache.asterix.external.input.stream.HDFSInputStream;
@@ -54,6 +55,7 @@
import org.apache.asterix.external.util.ExternalDataPrefix;
import org.apache.asterix.external.util.ExternalDataUtils;
import org.apache.asterix.external.util.HDFSUtils;
+import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
@@ -197,6 +199,8 @@
reader.close();
} else if (formatString.equals(ExternalDataConstants.FORMAT_PARQUET)) {
recordClass = IValueReference.class;
+ } else if (formatString.equals(ExternalDataConstants.FORMAT_AVRO)) {
+ recordClass = GenericRecord.class;
} else {
recordReaderClazz = StreamRecordReaderProvider.getRecordReaderClazz(configuration);
this.recordClass = char[].class;
@@ -356,6 +360,9 @@
if (configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT).trim()
.equals(ExternalDataConstants.INPUT_FORMAT_PARQUET)) {
return new ParquetFileRecordReader<>(read, inputSplits, readSchedule, nodeName, conf, context, ugi);
+ } else if (configuration.get(ExternalDataConstants.KEY_INPUT_FORMAT).trim()
+ .equals(ExternalDataConstants.INPUT_FORMAT_AVRO)) {
+ return new AvroFileRecordReader<>(read, inputSplits, readSchedule, nodeName, conf, context, ugi);
} else {
return new HDFSRecordReader<>(read, inputSplits, readSchedule, nodeName, conf, ugi);
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java
index 04c80d1..352f118 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/AbstractHDFSRecordReader.java
@@ -86,14 +86,11 @@
nextInputSplit();
}
- if (reader.next(key, value)) {
- return true;
- }
- while (nextInputSplit()) {
- if (reader.next(key, value)) {
+ do {
+ if (readerHasNext()) {
return true;
}
- }
+ } while (nextInputSplit());
return false;
}
@@ -103,6 +100,10 @@
return record;
}
+ protected boolean readerHasNext() throws IOException {
+ return reader.next(key, value); // default: advance the wrapped reader into key/value; subclasses may override
+ }
+
private boolean nextInputSplit() throws IOException {
for (; currentSplitIndex < inputSplits.length; currentSplitIndex++) {
/**
@@ -128,14 +129,18 @@
continue;
}
- reader.close();
- reader = getRecordReader(currentSplitIndex);
+ closeRecordReader();
+ setRecordReader(currentSplitIndex);
return true;
}
}
return false;
}
+ protected void closeRecordReader() throws IOException {
+ reader.close(); // default: close the wrapped reader before the next split's reader is set; overridable
+ }
+
/**
* Returns true if need to go to next split without closing the current reader
*
@@ -143,7 +148,7 @@
*/
protected abstract boolean onNextInputSplit() throws IOException;
- protected abstract RecordReader<K, V> getRecordReader(int splitIndex) throws IOException;
+ protected abstract void setRecordReader(int splitIndex) throws IOException;
@Override
public boolean stop() {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java
index 6f338c4..f143188 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/HDFSRecordReader.java
@@ -37,7 +37,7 @@
}
@Override
- protected RecordReader<K, V> getRecordReader(int splitIndex) throws IOException {
+ protected void setRecordReader(int splitIndex) throws IOException {
try {
reader = ugi == null ? getReader(splitIndex)
: ugi.doAs((PrivilegedExceptionAction<RecordReader<K, V>>) () -> getReader(splitIndex));
@@ -48,7 +48,6 @@
key = reader.createKey();
value = reader.createValue();
}
- return reader;
}
@SuppressWarnings("unchecked")
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileInputFormat.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileInputFormat.java
new file mode 100644
index 0000000..3c279a4
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileInputFormat.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.hdfs.avro;
+
+import org.apache.avro.mapred.AvroInputFormat;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.CompressionCodecFactory;
+import org.apache.hadoop.io.compress.SplittableCompressionCodec;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+
+/** Avro input format that decides splittability from the compression codec resolved for each file. */
+public class AvroFileInputFormat<T> extends AvroInputFormat<T> implements JobConfigurable {
+    private CompressionCodecFactory compressionCodecs = null;
+
+    @Override
+    public void configure(JobConf conf) {
+        compressionCodecs = new CompressionCodecFactory(conf);
+    }
+
+    @Override
+    public boolean isSplitable(FileSystem fs, Path file) {
+        // No codec resolved for the file => it is treated as splittable; otherwise the codec must support it.
+        final CompressionCodec codec = compressionCodecs.getCodec(file);
+        return codec == null || codec instanceof SplittableCompressionCodec;
+    }
+}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileRecordReader.java
new file mode 100644
index 0000000..e505666
--- /dev/null
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/avro/AvroFileRecordReader.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.external.input.record.reader.hdfs.avro;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.PrivilegedExceptionAction;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.common.exceptions.RuntimeDataException;
+import org.apache.asterix.external.api.IExternalDataRuntimeContext;
+import org.apache.asterix.external.api.IRawRecord;
+import org.apache.asterix.external.input.filter.embedder.IExternalFilterValueEmbedder;
+import org.apache.asterix.external.input.record.GenericRecord;
+import org.apache.asterix.external.input.record.reader.hdfs.AbstractHDFSRecordReader;
+import org.apache.asterix.external.input.record.reader.hdfs.EmptyRecordReader;
+import org.apache.avro.InvalidAvroMagicException;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.mapred.AvroWrapper;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.api.util.ExceptionUtils;
+
+/**
+ * Record reader for Avro files stored in HDFS. Paths ending in ".gz"/".gzip" are read as one gzip stream through a
+ * {@link DataFileStream}; every other split uses the record reader obtained from the input format.
+ */
+public class AvroFileRecordReader<T> extends AbstractHDFSRecordReader<Void, T> {
+
+    private AvroWrapper<T> key;
+    private NullWritable value;
+    private final IRawRecord<T> record;
+    private RecordReader<AvroWrapper<T>, NullWritable> reader;
+    private final IExternalFilterValueEmbedder valueEmbedder;
+    private boolean isCompressed = false;
+    private DataFileStream<T> dataFileStream;
+    private FileSystem fs;
+    private InputStream in;
+
+    public AvroFileRecordReader(boolean[] read, InputSplit[] inputSplits, String[] readSchedule, String nodeName,
+            JobConf conf, IExternalDataRuntimeContext context, UserGroupInformation ugi) {
+        super(read, inputSplits, readSchedule, nodeName, conf, ugi);
+        reader = new EmptyRecordReader<>();
+        record = new GenericRecord<>();
+        valueEmbedder = context.getValueEmbedder();
+    }
+
+    @Override
+    protected boolean onNextInputSplit() {
+        return false;
+    }
+
+    /** Opens the reader for split {@code splitIndex} and records which of the two read paths is active. */
+    @Override
+    @SuppressWarnings("unchecked")
+    protected void setRecordReader(int splitIndex) throws IOException {
+        try {
+            String filePath = getPath(inputSplits[splitIndex]);
+            valueEmbedder.setPath(filePath);
+            if (StringUtils.endsWithIgnoreCase(filePath, ".gz") || StringUtils.endsWithIgnoreCase(filePath, ".gzip")) {
+                isCompressed = true;
+                fs = ugi == null ? FileSystem.get(conf)
+                        : ugi.doAs((PrivilegedExceptionAction<FileSystem>) () -> FileSystem.get(conf));
+                // Track each layer in a field as soon as it exists: if a later step throws (bad gzip header,
+                // invalid Avro magic), closeCurrentReader() can still release what was already opened.
+                in = fs.open(new Path(filePath));
+                in = new GZIPInputStream(in);
+                dataFileStream = new DataFileStream<>(in, new GenericDatumReader<T>());
+            } else {
+                isCompressed = false;
+                reader = (RecordReader<AvroWrapper<T>, NullWritable>) (ugi == null
+                        ? inputFormat.getRecordReader(inputSplits[splitIndex], conf, Reporter.NULL)
+                        : ugi.doAs((PrivilegedExceptionAction<?>) () -> inputFormat
+                                .getRecordReader(inputSplits[splitIndex], conf, Reporter.NULL)));
+                if (key == null) {
+                    key = reader.createKey();
+                    value = reader.createValue();
+                }
+            }
+        } catch (InterruptedException ex) {
+            throw HyracksDataException.create(ex);
+        } catch (InvalidAvroMagicException ex) {
+            throw RuntimeDataException.create(ErrorCode.EXTERNAL_SOURCE_ERROR, ex,
+                    ExceptionUtils.getMessageOrToString(ex));
+        }
+    }
+
+    @Override
+    protected void closeRecordReader() throws IOException {
+        closeCurrentReader();
+    }
+
+    @Override
+    public void close() throws IOException {
+        closeCurrentReader();
+    }
+
+    /** Releases the current split's resources; shared by closeRecordReader() and close(). */
+    private void closeCurrentReader() throws IOException {
+        if (!isCompressed) {
+            reader.close();
+            return;
+        }
+        // Null resources are skipped, so this also works after a partially failed open. Resources are closed
+        // in reverse order (stream, input, file system), and the fields are cleared so a repeated close is a no-op.
+        // NOTE(review): FileSystem.get(conf) may hand out a JVM-wide cached instance; closing it here could
+        // affect other users of that instance -- confirm that caching is disabled for this configuration.
+        try (FileSystem openedFs = fs; InputStream openedIn = in; DataFileStream<T> openedStream = dataFileStream) {
+            fs = null;
+            in = null;
+            dataFileStream = null;
+        }
+    }
+
+    @Override
+    protected boolean readerHasNext() throws IOException {
+        return isCompressed ? dataFileStream.hasNext() : reader.next(key, value);
+    }
+
+    @Override
+    public IRawRecord<T> next() throws IOException {
+        record.set(isCompressed ? dataFileStream.next() : key.datum());
+        return record;
+    }
+
+    private String getPath(InputSplit split) {
+        return split instanceof FileSplit ? ((FileSplit) split).getPath().toString() : split.toString();
+    }
+}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java
index c11885a..1d123b6 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/ParquetFileRecordReader.java
@@ -62,7 +62,7 @@
@SuppressWarnings("unchecked")
@Override
- protected RecordReader<Void, V> getRecordReader(int splitIndex) throws IOException {
+ protected void setRecordReader(int splitIndex) throws IOException {
try {
ParquetRecordReaderWrapper readerWrapper = ugi == null ? getReader(splitIndex)
: ugi.doAs((PrivilegedExceptionAction<ParquetRecordReaderWrapper>) () -> getReader(splitIndex));
@@ -75,7 +75,6 @@
if (value == null) {
value = reader.createValue();
}
- return reader;
}
private ParquetRecordReaderWrapper getReader(int splitIndex) throws IOException {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index d487e68..46a1b5b 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -24,6 +24,7 @@
import java.util.function.Supplier;
import java.util.regex.Pattern;
+import org.apache.asterix.external.input.record.reader.hdfs.avro.AvroFileInputFormat;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.hyracks.util.StorageUtil;
@@ -176,16 +177,21 @@
public static final String CLASS_NAME_SEQUENCE_INPUT_FORMAT = "org.apache.hadoop.mapred.SequenceFileInputFormat";
public static final String CLASS_NAME_PARQUET_INPUT_FORMAT =
"org.apache.asterix.external.input.record.reader.hdfs.parquet.MapredParquetInputFormat";
+ public static final String CLASS_NAME_AVRO_INPUT_FORMAT = AvroFileInputFormat.class.getName();
public static final String CLASS_NAME_HDFS_FILESYSTEM = "org.apache.hadoop.hdfs.DistributedFileSystem";
public static final String S3A_CHANGE_DETECTION_REQUIRED = "requireVersionChangeDetection";
public static final String S3A_CHANGE_DETECTION_REQUIRED_CONFIG_KEY = "fs.s3a.change.detection.version.required";
public static final String HDFS_IO_COMPRESSION_CODECS_KEY = "io.compression.codecs";
+ public static final String HDFS_AVRO_IGNORE_INPUTS_WITHOUT_EXTENSION =
+ "avro.mapred.ignore.inputs.without.extension";
+
/**
* input formats aliases
*/
public static final String INPUT_FORMAT_TEXT = "text-input-format";
public static final String INPUT_FORMAT_SEQUENCE = "sequence-input-format";
public static final String INPUT_FORMAT_PARQUET = "parquet-input-format";
+ public static final String INPUT_FORMAT_AVRO = "avro-input-format";
public static final String HDFS_BLOCKSIZE = "blocksize";
public static final String HDFS_REPLICATION = "replication";
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 540de09..c7deb7c 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -1139,6 +1139,7 @@
protocol = nodePathPair[0];
break;
case ExternalDataConstants.KEY_ADAPTER_NAME_HDFS:
+ // Strip trailing slashes from the HDFS URL, since HDFS prefixes/paths are absolute and already start with a slash
return configurations.get(ExternalDataConstants.KEY_HDFS_URL).replaceAll("/+$", "");
default:
return "";
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
index 35f2a94..7c7e031 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
@@ -203,6 +203,8 @@
return ExternalDataConstants.CLASS_NAME_SEQUENCE_INPUT_FORMAT;
case ExternalDataConstants.INPUT_FORMAT_PARQUET:
return ExternalDataConstants.CLASS_NAME_PARQUET_INPUT_FORMAT;
+ case ExternalDataConstants.INPUT_FORMAT_AVRO:
+ return ExternalDataConstants.CLASS_NAME_AVRO_INPUT_FORMAT;
default:
return inputFormatParameter;
}
@@ -252,6 +254,9 @@
if (ExternalDataConstants.CLASS_NAME_PARQUET_INPUT_FORMAT.equals(formatClassName)) {
configureParquet(configuration, conf);
+ } else if (ExternalDataConstants.CLASS_NAME_AVRO_INPUT_FORMAT.equals(formatClassName)) {
+ conf.set(ExternalDataConstants.HDFS_AVRO_IGNORE_INPUTS_WITHOUT_EXTENSION, ExternalDataConstants.FALSE);
+ disableHadoopFileSystemCache(conf, ExternalDataConstants.READER_HDFS);
}
if (configuration.containsKey(ExternalDataConstants.S3A_CHANGE_DETECTION_REQUIRED)) {