[ASTERIXDB-3117][EXT] Allow specifying a subpath for EXTERNAL datasets queries
- user model changes: no
- storage format changes: no
- interface changes: yes
Details:
This patch is to allow users to specify a subpath in queries scanning
external data sources like S3.
- add "subpath" hint to allow specifying a subpath to be used in
conjunction with the "definition" of external datasets.
- capture the "subpath" hint for each term in the FROM clause.
- pass the hint to the properties of IDataSource and expose them.
- Op Isomorphism: compare IDataSource properties only for EXTERNAL datasets.
Change-Id: I3d1ec61fd8aa3275260c1aed6d00fa3c7b408351
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17395
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/SqlppExpressionToPlanTranslator.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/SqlppExpressionToPlanTranslator.java
index 3c4537f..ddabaa0 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/SqlppExpressionToPlanTranslator.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/SqlppExpressionToPlanTranslator.java
@@ -18,6 +18,8 @@
*/
package org.apache.asterix.translator;
+import static org.apache.asterix.external.util.ExternalDataConstants.SUBPATH;
+
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -28,6 +30,7 @@
import java.util.function.Predicate;
import org.apache.asterix.algebra.base.ILangExpressionToPlanTranslator;
+import org.apache.asterix.common.annotations.ExternalSubpathAnnotation;
import org.apache.asterix.common.exceptions.CompilationException;
import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.common.functions.FunctionSignature;
@@ -325,6 +328,10 @@
} else {
unnestOp = new UnnestOperator(fromVar, new MutableObject<>(pUnnestExpr.first));
}
+ ExternalSubpathAnnotation hint = ((AbstractExpression) fromExpr).findHint(ExternalSubpathAnnotation.class);
+ if (hint != null) {
+ unnestOp.getAnnotations().put(SUBPATH, hint.getSubPath());
+ }
unnestOp.getInputs().add(pUnnestExpr.second);
unnestOp.setSourceLocation(sourceLoc);
@@ -576,6 +583,10 @@
outerUnnestMissingValue)
: new UnnestOperator(rightVar, new MutableObject<>(pUnnestExpr.first));
}
+ ExternalSubpathAnnotation hint = ((AbstractExpression) rightExpr).findHint(ExternalSubpathAnnotation.class);
+ if (hint != null) {
+ unnestOp.getAnnotations().put(SUBPATH, hint.getSubPath());
+ }
unnestOp.getInputs().add(pUnnestExpr.second);
unnestOp.setSourceLocation(binaryCorrelate.getRightVariable().getSourceLocation());
return new Pair<>(unnestOp, rightVar);
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/DatasetRewriter.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/DatasetRewriter.java
index b1c2f0a..68edc0d 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/DatasetRewriter.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/function/DatasetRewriter.java
@@ -21,10 +21,14 @@
import static org.apache.asterix.common.api.IIdentifierMapper.Modifier.PLURAL;
import static org.apache.asterix.common.api.IIdentifierMapper.Modifier.SINGULAR;
import static org.apache.asterix.common.utils.IdentifierUtil.dataset;
+import static org.apache.asterix.external.util.ExternalDataConstants.SUBPATH;
+import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
+import org.apache.asterix.common.config.DatasetConfig;
import org.apache.asterix.common.exceptions.CompilationException;
import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.common.metadata.DatasetFullyQualifiedName;
@@ -102,6 +106,15 @@
}
DataSourceScanOperator scan = new DataSourceScanOperator(variables, dataSource);
scan.setSourceLocation(unnest.getSourceLocation());
+ if (dataset.getDatasetType() == DatasetConfig.DatasetType.EXTERNAL) {
+ Map<String, Object> unnestAnnotations = unnest.getAnnotations();
+ scan.getAnnotations().putAll(unnestAnnotations);
+ Map<String, Serializable> dataSourceProperties = dataSource.getProperties();
+ Object externalSubpath = unnestAnnotations.get(SUBPATH);
+ if (externalSubpath instanceof String) {
+ dataSourceProperties.put(SUBPATH, (String) externalSubpath);
+ }
+ }
List<Mutable<ILogicalOperator>> scanInpList = scan.getInputs();
scanInpList.addAll(unnest.getInputs());
opRef.setValue(scan);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.000.ddl.sqlpp
index f203da0..1538bf9 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.000.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.000.ddl.sqlpp
@@ -63,4 +63,27 @@
("container"="playground"),
("definition"="json-data/single-line/json-array-of-objects"),
("format"="json")
+);
+
+drop dataset test6 if exists;
+CREATE EXTERNAL DATASET test6(test) USING %adapter% (
+%template%,
+("container"="playground"),
+("definition"="json-data/reviews"),
+("format"="json")
+);
+
+drop dataset test7 if exists;
+CREATE EXTERNAL DATASET test7(test) USING %adapter% (
+%template%,
+("container"="playground"),
+("format"="json")
+);
+
+drop dataset test8 if exists;
+CREATE EXTERNAL DATASET test8(test) USING %adapter% (
+%template%,
+("container"="playground"),
+("definition"="json-data"),
+("format"="json")
);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.008.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.008.query.sqlpp
new file mode 100644
index 0000000..1b265f1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.008.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+// testing subpath hint. the result should be = scanning test3
+select value test6 from /*+ subpath /multi-lines-with-arrays/json */ test6 order by id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.009.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.009.query.sqlpp
new file mode 100644
index 0000000..9356366
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.009.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+// testing subpath hint. the result should be = scanning test4
+select value test7 from /*+ subpath json-data/reviews/multi-lines-with-nested-objects/json */ test7 order by id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.010.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.010.query.sqlpp
new file mode 100644
index 0000000..8425714
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.010.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+// testing subpath hint. the result should be = scanning test5
+select value count(*) from /*+ subpath single-line/json-array-of-objects */ test8;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.011.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.011.query.sqlpp
new file mode 100644
index 0000000..d0f08aa
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.011.query.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+// testing that test6 a and test6 b are two different data sources and hence no replicate should exist in the plan
+explain select count(a.quarter) as cnt
+from /*+ subpath /multi-lines/json */ test6 a
+join test6 b on a.quarter = b.quarter;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.012.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.012.query.sqlpp
new file mode 100644
index 0000000..3a675ef
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.012.query.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+// testing that test6 a and test6 b are two different data sources and hence no replicate should exist in the plan
+select count(a.quarter) as cnt
+from /*+ subpath /multi-lines/json */ test6 a
+join test6 b on a.quarter = b.quarter;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.013.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.013.query.sqlpp
new file mode 100644
index 0000000..e886adb
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.013.query.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+// testing that test6 a and test6 b are the same data sources and hence replicate should exist in the plan
+explain select count(a.quarter) as cnt
+from /*+ subpath /multi-lines/json */ test6 a
+join /*+ subpath /multi-lines/json */ test6 b on a.quarter = b.quarter;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.014.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.014.query.sqlpp
new file mode 100644
index 0000000..5b39d4f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/external-dataset/common/json/json/external_dataset.014.query.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+// testing that test6 a and test6 b are the same data sources and hence replicate should exist in the plan
+select count(a.quarter) as cnt
+from /*+ subpath /multi-lines/json */ test6 a
+join /*+ subpath /multi-lines/json */ test6 b on a.quarter = b.quarter;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.008.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.008.adm
new file mode 100644
index 0000000..7660e7e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.008.adm
@@ -0,0 +1,25 @@
+{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] }
+{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] }
+{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] }
+{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] }
+{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ] }
+{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] }
+{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] }
+{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] }
+{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] }
+{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3 ] }
+{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] }
+{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3 ] }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.009.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.009.adm
new file mode 100644
index 0000000..7643986
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.009.adm
@@ -0,0 +1,25 @@
+{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } }
+{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } }
+{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } }
+{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } }
+{ "id": 1, "year": null, "quarter": null, "review": "good", "array": [ 1, 2, 3 ], "nested": { "id": 1 } }
+{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] }
+{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] }
+{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] }
+{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] }
+{ "id": 2, "year": null, "quarter": null, "review": "good", "array": [ 1, [ 1, 2 ], [ 1 ] ], "nested": { "id": 1 }, "nested2": [ { "id": 1 } ] }
+{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] }
+{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] }
+{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] }
+{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] }
+{ "id": 3, "year": 2018, "quarter": null, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ { "nested": { "array": [ 1, 2 ] } } ] } } ] }
+{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] }
+{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] }
+{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] }
+{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] }
+{ "id": 4, "year": 2018, "quarter": null, "review": "bad", "array": [ 1, 2, 3, { "nested1": { "id": 1, "nested2": { "id": 2, "nested3": [ { "nested4": null } ] } } } ] }
+{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] }
+{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] }
+{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] }
+{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] }
+{ "id": 5, "year": 2018, "quarter": 1, "review": "good", "array": [ 1, 2, 3, { "nested": { "array": [ 1, 2 ] } } ] }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.010.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.010.adm
new file mode 100644
index 0000000..86babba
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.010.adm
@@ -0,0 +1 @@
+50128
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.011.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.011.plan
new file mode 100644
index 0000000..6ec7195
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.011.plan
@@ -0,0 +1,48 @@
+distribute result [$$48] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+-- DISTRIBUTE_RESULT |UNPARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ project ([$$48]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- STREAM_PROJECT |UNPARTITIONED|
+ assign [$$48] <- [{"cnt": $$51}] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- ASSIGN |UNPARTITIONED|
+ aggregate [$$51] <- [agg-sql-sum($$53)] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- AGGREGATE |UNPARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- RANDOM_MERGE_EXCHANGE |PARTITIONED|
+ aggregate [$$53] <- [agg-sql-count($$49)] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- AGGREGATE |PARTITIONED|
+ project ([$$49]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ join (eq($$49, $$50)) [cardinality: 1000000.0, op-cost: 2000000.0, total-cost: 6000000.0]
+ -- HYBRID_HASH_JOIN [$$49][$$50] |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+ -- HASH_PARTITION_EXCHANGE [$$49] |PARTITIONED|
+ project ([$$49]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$49] <- [$$a.getField("quarter")] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- ASSIGN |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$a] <- test.test6 [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0]
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+ -- HASH_PARTITION_EXCHANGE [$$50] |PARTITIONED|
+ project ([$$50]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$50] <- [$$b.getField("quarter")] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- ASSIGN |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$b] <- test.test6 [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0]
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.012.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.012.adm
new file mode 100644
index 0000000..6cd35ab
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.012.adm
@@ -0,0 +1 @@
+{ "cnt": 15600 }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.013.plan b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.013.plan
new file mode 100644
index 0000000..22d5bd4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.013.plan
@@ -0,0 +1,62 @@
+distribute result [$$47] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+-- DISTRIBUTE_RESULT |UNPARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- ONE_TO_ONE_EXCHANGE |UNPARTITIONED|
+ project ([$$47]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- STREAM_PROJECT |UNPARTITIONED|
+ assign [$$47] <- [{"cnt": $$50}] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- ASSIGN |UNPARTITIONED|
+ aggregate [$$50] <- [agg-sql-sum($$52)] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- AGGREGATE |UNPARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- RANDOM_MERGE_EXCHANGE |PARTITIONED|
+ aggregate [$$52] <- [agg-sql-count($$48)] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- AGGREGATE |PARTITIONED|
+ project ([$$48]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- STREAM_PROJECT |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 0.0, total-cost: 6000000.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ join (eq($$48, $$49)) [cardinality: 1000000.0, op-cost: 2000000.0, total-cost: 6000000.0]
+ -- HYBRID_HASH_JOIN [$$48][$$49] |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ project ([$$48]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$48] <- [$$49] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- ASSIGN |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ replicate [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- REPLICATE |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+ -- HASH_PARTITION_EXCHANGE [$$49] |PARTITIONED|
+ project ([$$49]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$49] <- [$$b.getField("quarter")] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- ASSIGN |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$b] <- test.test6 [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0]
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ replicate [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- REPLICATE |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+ -- HASH_PARTITION_EXCHANGE [$$49] |PARTITIONED|
+ project ([$$49]) [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- STREAM_PROJECT |PARTITIONED|
+ assign [$$49] <- [$$b.getField("quarter")] [cardinality: 1000000.0, op-cost: 0.0, total-cost: 1000000.0]
+ -- ASSIGN |PARTITIONED|
+ exchange [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 2000000.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ data-scan []<-[$$b] <- test.test6 [cardinality: 1000000.0, op-cost: 1000000.0, total-cost: 1000000.0]
+ -- DATASOURCE_SCAN |PARTITIONED|
+ exchange [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- ONE_TO_ONE_EXCHANGE |PARTITIONED|
+ empty-tuple-source [cardinality: 0.0, op-cost: 0.0, total-cost: 0.0]
+ -- EMPTY_TUPLE_SOURCE |PARTITIONED|
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.014.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.014.adm
new file mode 100644
index 0000000..8006c78
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/external-dataset/common/json/json/external_dataset.014.adm
@@ -0,0 +1 @@
+{ "cnt": 1800 }
\ No newline at end of file
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/annotations/ExternalSubpathAnnotation.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/annotations/ExternalSubpathAnnotation.java
new file mode 100644
index 0000000..e3e5ea3
--- /dev/null
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/annotations/ExternalSubpathAnnotation.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.common.annotations;
+
+import org.apache.hyracks.algebricks.core.algebra.expressions.IExpressionAnnotation;
+
+public final class ExternalSubpathAnnotation implements IExpressionAnnotation {
+
+ private final String subPath;
+
+ public ExternalSubpathAnnotation(String subPath) {
+ this.subPath = subPath == null ? "" : subPath.trim();
+ }
+
+ public String getSubPath() {
+ return subPath;
+ }
+}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 2097b4b..050a080 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -297,6 +297,7 @@
public static final String DEFINITION_FIELD_NAME = "definition";
public static final String CONTAINER_NAME_FIELD_NAME = "container";
+ public static final String SUBPATH = "subpath";
public static class ParquetOptions {
private ParquetOptions() {
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index ba07629..29e04e9 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -738,10 +738,28 @@
public static String getPrefix(Map<String, String> configuration, boolean appendSlash) {
String definition = configuration.get(ExternalDataConstants.DEFINITION_FIELD_NAME);
- if (definition != null && !definition.isEmpty()) {
+ String subPath = configuration.get(ExternalDataConstants.SUBPATH);
+ boolean hasDefinition = definition != null && !definition.isEmpty();
+ boolean hasSubPath = subPath != null && !subPath.isEmpty();
+ if (hasDefinition && !hasSubPath) {
return appendSlash ? definition + (!definition.endsWith("/") ? "/" : "") : definition;
}
- return "";
+ String fullPath = "";
+ if (hasSubPath) {
+ if (!hasDefinition) {
+ fullPath = subPath.startsWith("/") ? subPath.substring(1) : subPath;
+ } else {
+ // concatenate definition + subPath:
+ if (definition.endsWith("/") && subPath.startsWith("/")) {
+ subPath = subPath.substring(1);
+ } else if (!definition.endsWith("/") && !subPath.startsWith("/")) {
+ definition = definition + "/";
+ }
+ fullPath = definition + subPath;
+ }
+ fullPath = appendSlash ? fullPath + (!fullPath.endsWith("/") ? "/" : "") : fullPath;
+ }
+ return fullPath;
}
/**
diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/parser/SqlppHint.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/parser/SqlppHint.java
index f565fcd..da3504d 100644
--- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/parser/SqlppHint.java
+++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/parser/SqlppHint.java
@@ -49,6 +49,7 @@
GEN_FIELDS_HINT("gen-fields"),
SINGLE_DATASET_PREDICATE_SELECTIVITY_HINT("selectivity"),
JOIN_PREDICATE_PRODUCTIVITY_HINT("productivity"),
+ SUBPATH_HINT("subpath"),
// data generator hints
DGEN_HINT("dgen");
diff --git a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/VariableCheckAndRewriteVisitor.java b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/VariableCheckAndRewriteVisitor.java
index 4b0caca..8fbad4f 100644
--- a/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/VariableCheckAndRewriteVisitor.java
+++ b/asterixdb/asterix-lang-sqlpp/src/main/java/org/apache/asterix/lang/sqlpp/rewrites/visitor/VariableCheckAndRewriteVisitor.java
@@ -203,6 +203,7 @@
argList.add(new LiteralExpr(new StringLiteral(datasetName)));
}
CallExpr callExpr = new CallExpr(new FunctionSignature(BuiltinFunctions.DATASET), argList);
+ callExpr.addHints(varExpr.getHints());
callExpr.setSourceLocation(sourceLoc);
return callExpr;
}
diff --git a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
index 759fe90..e03a3e3 100644
--- a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
+++ b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
@@ -52,6 +52,7 @@
import org.apache.asterix.common.annotations.DateBetweenYearsDataGen;
import org.apache.asterix.common.annotations.DatetimeAddRandHoursDataGen;
import org.apache.asterix.common.annotations.DatetimeBetweenYearsDataGen;
+import org.apache.asterix.common.annotations.ExternalSubpathAnnotation;
import org.apache.asterix.common.annotations.FieldIntervalDataGen;
import org.apache.asterix.common.annotations.FieldValFileDataGen;
import org.apache.asterix.common.annotations.FieldValFileSameIndexDataGen;
@@ -4982,7 +4983,16 @@
List<AbstractBinaryCorrelateClause> correlateClauses = new ArrayList<AbstractBinaryCorrelateClause>();
}
{
- leftExpr = Expression() ((<AS>)? leftVar = Variable())? (<AT> posVar = Variable())?
+ leftExpr = Expression()
+ {
+ if (leftExpr.getKind() == Expression.Kind.VARIABLE_EXPRESSION) {
+ Token hintToken = fetchHint(token, SqlppHint.SUBPATH_HINT);
+ if (hintToken != null) {
+ String subPath = hintToken.hintParams;
+ ((VariableExpr) leftExpr).addHint(new ExternalSubpathAnnotation(subPath));
+ }
+ }
+ } ((<AS>)? leftVar = Variable())? (<AT> posVar = Variable())?
(
(
correlateClause = JoinOrUnnestClause(JoinType.INNER, UnnestType.INNER)
@@ -5053,7 +5063,16 @@
VariableExpr posVar = null;
}
{
- rightExpr = Expression() ((<AS>)? rightVar = Variable())? (<AT> posVar = Variable())?
+ rightExpr = Expression()
+ {
+ if (rightExpr.getKind() == Expression.Kind.VARIABLE_EXPRESSION) {
+ Token hintToken = fetchHint(token, SqlppHint.SUBPATH_HINT);
+ if (hintToken != null) {
+ String subPath = hintToken.hintParams;
+ ((VariableExpr) rightExpr).addHint(new ExternalSubpathAnnotation(subPath));
+ }
+ }
+ } ((<AS>)? rightVar = Variable())? (<AT> posVar = Variable())?
{
if (rightVar == null) {
rightVar = ExpressionToVariableUtil.getGeneratedVariable(rightExpr, true);
diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/DatasetDataSource.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/DatasetDataSource.java
index 661b954..66ea5a7 100644
--- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/DatasetDataSource.java
+++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/declared/DatasetDataSource.java
@@ -19,8 +19,10 @@
package org.apache.asterix.metadata.declared;
import static org.apache.asterix.external.util.ExternalDataConstants.KEY_EXTERNAL_SCAN_BUFFER_SIZE;
+import static org.apache.asterix.external.util.ExternalDataConstants.SUBPATH;
import java.io.IOException;
+import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -120,7 +122,8 @@
IProjectionInfo<?> projectionInfo) throws AlgebricksException {
switch (dataset.getDatasetType()) {
case EXTERNAL:
- Dataset externalDataset = ((DatasetDataSource) dataSource).getDataset();
+ DatasetDataSource externalDataSource = (DatasetDataSource) dataSource;
+ Dataset externalDataset = externalDataSource.getDataset();
String itemTypeName = externalDataset.getItemTypeName();
IAType itemType = MetadataManager.INSTANCE.getDatatype(metadataProvider.getMetadataTxnContext(),
externalDataset.getItemTypeDataverseName(), itemTypeName).getDatatype();
@@ -129,6 +132,7 @@
PhysicalOptimizationConfig physicalOptimizationConfig = context.getPhysicalOptimizationConfig();
int externalScanBufferSize = physicalOptimizationConfig.getExternalScanBufferSize();
Map<String, String> properties = addExternalProjectionInfo(projectionInfo, edd.getProperties());
+ properties = addSubPath(externalDataSource.getProperties(), properties);
properties.put(KEY_EXTERNAL_SCAN_BUFFER_SIZE, String.valueOf(externalScanBufferSize));
ITypedAdapterFactory adapterFactory = metadataProvider.getConfiguredAdapterFactory(externalDataset,
edd.getAdapter(), properties, (ARecordType) itemType, null, context.getWarningCollector());
@@ -168,6 +172,16 @@
return propertiesCopy;
}
+ private Map<String, String> addSubPath(Map<String, Serializable> dataSourceProps, Map<String, String> properties) {
+ Serializable subPath = dataSourceProps.get(SUBPATH);
+ if (!(subPath instanceof String)) {
+ return properties;
+ }
+ Map<String, String> propertiesCopy = new HashMap<>(properties);
+ propertiesCopy.put(SUBPATH, (String) subPath);
+ return propertiesCopy;
+ }
+
private int[] createFilterIndexes(List<LogicalVariable> filterVars, IOperatorSchema opSchema) {
if (filterVars != null && !filterVars.isEmpty()) {
final int size = filterVars.size();
@@ -185,4 +199,8 @@
return dataset.getDatasetType() == DatasetType.EXTERNAL;
}
+ @Override
+ public boolean compareProperties() {
+ return dataset.getDatasetType() == DatasetType.EXTERNAL;
+ }
}
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java
index e15b699..8b4e56d 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/metadata/IDataSource.java
@@ -18,7 +18,9 @@
*/
package org.apache.hyracks.algebricks.core.algebra.metadata;
+import java.io.Serializable;
import java.util.List;
+import java.util.Map;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
import org.apache.hyracks.algebricks.core.algebra.properties.FunctionalDependency;
@@ -37,4 +39,10 @@
public boolean isScanAccessPathALeaf();
public INodeDomain getDomain();
+
+ public Map<String, Serializable> getProperties();
+
+ default boolean compareProperties() {
+ return false;
+ }
}
diff --git a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
index b446075..9e2e87c 100644
--- a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
+++ b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/algebra/operators/logical/visitors/IsomorphismOperatorVisitor.java
@@ -34,6 +34,7 @@
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalPlan;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
+import org.apache.hyracks.algebricks.core.algebra.metadata.IDataSource;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AggregateOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
@@ -476,6 +477,12 @@
if (!isomorphic) {
return Boolean.FALSE;
}
+ IDataSource<?> dataSource = op.getDataSource();
+ IDataSource<?> argDataSource = argScan.getDataSource();
+ if (dataSource.compareProperties() && argDataSource.compareProperties()
+ && !Objects.equals(dataSource.getProperties(), argDataSource.getProperties())) {
+ return Boolean.FALSE;
+ }
DataSourceScanOperator scanOpArg = (DataSourceScanOperator) copyAndSubstituteVar(op, arg);
ILogicalExpression opCondition = op.getSelectCondition() != null ? op.getSelectCondition().getValue() : null;
ILogicalExpression argCondition =