[ASTERIXDB-3235][EXT]: Refactor external prefix + add filter evaluate expression (P1)
Change-Id: I2b91d29a2241218baea439042a35a1e6d19ad5e2
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17697
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Reviewed-by: Wail Alkowaileet <wael.y.k@gmail.com>
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
index b2c405a..2736969 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/external_dataset/PrefixComputedFieldsTest.java
@@ -34,14 +34,14 @@
@Test
public void test() throws Exception {
- ExternalDataPrefix prefix = new ExternalDataPrefix(null);
+ ExternalDataPrefix prefix = new ExternalDataPrefix(Collections.emptyMap());
assertEquals("", prefix.getOriginal());
assertEquals("", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
assertEquals(Collections.emptyList(), prefix.getSegments());
- assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldNames());
- assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldTypes());
- assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldIndexes());
+ assertEquals(Collections.emptyList(), prefix.getComputedFieldNames());
+ assertEquals(Collections.emptyList(), prefix.getComputedFieldTypes());
+ assertEquals(Collections.emptyList(), prefix.getComputedFieldSegmentIndexes());
String prefix1 = "";
prefix = new ExternalDataPrefix(prefix1);
@@ -49,9 +49,9 @@
assertEquals("", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
assertEquals(Collections.emptyList(), prefix.getSegments());
- assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldNames());
- assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldTypes());
- assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldIndexes());
+ assertEquals(Collections.emptyList(), prefix.getComputedFieldNames());
+ assertEquals(Collections.emptyList(), prefix.getComputedFieldTypes());
+ assertEquals(Collections.emptyList(), prefix.getComputedFieldSegmentIndexes());
String prefix2 = "hotel";
prefix = new ExternalDataPrefix(prefix2);
@@ -59,9 +59,9 @@
assertEquals("hotel", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
assertEquals(List.of("hotel"), prefix.getSegments());
- assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldNames());
- assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldTypes());
- assertEquals(Collections.emptyList(), prefix.getComputedFieldDetails().getComputedFieldIndexes());
+ assertEquals(Collections.emptyList(), prefix.getComputedFieldNames());
+ assertEquals(Collections.emptyList(), prefix.getComputedFieldTypes());
+ assertEquals(Collections.emptyList(), prefix.getComputedFieldSegmentIndexes());
String prefix3 = "hotel/{hotel-id:inT}/";
prefix = new ExternalDataPrefix(prefix3);
@@ -69,9 +69,9 @@
assertEquals("hotel/", prefix.getRoot());
assertTrue(prefix.isEndsWithSlash());
assertEquals(List.of("hotel", "{hotel-id:inT}"), prefix.getSegments());
- assertEquals(List.of(List.of("hotel-id")), prefix.getComputedFieldDetails().getComputedFieldNames());
- assertEquals(List.of(AINT32), prefix.getComputedFieldDetails().getComputedFieldTypes());
- assertEquals(List.of(1), prefix.getComputedFieldDetails().getComputedFieldIndexes());
+ assertEquals(List.of("hotel-id"), prefix.getComputedFieldNames());
+ assertEquals(List.of(AINT32), prefix.getComputedFieldTypes());
+ assertEquals(List.of(1), prefix.getComputedFieldSegmentIndexes());
String prefix4 = "hotel/{hotel-id:int}-{hotel-name:sTRing}";
prefix = new ExternalDataPrefix(prefix4);
@@ -79,10 +79,9 @@
assertEquals("hotel", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
assertEquals(List.of("hotel", "{hotel-id:int}-{hotel-name:sTRing}"), prefix.getSegments());
- assertEquals(List.of(List.of("hotel-id"), List.of("hotel-name")),
- prefix.getComputedFieldDetails().getComputedFieldNames());
- assertEquals(List.of(AINT32, ASTRING), prefix.getComputedFieldDetails().getComputedFieldTypes());
- assertEquals(List.of(1, 1), prefix.getComputedFieldDetails().getComputedFieldIndexes());
+ assertEquals(List.of("hotel-id", "hotel-name"), prefix.getComputedFieldNames());
+ assertEquals(List.of(AINT32, ASTRING), prefix.getComputedFieldTypes());
+ assertEquals(List.of(1, 1), prefix.getComputedFieldSegmentIndexes());
String prefix5 = "hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}-{month:int}-{day:int}/";
prefix = new ExternalDataPrefix(prefix5);
@@ -92,12 +91,9 @@
assertTrue(prefix.isEndsWithSlash());
assertEquals(List.of("hotel", "something", "{hotel-id:int}-{hotel-name:sTRing}", "review",
"{year:int}-{month:int}-{day:int}"), prefix.getSegments());
- assertEquals(
- List.of(List.of("hotel-id"), List.of("hotel-name"), List.of("year"), List.of("month"), List.of("day")),
- prefix.getComputedFieldDetails().getComputedFieldNames());
- assertEquals(List.of(AINT32, ASTRING, AINT32, AINT32, AINT32),
- prefix.getComputedFieldDetails().getComputedFieldTypes());
- assertEquals(List.of(2, 2, 4, 4, 4), prefix.getComputedFieldDetails().getComputedFieldIndexes());
+ assertEquals(List.of("hotel-id", "hotel-name", "year", "month", "day"), prefix.getComputedFieldNames());
+ assertEquals(List.of(AINT32, ASTRING, AINT32, AINT32, AINT32), prefix.getComputedFieldTypes());
+ assertEquals(List.of(2, 2, 4, 4, 4), prefix.getComputedFieldSegmentIndexes());
String prefix6 = "hotel/something/{hotel-id:int}-{hotel-name:sTRing}/review/{year:int}/{month:int}/{day:int}";
prefix = new ExternalDataPrefix(prefix6);
@@ -107,21 +103,17 @@
assertFalse(prefix.isEndsWithSlash());
assertEquals(List.of("hotel", "something", "{hotel-id:int}-{hotel-name:sTRing}", "review", "{year:int}",
"{month:int}", "{day:int}"), prefix.getSegments());
- assertEquals(
- List.of(List.of("hotel-id"), List.of("hotel-name"), List.of("year"), List.of("month"), List.of("day")),
- prefix.getComputedFieldDetails().getComputedFieldNames());
- assertEquals(List.of(AINT32, ASTRING, AINT32, AINT32, AINT32),
- prefix.getComputedFieldDetails().getComputedFieldTypes());
- assertEquals(List.of(2, 2, 4, 5, 6), prefix.getComputedFieldDetails().getComputedFieldIndexes());
+ assertEquals(List.of("hotel-id", "hotel-name", "year", "month", "day"), prefix.getComputedFieldNames());
+ assertEquals(List.of(AINT32, ASTRING, AINT32, AINT32, AINT32), prefix.getComputedFieldTypes());
+ assertEquals(List.of(2, 2, 4, 5, 6), prefix.getComputedFieldSegmentIndexes());
String prefix7 = "hotel/{hotel.details.id:int}-{hotel-name:sTRing}";
prefix = new ExternalDataPrefix(prefix7);
assertEquals("hotel/{hotel.details.id:int}-{hotel-name:sTRing}", prefix.getOriginal());
assertEquals("hotel", prefix.getRoot());
assertFalse(prefix.isEndsWithSlash());
- assertEquals(List.of(List.of("hotel", "details", "id"), List.of("hotel-name")),
- prefix.getComputedFieldDetails().getComputedFieldNames());
- assertEquals(List.of(AINT32, ASTRING), prefix.getComputedFieldDetails().getComputedFieldTypes());
- assertEquals(List.of(1, 1), prefix.getComputedFieldDetails().getComputedFieldIndexes());
+ assertEquals(List.of("hotel.details.id", "hotel-name"), prefix.getComputedFieldNames());
+ assertEquals(List.of(AINT32, ASTRING), prefix.getComputedFieldTypes());
+ assertEquals(List.of(1, 1), prefix.getComputedFieldSegmentIndexes());
}
}
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluator.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluator.java
index 22cd20a..a169ecb 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluator.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluator.java
@@ -18,14 +18,14 @@
*/
package org.apache.asterix.common.external;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
public interface IExternalFilterEvaluator {
boolean isEmpty();
boolean isComputedFieldUsed(int index);
- void setValue(int index, String stringValue) throws HyracksDataException;
+ void setValue(int index, String stringValue) throws AlgebricksException;
- boolean evaluate() throws HyracksDataException;
+ boolean evaluate() throws AlgebricksException;
}
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluatorFactory.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluatorFactory.java
index 38a38a6..c29e554 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluatorFactory.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/IExternalFilterEvaluatorFactory.java
@@ -20,11 +20,11 @@
import java.io.Serializable;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.api.application.IServiceContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.IWarningCollector;
public interface IExternalFilterEvaluatorFactory extends Serializable {
IExternalFilterEvaluator create(IServiceContext serviceContext, IWarningCollector warningCollector)
- throws HyracksDataException;
+ throws AlgebricksException;
}
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluator.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluator.java
index 78ebeb4..e48ad80 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluator.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluator.java
@@ -18,8 +18,6 @@
*/
package org.apache.asterix.common.external;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
-
class NoOpExternalFilterEvaluator implements IExternalFilterEvaluator {
static final IExternalFilterEvaluator INSTANCE = new NoOpExternalFilterEvaluator();
@@ -37,12 +35,12 @@
}
@Override
- public void setValue(int index, String stringValue) throws HyracksDataException {
+ public void setValue(int index, String stringValue) {
throw new IndexOutOfBoundsException("Number of paths is 0");
}
@Override
- public boolean evaluate() throws HyracksDataException {
+ public boolean evaluate() {
return true;
}
}
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluatorFactory.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluatorFactory.java
index 4b5bebb..7b8792e 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluatorFactory.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/external/NoOpExternalFilterEvaluatorFactory.java
@@ -19,7 +19,6 @@
package org.apache.asterix.common.external;
import org.apache.hyracks.api.application.IServiceContext;
-import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.exceptions.IWarningCollector;
public class NoOpExternalFilterEvaluatorFactory implements IExternalFilterEvaluatorFactory {
@@ -30,8 +29,7 @@
}
@Override
- public IExternalFilterEvaluator create(IServiceContext serviceContext, IWarningCollector warningCollector)
- throws HyracksDataException {
+ public IExternalFilterEvaluator create(IServiceContext serviceContext, IWarningCollector warningCollector) {
return NoOpExternalFilterEvaluator.INSTANCE;
}
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
index 4fc63c6..cfa1e46 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/AwsS3InputStreamFactory.java
@@ -23,17 +23,14 @@
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
-import java.util.function.Supplier;
+import org.apache.asterix.common.external.IExternalFilterEvaluator;
import org.apache.asterix.common.external.IExternalFilterEvaluatorFactory;
import org.apache.asterix.external.api.AsterixInputStream;
import org.apache.asterix.external.input.record.reader.abstracts.AbstractExternalInputStreamFactory;
-import org.apache.asterix.external.util.ExternalDataConstants;
import org.apache.asterix.external.util.ExternalDataPrefix;
import org.apache.asterix.external.util.ExternalDataUtils;
import org.apache.asterix.external.util.aws.s3.S3Utils;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.IAType;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.api.application.IServiceContext;
import org.apache.hyracks.api.context.IHyracksTaskContext;
@@ -61,51 +58,30 @@
IncludeExcludeMatcher includeExcludeMatcher = ExternalDataUtils.getIncludeExcludeMatchers(configuration);
//Get a list of S3 objects
- String prefix = configuration.get(ExternalDataConstants.DEFINITION_FIELD_NAME);
- ExternalDataPrefix externalDataPrefix = new ExternalDataPrefix(prefix);
+ ExternalDataPrefix externalDataPrefix = new ExternalDataPrefix(configuration);
configuration.put(ExternalDataPrefix.PREFIX_ROOT_FIELD_NAME, externalDataPrefix.getRoot());
// TODO(htowaileb): Since we're using the root to load the files then start filtering, it might end up being
// very expensive since at the root of the prefix we might load millions of files, we should consider (when
// possible) to get the value and add it
List<S3Object> filesOnly = S3Utils.listS3Objects(configuration, includeExcludeMatcher, warningCollector);
-
- filesOnly = filterPrefixes(externalDataPrefix, filesOnly, () -> true);
+ filterPrefixes(externalDataPrefix, filesOnly, filterEvaluatorFactory.create(ctx, warningCollector));
// Distribute work load amongst the partitions
distributeWorkLoad(filesOnly, getPartitionsCount());
}
private List<S3Object> filterPrefixes(ExternalDataPrefix prefix, List<S3Object> filesOnly,
- Supplier<Boolean> evaluator) {
+ IExternalFilterEvaluator evaluator) throws AlgebricksException {
- // if no computed fields, return the original list
- if (prefix.getComputedFieldDetails().isEmpty()) {
+ // if no computed fields or empty files list, return the original list
+ if (filesOnly.isEmpty() || !prefix.hasComputedFields()) {
return filesOnly;
}
List<S3Object> filteredList = new ArrayList<>();
for (S3Object file : filesOnly) {
- List<String> segments = ExternalDataPrefix.getPrefixSegments(file.key());
- boolean match = false;
-
- // if the object key has fewer segments than the expected prefix, then filter it out
- // TODO(htowaileb): potentially also exclude if the size matches, key should be longer than prefix
- if (segments.size() < prefix.getComputedFieldDetails().getComputedFieldNames().size()) {
- continue;
- }
-
- for (int i = 0; i < prefix.getComputedFieldDetails().getComputedFieldNames().size(); i++) {
- int index = prefix.getComputedFieldDetails().getComputedFieldIndexes().get(i);
-
- // TODO(htowaileb): evaluator will container an expression that evaluates whether to include an object or not
- match = evaluator.get();
- if (!match) {
- break;
- }
- }
-
- if (match) {
+ if (prefix.evaluate(file.key(), evaluator)) {
filteredList.add(file);
}
}
@@ -113,10 +89,6 @@
return filteredList;
}
- private ARecordType createRecord(String[] fieldNames, IAType[] fieldTypes) {
- return new ARecordType("root", fieldNames, fieldTypes, false);
- }
-
/**
* To efficiently utilize the parallelism, work load will be distributed amongst the partitions based on the file
* size.
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
index c3419c1..97bf776 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataPrefix.java
@@ -20,8 +20,8 @@
package org.apache.asterix.external.util;
import static org.apache.asterix.external.util.ExternalDataConstants.COMPUTED_FIELD_PATTERN;
+import static org.apache.asterix.external.util.ExternalDataConstants.DEFINITION_FIELD_NAME;
import static org.apache.asterix.external.util.ExternalDataConstants.PREFIX_DEFAULT_DELIMITER;
-import static org.apache.asterix.om.utils.ProjectionFiltrationTypeUtil.getRecordTypeWithFieldTypes;
import java.util.ArrayList;
import java.util.Arrays;
@@ -35,22 +35,28 @@
import org.apache.asterix.common.exceptions.CompilationException;
import org.apache.asterix.common.exceptions.ErrorCode;
+import org.apache.asterix.common.external.IExternalFilterEvaluator;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.om.types.BuiltinType;
import org.apache.asterix.om.types.BuiltinTypeMap;
import org.apache.asterix.om.types.IAType;
+import org.apache.asterix.om.utils.ProjectionFiltrationTypeUtil;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
public class ExternalDataPrefix {
private final String original;
- private final String root;
+ private String root;
private final boolean endsWithSlash;
-
private final List<String> segments;
- private final ComputedFieldDetails computedFieldDetails;
+
+ private final List<String> computedFieldNames = new ArrayList<>();
+ private final List<IAType> computedFieldTypes = new ArrayList<>();
+ private final List<Integer> computedFieldSegmentIndexes = new ArrayList<>();
+ private final List<ARecordType> paths = new ArrayList<>();
+ private final Map<Integer, Pair<List<String>, List<IAType>>> computedFields = new HashMap<>();
public static final String PREFIX_ROOT_FIELD_NAME = "prefix-root";
public static final Set<ATypeTag> supportedTypes = new HashSet<>();
@@ -60,62 +66,83 @@
supportedTypes.add(BuiltinType.AINT32.getTypeTag());
}
+ public ExternalDataPrefix(Map<String, String> configuration) throws AlgebricksException {
+ this(configuration.get(DEFINITION_FIELD_NAME));
+ }
+
public ExternalDataPrefix(String prefix) throws AlgebricksException {
this.original = prefix != null ? prefix : "";
this.endsWithSlash = this.original.endsWith("/");
- this.segments = getPrefixSegments(this.original);
+ segments = extractPrefixSegments(original);
+ extractComputedFields();
+ extractRoot();
- computedFieldDetails = getComputedFields(segments);
- this.root = getPrefixRoot(segments, computedFieldDetails.getComputedFieldIndexes());
+ for (int i = 0; i < computedFieldSegmentIndexes.size(); i++) {
+ int segmentIndex = computedFieldSegmentIndexes.get(i);
+
+ if (computedFields.containsKey(segmentIndex)) {
+ Pair<List<String>, List<IAType>> pair = computedFields.get(segmentIndex);
+ pair.getLeft().add(computedFieldNames.get(i));
+ pair.getRight().add(computedFieldTypes.get(i));
+ } else {
+ List<String> names = new ArrayList<>();
+ List<IAType> types = new ArrayList<>();
+
+ names.add(computedFieldNames.get(i));
+ types.add(computedFieldTypes.get(i));
+ computedFields.put(segmentIndex, Pair.of(names, types));
+ }
+ }
}
public String getOriginal() {
return original;
}
+ public boolean isEndsWithSlash() {
+ return endsWithSlash;
+ }
+
public String getRoot() {
return root;
}
- public boolean isEndsWithSlash() {
- return endsWithSlash;
+ public boolean hasComputedFields() {
+ return !computedFieldNames.isEmpty();
}
public List<String> getSegments() {
return segments;
}
- public ComputedFieldDetails getComputedFieldDetails() {
- return computedFieldDetails;
+ public List<String> getComputedFieldNames() {
+ return computedFieldNames;
+ }
+
+ public List<IAType> getComputedFieldTypes() {
+ return computedFieldTypes;
+ }
+
+ public List<Integer> getComputedFieldSegmentIndexes() {
+ return computedFieldSegmentIndexes;
+ }
+
+ public List<ARecordType> getPaths() {
+ return paths;
}
/**
- * returns the segments of a prefix, separated by the delimiter
- *
- * @param prefix prefix
- * @return an array of prefix segments
+ * extracts the segments of a prefix, separated by the delimiter
*/
- public static List<String> getPrefixSegments(String prefix) {
+ private List<String> extractPrefixSegments(String prefix) {
return prefix.isEmpty() ? Collections.emptyList() : Arrays.asList(prefix.split(PREFIX_DEFAULT_DELIMITER));
}
/**
- * Extracts and returns the computed fields and their indexes from the provided prefix
- * @param prefix prefix
- *
- * @return Pair of computed field names and their segment index in the prefix
+ * extracts the computed fields (names, types, segment indexes and paths) from the prefix segments and stores them
*/
- public static ComputedFieldDetails getComputedFields(String prefix) throws AlgebricksException {
- List<String> segments = getPrefixSegments(prefix);
- return getComputedFields(segments);
- }
-
- public static ComputedFieldDetails getComputedFields(List<String> segments) throws AlgebricksException {
- List<List<String>> computedFieldsNames = new ArrayList<>();
- List<IAType> computedFieldTypes = new ArrayList<>();
- List<Integer> computedFieldIndexes = new ArrayList<>();
-
+ private void extractComputedFields() throws AlgebricksException {
// check if there are any segments before doing any testing
if (!segments.isEmpty()) {
// search for computed fields in each segment
@@ -132,15 +159,38 @@
IAType type = BuiltinTypeMap.getBuiltinType(typePart);
validateSupported(type.getTypeTag());
- List<String> nameParts = List.of(namePart.split("\\."));
- computedFieldsNames.add(nameParts);
+ computedFieldNames.add(namePart);
computedFieldTypes.add(type);
- computedFieldIndexes.add(i);
+ computedFieldSegmentIndexes.add(i);
+
+ List<String> nameParts = List.of(namePart.split("\\."));
+ paths.add(ProjectionFiltrationTypeUtil.getPathRecordType(nameParts));
}
}
}
+ }
- return new ComputedFieldDetails(computedFieldsNames, computedFieldTypes, computedFieldIndexes);
+ /**
+ * Extracts the longest static path (root) before encountering the first computed field and stores it in root
+ */
+ private void extractRoot() {
+ StringBuilder builder = new StringBuilder();
+
+ // check if there are any computed fields before doing any testing
+ if (computedFieldNames.isEmpty()) {
+ root = original;
+ return;
+ }
+
+ // construct all static parts before encountering the first computed field
+ for (int i = 0; i < computedFieldSegmentIndexes.get(0); i++) {
+ builder.append(segments.get(i)).append("/");
+ }
+
+ // remove last "/" and append it only if needed
+ root = builder.toString();
+ root = root.substring(0, root.length() - 1);
+ root = ExternalDataUtils.appendSlash(root, endsWithSlash);
}
/**
@@ -149,103 +199,50 @@
* @param type type to check
* @throws CompilationException exception if type is not supported
*/
- private static void validateSupported(ATypeTag type) throws CompilationException {
+ private void validateSupported(ATypeTag type) throws CompilationException {
if (!supportedTypes.contains(type)) {
throw new CompilationException(ErrorCode.UNSUPPORTED_COMPUTED_FIELD_TYPE, type);
}
}
/**
- * Returns the longest static path (root) before encountering the first computed field
+ * Evaluates whether the provided key satisfies the conditions of the evaluator or not
*
- * @param prefix prefix
- * @return prefix root
+ * @param key object key to evaluate
+ * @param evaluator evaluator
+ *
+ * @return true if key satisfies the evaluator conditions, false otherwise
*/
- public String getPrefixRoot(String prefix) throws AlgebricksException {
- List<String> prefixSegments = getPrefixSegments(prefix);
- List<Integer> computedFieldIndexes = getComputedFields(prefix).getComputedFieldIndexes();
- return getPrefixRoot(prefixSegments, computedFieldIndexes);
+ public boolean evaluate(String key, IExternalFilterEvaluator evaluator) throws AlgebricksException {
+ List<String> keySegments = extractPrefixSegments(key);
+
+ // the object key must have more segments than the prefix, otherwise it is filtered out
+ if (keySegments.size() <= segments.size()) {
+ return false;
+ }
+
+ // extract values for all computed fields and set them in the evaluator
+ List<String> values = extractValues(keySegments);
+ for (int i = 0; i < computedFieldNames.size(); i++) {
+ evaluator.setValue(i, values.get(i));
+ }
+
+ return evaluator.evaluate();
}
- public String getPrefixRoot(List<String> prefixSegments, List<Integer> computedFieldIndexes) {
- StringBuilder root = new StringBuilder();
+ /**
+ * extracts the computed fields values from the object's key
+ *
+ * @param keySegments object's key segments
+ * @return list of computed field values
+ */
+ private List<String> extractValues(List<String> keySegments) {
+ List<String> values = new ArrayList<>();
- // check if there are any computed fields before doing any testing
- if (computedFieldIndexes.size() == 0) {
- return this.original;
+ for (Integer computedFieldSegmentIndex : computedFieldSegmentIndexes) {
+ values.add(keySegments.get(computedFieldSegmentIndex));
}
- // construct all static parts before encountering the first computed field
- for (int i = 0; i < computedFieldIndexes.get(0); i++) {
- root.append(prefixSegments.get(i)).append("/");
- }
-
- // remove last "/" and append it only if needed
- String finalRoot = root.toString();
- finalRoot = finalRoot.substring(0, finalRoot.length() - 1);
- return ExternalDataUtils.appendSlash(finalRoot, this.endsWithSlash);
- }
-
- public static class ComputedFieldDetails {
- private final List<List<String>> computedFieldNames;
- private final List<IAType> computedFieldTypes;
- private final List<Integer> computedFieldIndexes;
- private final Map<Integer, Pair<List<List<String>>, List<IAType>>> computedFields = new HashMap<>();
- private final ARecordType recordType;
-
- public ComputedFieldDetails(List<List<String>> computedFieldNames, List<IAType> computedFieldTypes,
- List<Integer> computedFieldIndexes) throws AlgebricksException {
- this.computedFieldNames = computedFieldNames;
- this.computedFieldTypes = computedFieldTypes;
- this.computedFieldIndexes = computedFieldIndexes;
-
- this.recordType = getRecordTypeWithFieldTypes(computedFieldNames, computedFieldTypes);
-
- for (int i = 0; i < computedFieldIndexes.size(); i++) {
- int index = computedFieldIndexes.get(i);
-
- if (computedFields.containsKey(index)) {
- Pair<List<List<String>>, List<IAType>> pair = computedFields.get(index);
- pair.getLeft().add(computedFieldNames.get(i));
- pair.getRight().add(computedFieldTypes.get(i));
- } else {
- List<List<String>> names = new ArrayList<>();
- List<IAType> types = new ArrayList<>();
-
- names.add(computedFieldNames.get(i));
- types.add(computedFieldTypes.get(i));
- computedFields.put(index, Pair.of(names, types));
- }
- }
- }
-
- public boolean isEmpty() {
- return computedFieldNames.isEmpty();
- }
-
- public List<List<String>> getComputedFieldNames() {
- return computedFieldNames;
- }
-
- public List<IAType> getComputedFieldTypes() {
- return computedFieldTypes;
- }
-
- public List<Integer> getComputedFieldIndexes() {
- return computedFieldIndexes;
- }
-
- public ARecordType getRecordType() {
- return recordType;
- }
-
- public Map<Integer, Pair<List<List<String>>, List<IAType>>> getComputedFields() {
- return computedFields;
- }
-
- @Override
- public String toString() {
- return computedFields.toString();
- }
+ return values;
}
}
diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 02653f3..9d36b4a 100644
--- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -752,11 +752,20 @@
}
public static String getPrefix(Map<String, String> configuration, boolean appendSlash) {
+ String root = configuration.get(ExternalDataPrefix.PREFIX_ROOT_FIELD_NAME);
String definition = configuration.get(ExternalDataConstants.DEFINITION_FIELD_NAME);
String subPath = configuration.get(ExternalDataConstants.SUBPATH);
+ boolean hasRoot = root != null && !root.isEmpty();
boolean hasDefinition = definition != null && !definition.isEmpty();
boolean hasSubPath = subPath != null && !subPath.isEmpty();
+
+ // if computed fields are used, the subpath does not take effect. We can tell whether computed fields are used
+ // by checking whether the root matches the definition: they never match when computed fields are used
+ if (hasRoot && hasDefinition && !root.equals(definition)) {
+ return appendSlash(root, appendSlash);
+ }
+
if (hasDefinition && !hasSubPath) {
return appendSlash(definition, appendSlash);
}