[ASTERIXDB-2685][FUN] regex-split: avoid creating unnecessary objects
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Switched to using the RegExpMatcher class to avoid creating
unnecessary objects (recompiling patterns) when the next tuple's
pattern is the same as the previous one.
Change-Id: Ie6920fb049f7b333e3d41de154839d83b5280926
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/7223
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com>
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
index da6a206..8de8c8a 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringRegExpSplitDescriptor.java
@@ -30,6 +30,7 @@
import org.apache.asterix.om.types.AbstractCollectionType;
import org.apache.asterix.om.types.BuiltinType;
import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.utils.RegExpMatcher;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import org.apache.hyracks.algebricks.runtime.base.IEvaluatorContext;
import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
@@ -65,10 +66,13 @@
private final AbstractCollectionType collectionType =
new AOrderedListType(BuiltinType.ASTRING, BuiltinType.ASTRING.getTypeName());
+ private final RegExpMatcher matcher = new RegExpMatcher();
+
@Override
protected void process(UTF8StringPointable srcPtr, UTF8StringPointable patternPtr,
IPointable result) throws HyracksDataException {
- String[] splits = srcPtr.toString().split(patternPtr.toString());
+ matcher.build(srcPtr, patternPtr);
+ String[] splits = matcher.split();
// Result is a list of type strings
listBuilder.reset(collectionType);
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
index 778df5b..1a190cc 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
@@ -209,4 +209,13 @@
matcher.appendTail(resultBuf);
return resultBuf.toString();
}
+
+ /**
+ * Splits the current source sequence ({@code charSeq}) around matches of the current {@code pattern}.
+ * NOTE(review): the visible caller invokes {@code build(src, pattern)} first; presumably that sets both fields.
+ * @return the string split tokens, exactly as returned by {@code pattern.split(charSeq)}
+ */
+ public String[] split() {
+ return pattern.split(charSeq);
+ }
}