[NO ISSUE][FUN] Fix string replace with negative limit
- user model changes: yes
- storage format changes: no
- interface changes: no
Details:
- REPLACE() and REGEXP_REPLACE() must replace all occurrences
if a negative limit is specified
Change-Id: I240b57bb90f5349d09dc91e868277b9cdcba89b8
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2868
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <tillw@apache.org>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/replace_with_limit/replace_with_limit.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/replace_with_limit/replace_with_limit.1.query.sqlpp
index 3c33e49..4eb0963 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/replace_with_limit/replace_with_limit.1.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/replace_with_limit/replace_with_limit.1.query.sqlpp
@@ -30,6 +30,6 @@
[ 8, replace("_ab_ba_ab_ab_ab_", "ab", "xyz", bigint("4")) ],
[ 9, replace("abracadabra_abracadabra_xyz","bra","*BRA*", 3) ],
[ 10, replace("ab_ba_ab", "ab", "ba", 0) ],
- [ 11, replace("ab_ba_ab", "ab", "ba", -100) ]
+ [ 11, replace("ab_ba_ab_ba_ab", "ab", "ba", -100) ]
] t
order by t[0]
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_replace/regexp_replace.6.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_replace/regexp_replace.6.adm
index 6fc838d..1005ff0 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_replace/regexp_replace.6.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/regexp_replace/regexp_replace.6.adm
@@ -1 +1 @@
-{ "result1": "abracadabra", "result2": "abracadabra", "result3": "akkkcadabra", "result4": "akkkcadakkk_abracadabra", "result5": "akkkcadakkk_akkkcadakkk" }
\ No newline at end of file
+{ "result1": "akkkcadakkk", "result2": "abracadabra", "result3": "akkkcadabra", "result4": "akkkcadakkk_abracadabra", "result5": "akkkcadakkk_akkkcadakkk" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/replace_with_limit/replace_with_limit.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/replace_with_limit/replace_with_limit.1.adm
index 1e7020e..cc5bbeb 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/replace_with_limit/replace_with_limit.1.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/replace_with_limit/replace_with_limit.1.adm
@@ -9,4 +9,4 @@
[ 8, "_xyz_ba_xyz_xyz_xyz_" ]
[ 9, "a*BRA*cada*BRA*_a*BRA*cadabra_xyz" ]
[ 10, "ab_ba_ab" ]
-[ 11, "ab_ba_ab" ]
\ No newline at end of file
+[ 11, "ba_ba_ba_ba_ba" ]
diff --git a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md
index 1f9ac19..c9145fd 100644
--- a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md
+++ b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md
@@ -340,7 +340,8 @@
* `string_replacement` : a pattern `string` to be used as the replacement,
* `string_flag` : (Optional) a `string` with flags to be used during replace.
* The following modes are enabled with these flags: dotall (s), multiline (m), case_insensitive (i), and comments and whitespace (x).
- * `replacement_limit`: (Optional) an `integer` specifying the maximum number of replacements to make
+ * `replacement_limit`: (Optional) an `integer` specifying the maximum number of replacements to make
+ (if negative then all occurrences will be replaced)
* Return Value:
* Returns a `string` that is obtained after the replacements,
* `missing` if any argument is a `missing` value,
@@ -395,7 +396,7 @@
* `search_string` : a `string` substring to be searched for,
* `replacement_string` : a `string` to be used as the replacement,
* `limit` : (Optional) an `integer` - maximum number of occurrences to be replaced.
- If not specified then all occurrences will be replaced
+ If not specified or negative then all occurrences will be replaced
* Return Value:
* Returns a `string` that is obtained after the replacements,
* `missing` if any argument is a `missing` value,
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
index 050eb2e..77622d3 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/utils/RegExpMatcher.java
@@ -180,6 +180,9 @@
* @return a new string with contained regular expressions replaced.
*/
public String replace(UTF8StringPointable replaceStrPtr, int replaceLimit) {
+ if (replaceLimit < 0) {
+ replaceLimit = Integer.MAX_VALUE;
+ }
// Sets up a new replacement string if necessary.
final boolean newReplace =
replaceStrPtr != null && (replaceStr == null || lastReplaceStrPtr.compareTo(replaceStrPtr) != 0);
@@ -187,7 +190,6 @@
StringEvaluatorUtils.copyResetUTF8Pointable(replaceStrPtr, lastReplaceStorage, lastReplaceStrPtr);
replaceStr = replaceStrPtr.toString();
}
-
// Does the actual replacement.
resultBuf.setLength(0);
for (int i = 0; i < replaceLimit && matcher.find(); i++) {
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index f5ddfe7..c60e744 100644
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -626,9 +626,12 @@
public static boolean findAndReplace(UTF8StringPointable srcPtr, UTF8StringPointable searchPtr,
UTF8StringPointable replacePtr, int replaceLimit, UTF8StringBuilder builder, GrowableArray out)
throws IOException {
- if (replaceLimit < 1) {
+ if (replaceLimit == 0) {
return false;
}
+ if (replaceLimit < 0) {
+ replaceLimit = Integer.MAX_VALUE;
+ }
int curIdx = find(srcPtr, searchPtr, false);
if (curIdx < 0) {
return false;