[ASTERIXDB-2442][FUN] substring() should return NULL if the operation cannot be performed
- user model changes: yes
- storage format changes: no
- interface changes: no
Details:
- substring() should return NULL if starting offset is out of bounds
for given string or length is negative
(merge commit '42a739aba768357eb9f80f917f8967879bf44768' from stabilization-f69489)
Change-Id: Iedc34869feb91cd8015db36b61fee7e803e9ec1f
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql
index 1c31ea0..f792e7f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substr04/substr04.3.query.aql
@@ -30,6 +30,9 @@
substring("ABCD", 0, 4),
substring("UC Irvine", 3, string-length("UC Irvine") - 3),
substring("UC Irvine", 0, string-length("UC Irvine")),
-substring(substring("UC Irvine", 3), 0, string-length("Irvine"))
+substring(substring("UC Irvine", 3), 0, string-length("Irvine")),
+substring('ABCD',-3,2),
+substring('ABCD',-10,1),
+substring('ABCD',1,-1)
]
return $a
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql
index ecfcd94..b0e8697 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/string/substring2-4/substring2-4.3.query.aql
@@ -19,4 +19,5 @@
use dataverse test;
let $c1 := substring("HEllow",-3)
-return {"result1": $c1}
+let $c2 := substring("HEllow",-7)
+return {"result1": $c1, "result2": $c2}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp
index e1ef68b..d9f936f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr04/substr04.3.query.sqlpp
@@ -32,7 +32,10 @@
substring('ABCD',0,4),
substring('UC Irvine',3,(`string-length`('UC Irvine') - 3)),
substring('UC Irvine',0,`string-length`('UC Irvine')),
- substring(substring('UC Irvine',3),0,`string-length`('Irvine'))
+ substring(substring('UC Irvine',3),0,`string-length`('Irvine')),
+ substring('ABCD',-3,2),
+ substring('ABCD',-10,1),
+ substring('ABCD',1,-1)
] as a
;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp
index ae943d4..6b63289 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substring2-4/substring2-4.3.query.sqlpp
@@ -19,5 +19,7 @@
use test;
-
-{'result1':test.substring('HEllow',-3)};
+{
+ 'result1':substring('HEllow',-3),
+ 'result2':substring('HEllow',-7)
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm
index 326e22f..c8cbcf0 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr01/substr01.1.adm
@@ -1 +1 @@
-{ "str2": "ld", "str4": "g", "str6": "", "str8": "This is a test string", "str10": "string", "str13": "gThis is a another test string", "str14": "Irvine" }
+{ "str2": "ld", "str4": "g", "str6": null, "str8": "This is a test string", "str10": "string", "str13": "gThis is a another test string", "str14": "Irvine" }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm
index 74bd0d6..4661f40 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr04/substr04.1.adm
@@ -6,3 +6,6 @@
"Irvine"
"UC Irvine"
"Irvine"
+"BC"
+null
+null
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm
index 04393a4..f60907d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-3/substring2-3.1.adm
@@ -1 +1 @@
-{ "result1": "" }
+{ "result1": null }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm
index 197a7af..8355b34 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substring2-4/substring2-4.1.adm
@@ -1 +1 @@
-{ "result1": "low" }
+{ "result1": "low", "result2": null }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/stringoffset/substring/substring.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/stringoffset/substring/substring.1.adm
index a8e64f4..a94cd89 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/stringoffset/substring/substring.1.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/stringoffset/substring/substring.1.adm
@@ -1 +1 @@
-[ "g", "ab", "ab", "bc", "cd" ]
+[ "g", null, "ab", "bc", "cd" ]
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/stringoffset/substring2/substring2.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/stringoffset/substring2/substring2.1.adm
index a11b25b..9b005ca 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/stringoffset/substring2/substring2.1.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/stringoffset/substring2/substring2.1.adm
@@ -1 +1 @@
-[ "g", "abcdefg", "abcdefg", "bcdefg", "cdefg" ]
+[ "g", null, "abcdefg", "bcdefg", "cdefg" ]
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/string/substr04/substr04.3.ast b/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/string/substr04/substr04.3.ast
index f85b949..1727ec1 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/string/substr04/substr04.3.ast
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/string/substr04/substr04.3.ast
@@ -56,6 +56,21 @@
LiteralExpr [STRING] [Irvine]
]
]
+ FunctionCall null.substring@3[
+ LiteralExpr [STRING] [ABCD]
+ - LiteralExpr [LONG] [3]
+ LiteralExpr [LONG] [2]
+ ]
+ FunctionCall null.substring@3[
+ LiteralExpr [STRING] [ABCD]
+ - LiteralExpr [LONG] [10]
+ LiteralExpr [LONG] [1]
+ ]
+ FunctionCall null.substring@3[
+ LiteralExpr [STRING] [ABCD]
+ LiteralExpr [LONG] [1]
+ - LiteralExpr [LONG] [1]
+ ]
]
AS Variable [ Name=$a ]
]
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/string/substring2-4/substring2-4.3.ast b/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/string/substring2-4/substring2-4.3.ast
index 8ff93d6..eb5857e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/string/substring2-4/substring2-4.3.ast
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results_parser_sqlpp/string/substring2-4/substring2-4.3.ast
@@ -9,4 +9,12 @@
- LiteralExpr [LONG] [3]
]
)
+ (
+ LiteralExpr [STRING] [result2]
+ :
+ FunctionCall test.substring@2[
+ LiteralExpr [STRING] [HEllow]
+ - LiteralExpr [LONG] [7]
+ ]
+ )
]
diff --git a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md
index 1c2fd7e..a7b9bbd 100644
--- a/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md
+++ b/asterixdb/asterix-doc/src/main/markdown/builtins/2_string_common.md
@@ -536,7 +536,8 @@
* Return Value:
* a `string` that represents the substring,
* `missing` if any argument is a `missing` value,
- * `null` if any argument is a `null` value but no argument is a `missing` value,
+ * `null` if any argument is a `null` value but no argument is a `missing` value, or if the substring could not
+ be obtained because the starting offset is not within string bounds or `length` is negative.
* a type error will be raised if:
* the first argument is any other non-string value,
* or, the second argument is not a `tinyint`, `smallint`, `integer`, or `bigint`,
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
index 1df617e..d8eeefc 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
@@ -1252,7 +1252,7 @@
addFunction(STRING_TO_CODEPOINT, StringToInt64ListTypeComputer.INSTANCE, true);
addFunction(CODEPOINT_TO_STRING, AStringTypeComputer.INSTANCE, true);
addFunction(STRING_CONCAT, AStringTypeComputer.INSTANCE, true);
- addFunction(SUBSTRING2, StringIntToStringTypeComputer.INSTANCE, true);
+ addFunction(SUBSTRING2, StringIntToStringTypeComputer.INSTANCE_NULLABLE, true);
addFunction(STRING_LENGTH, UnaryStringInt64TypeComputer.INSTANCE, true);
addFunction(STRING_LOWERCASE, StringStringTypeComputer.INSTANCE, true);
addFunction(STRING_UPPERCASE, StringStringTypeComputer.INSTANCE, true);
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
index 0f376ba..b2c95c5 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/StringIntToStringTypeComputer.java
@@ -24,6 +24,7 @@
import org.apache.asterix.om.exceptions.TypeMismatchException;
import org.apache.asterix.om.typecomputer.base.AbstractResultTypeComputer;
import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.AUnionType;
import org.apache.asterix.om.types.BuiltinType;
import org.apache.asterix.om.types.IAType;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -31,13 +32,16 @@
import org.apache.hyracks.api.exceptions.SourceLocation;
public class StringIntToStringTypeComputer extends AbstractResultTypeComputer {
- public static final StringIntToStringTypeComputer INSTANCE = new StringIntToStringTypeComputer(0, 0, 1, 1);
+ public static final StringIntToStringTypeComputer INSTANCE = new StringIntToStringTypeComputer(0, 0, 1, 1, false);
+
+ public static final StringIntToStringTypeComputer INSTANCE_NULLABLE =
+ new StringIntToStringTypeComputer(0, 0, 1, 1, true);
public static final StringIntToStringTypeComputer INSTANCE_TRIPLE_STRING =
- new StringIntToStringTypeComputer(0, 2, 3, 3);
+ new StringIntToStringTypeComputer(0, 2, 3, 3, false);
public static final StringIntToStringTypeComputer INSTANCE_STRING_REGEXP_REPLACE_WITH_FLAG =
- new StringIntToStringTypeComputer(0, 3, 3, 3);
+ new StringIntToStringTypeComputer(0, 3, 3, 3, false);
private final int stringArgIdxMin;
@@ -47,11 +51,15 @@
private final int intArgIdxMax;
- public StringIntToStringTypeComputer(int stringArgIdxMin, int stringArgIdxMax, int intArgIdxMin, int intArgIdxMax) {
+ private final boolean nullable;
+
+ public StringIntToStringTypeComputer(int stringArgIdxMin, int stringArgIdxMax, int intArgIdxMin, int intArgIdxMax,
+ boolean nullable) {
this.stringArgIdxMin = stringArgIdxMin;
this.stringArgIdxMax = stringArgIdxMax;
this.intArgIdxMin = intArgIdxMin;
this.intArgIdxMax = intArgIdxMax;
+ this.nullable = nullable;
}
@Override
@@ -84,7 +92,11 @@
@Override
public IAType getResultType(ILogicalExpression expr, IAType... types) throws AlgebricksException {
- return BuiltinType.ASTRING;
+ IAType resultType = BuiltinType.ASTRING;
+ if (nullable) {
+ resultType = AUnionType.createNullableType(resultType);
+ }
+ return resultType;
}
private ATypeTag[] getExpectedTypes(boolean expectedStringType, boolean expectedIntType) {
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java
index a1d46f0..aa31942 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/SubstringTypeComputer.java
@@ -21,6 +21,7 @@
import org.apache.asterix.om.exceptions.TypeMismatchException;
import org.apache.asterix.om.typecomputer.base.AbstractResultTypeComputer;
import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.AUnionType;
import org.apache.asterix.om.types.BuiltinType;
import org.apache.asterix.om.types.IAType;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
@@ -53,6 +54,6 @@
@Override
public IAType getResultType(ILogicalExpression expr, IAType... types) throws AlgebricksException {
- return BuiltinType.ASTRING;
+ return AUnionType.createNullableType(BuiltinType.ASTRING);
}
}
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java
index bda5b2c..52ca6cd 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/Substring2Descriptor.java
@@ -98,19 +98,18 @@
array.reset();
try {
int actualStart = start >= 0 ? start - baseOffset : string.getStringLength() + start;
- UTF8StringPointable.substr(string, actualStart, Integer.MAX_VALUE, builder, array);
- } catch (StringIndexOutOfBoundsException e) {
- throw new RuntimeDataException(ErrorCode.OUT_OF_BOUND, getIdentifier(), 1, start);
+ boolean success =
+ UTF8StringPointable.substr(string, actualStart, Integer.MAX_VALUE, builder, array);
+ if (success) {
+ out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+ out.write(array.getByteArray(), 0, array.getLength());
+ result.set(resultStorage);
+ } else {
+ PointableHelper.setNull(result);
+ }
} catch (IOException e) {
throw HyracksDataException.create(e);
}
- try {
- out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
- out.write(array.getByteArray(), 0, array.getLength());
- } catch (IOException e) {
- throw HyracksDataException.create(e);
- }
- result.set(resultStorage);
}
};
}
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java
index dd1bd3d..ab0f520 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/SubstringDescriptor.java
@@ -110,20 +110,17 @@
array.reset();
try {
int actualStart = start >= 0 ? start - baseOffset : string.getStringLength() + start;
- UTF8StringPointable.substr(string, actualStart, len, builder, array);
- } catch (StringIndexOutOfBoundsException e) {
- throw new RuntimeDataException(ErrorCode.OUT_OF_BOUND, getIdentifier(), 1, start + len - 1);
+ boolean success = UTF8StringPointable.substr(string, actualStart, len, builder, array);
+ if (success) {
+ out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
+ out.write(array.getByteArray(), 0, array.getLength());
+ result.set(resultStorage);
+ } else {
+ PointableHelper.setNull(result);
+ }
} catch (IOException e) {
throw HyracksDataException.create(e);
}
-
- try {
- out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
- out.write(array.getByteArray(), 0, array.getLength());
- } catch (IOException e) {
- throw HyracksDataException.create(e);
- }
- result.set(resultStorage);
}
};
}
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index fb0c814..f683615 100644
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -328,21 +328,25 @@
builder.finish();
}
- public void substr(int charOffset, int charLength, UTF8StringBuilder builder, GrowableArray out)
+ /**
+ * @return {@code true} if substring was successfully written into given {@code out}, or
+ * {@code false} if substring could not be obtained ({@code charOffset} or {@code charLength}
+ * are less than 0 or starting position is greater than the input length)
+ */
+ public boolean substr(int charOffset, int charLength, UTF8StringBuilder builder, GrowableArray out)
throws IOException {
- substr(this, charOffset, charLength, builder, out);
+ return substr(this, charOffset, charLength, builder, out);
}
- public static void substr(UTF8StringPointable src, int charOffset, int charLength, UTF8StringBuilder builder,
+ /**
+ * @return {@code true} if substring was successfully written into given {@code out}, or
+ * {@code false} if substring could not be obtained ({@code charOffset} or {@code charLength}
+ * are less than 0 or starting position is greater than the input length)
+ */
+ public static boolean substr(UTF8StringPointable src, int charOffset, int charLength, UTF8StringBuilder builder,
GrowableArray out) throws IOException {
- // Really don't understand why we need to support the charOffset < 0 case.
- // At this time, usually there is mistake on user side, we'd better give him a warning.
- // assert charOffset >= 0;
- if (charOffset < 0) {
- charOffset = 0;
- }
- if (charLength < 0) {
- charLength = 0;
+ if (charOffset < 0 || charLength < 0) {
+ return false;
}
int utfLen = src.getUTF8Length();
@@ -353,11 +357,7 @@
chIdx++;
}
if (byteIdx >= utfLen) {
- // Again, why do we tolerant this kind of mistakes?
- // throw new StringIndexOutOfBoundsException(charOffset);
- builder.reset(out, 0);
- builder.finish();
- return;
+ return false;
}
builder.reset(out, Math.min(utfLen - byteIdx, (int) (charLength * 1.0 * byteIdx / chIdx)));
@@ -368,6 +368,7 @@
byteIdx += src.charSize(src.getMetaDataLength() + byteIdx);
}
builder.finish();
+ return true;
}
public void substrBefore(UTF8StringPointable match, UTF8StringBuilder builder, GrowableArray out)