[ASTERIXDB-2449][FUN] Incorrect NULL/MISSING handling by concat functions
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
- Infer function return type as unknownable if
its input list item can be NULL/MISSING
- Always return MISSING if there is a MISSING argument
Change-Id: Idc364b061f3e74bdc9d7715bbadedc957e9e8223
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2946
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Till Westmann <tillw@apache.org>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/binary/concat2/concat2.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/binary/concat2/concat2.1.query.sqlpp
new file mode 100644
index 0000000..782cce2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/binary/concat2/concat2.1.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+{
+ 't1': binary_concat([null,hex('aa')]),
+ 't2': binary_concat([hex('aa'),null]),
+ 't3': binary_concat([null,missing,hex('aa')]),
+ 't4': binary_concat([hex('aa'),null,missing])
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.1.query.sqlpp
new file mode 100644
index 0000000..3e48631
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.1.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+{
+ 't1': string_concat([null,'aa']),
+ 't2': string_concat(['aa',null]),
+ 't3': string_concat([null,missing,'aa']),
+ 't4': string_concat(['aa',null,missing])
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.2.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.2.query.sqlpp
new file mode 100644
index 0000000..12208a7
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/string-concat2/string-concat2.2.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+{
+ 't1': null || 'aa',
+ 't2': 'aa' || null,
+ 't3': null || missing || 'aa',
+ 't4': 'aa' || null || missing
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/binary/concat2/concat2.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/binary/concat2/concat2.1.adm
new file mode 100644
index 0000000..770f2cd
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/binary/concat2/concat2.1.adm
@@ -0,0 +1 @@
+{ "t1": null, "t2": null }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.1.adm
new file mode 100644
index 0000000..770f2cd
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.1.adm
@@ -0,0 +1 @@
+{ "t1": null, "t2": null }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.2.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.2.adm
new file mode 100644
index 0000000..770f2cd
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/string-concat2/string-concat2.2.adm
@@ -0,0 +1 @@
+{ "t1": null, "t2": null }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 904dc61..04b484e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -6619,6 +6619,11 @@
</compilation-unit>
</test-case>
<test-case FilePath="string">
+ <compilation-unit name="string-concat2">
+ <output-dir compare="Text">string-concat2</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="string">
<compilation-unit name="string-equal1">
<output-dir compare="Text">string-equal1</output-dir>
</compilation-unit>
@@ -9837,6 +9842,11 @@
</compilation-unit>
</test-case>
<test-case FilePath="binary">
+ <compilation-unit name="concat2">
+ <output-dir compare="Text">concat2</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="binary">
<compilation-unit name="subbinary">
<output-dir compare="Text">subbinary</output-dir>
</compilation-unit>
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
index d8eeefc..49db062 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/BuiltinFunctions.java
@@ -68,6 +68,7 @@
import org.apache.asterix.om.typecomputer.impl.CollectionMemberResultType;
import org.apache.asterix.om.typecomputer.impl.CollectionToSequenceTypeComputer;
import org.apache.asterix.om.typecomputer.impl.ConcatNonNullTypeComputer;
+import org.apache.asterix.om.typecomputer.impl.ConcatTypeComputer;
import org.apache.asterix.om.typecomputer.impl.DoubleIfTypeComputer;
import org.apache.asterix.om.typecomputer.impl.FieldAccessByIndexResultType;
import org.apache.asterix.om.typecomputer.impl.FieldAccessByNameResultType;
@@ -1240,7 +1241,7 @@
addFunction(BINARY_LENGTH, UnaryBinaryInt64TypeComputer.INSTANCE, true);
addFunction(PARSE_BINARY, ABinaryTypeComputer.INSTANCE, true);
addFunction(PRINT_BINARY, AStringTypeComputer.INSTANCE, true);
- addFunction(BINARY_CONCAT, ABinaryTypeComputer.INSTANCE, true);
+ addFunction(BINARY_CONCAT, ConcatTypeComputer.INSTANCE_BINARY, true);
addFunction(SUBBINARY_FROM, ABinaryTypeComputer.INSTANCE, true);
addFunction(SUBBINARY_FROM_TO, ABinaryTypeComputer.INSTANCE, true);
addFunction(FIND_BINARY, AInt64TypeComputer.INSTANCE, true);
@@ -1251,7 +1252,7 @@
addFunction(STRING_CONTAINS, ABooleanTypeComputer.INSTANCE, true);
addFunction(STRING_TO_CODEPOINT, StringToInt64ListTypeComputer.INSTANCE, true);
addFunction(CODEPOINT_TO_STRING, AStringTypeComputer.INSTANCE, true);
- addFunction(STRING_CONCAT, AStringTypeComputer.INSTANCE, true);
+ addFunction(STRING_CONCAT, ConcatTypeComputer.INSTANCE_STRING, true);
addFunction(SUBSTRING2, StringIntToStringTypeComputer.INSTANCE_NULLABLE, true);
addFunction(STRING_LENGTH, UnaryStringInt64TypeComputer.INSTANCE, true);
addFunction(STRING_LOWERCASE, StringStringTypeComputer.INSTANCE, true);
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/ConcatTypeComputer.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/ConcatTypeComputer.java
new file mode 100644
index 0000000..db59877
--- /dev/null
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/typecomputer/impl/ConcatTypeComputer.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.om.typecomputer.impl;
+
+import org.apache.asterix.om.typecomputer.base.AbstractResultTypeComputer;
+import org.apache.asterix.om.types.AUnionType;
+import org.apache.asterix.om.types.AbstractCollectionType;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.asterix.om.types.IAType;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
+
+public class ConcatTypeComputer extends AbstractResultTypeComputer {
+
+ public static final ConcatTypeComputer INSTANCE_STRING = new ConcatTypeComputer(BuiltinType.ASTRING);
+
+ public static final ConcatTypeComputer INSTANCE_BINARY = new ConcatTypeComputer(BuiltinType.ABINARY);
+
+ private final IAType resultType;
+
+ private ConcatTypeComputer(IAType resultType) {
+ this.resultType = resultType;
+ }
+
+ @Override
+ protected IAType getResultType(ILogicalExpression expr, IAType... strippedInputTypes) throws AlgebricksException {
+ IAType argType = strippedInputTypes[0];
+ IAType outputType = resultType;
+ if (!argType.getTypeTag().isListType() || isUnknownable(((AbstractCollectionType) argType).getItemType())) {
+ outputType = AUnionType.createUnknownableType(outputType);
+ }
+ return outputType;
+ }
+
+ private boolean isUnknownable(IAType type) {
+ switch (type.getTypeTag()) {
+ case ANY:
+ case MISSING:
+ case NULL:
+ return true;
+ case UNION:
+ return ((AUnionType) type).isUnknownableType();
+ default:
+ return false;
+ }
+ }
+}
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java
index cb213ae..a2872bf 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/StringConcatDescriptor.java
@@ -94,6 +94,7 @@
listAccessor.reset(listBytes, listOffset);
// calculate length first
int utf8Len = 0;
+ boolean itemIsNull = false;
for (int i = 0; i < listAccessor.size(); i++) {
int itemOffset = listAccessor.getItemOffset(i);
ATypeTag itemType = listAccessor.getItemType(itemOffset);
@@ -104,9 +105,8 @@
}
if (itemType != ATypeTag.STRING) {
if (itemType == ATypeTag.NULL) {
- nullSerde.serialize(ANull.NULL, out);
- result.set(resultStorage);
- return;
+ itemIsNull = true;
+ continue;
}
if (itemType == ATypeTag.MISSING) {
missingSerde.serialize(AMissing.MISSING, out);
@@ -118,6 +118,11 @@
}
utf8Len += UTF8StringUtil.getUTFLength(listBytes, itemOffset);
}
+ if (itemIsNull) {
+ nullSerde.serialize(ANull.NULL, out);
+ result.set(resultStorage);
+ return;
+ }
out.writeByte(ATypeTag.SERIALIZED_STRING_TYPE_TAG);
int cbytes = UTF8StringUtil.encodeUTF8Length(utf8Len, tempLengthArray, 0);
out.write(tempLengthArray, 0, cbytes);
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java
index 59f3396..4eaa1d1 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/binary/BinaryConcatDescriptor.java
@@ -94,11 +94,17 @@
try {
listAccessor.reset(data, offset);
int concatLength = 0;
+ boolean itemIsNull = false;
for (int i = 0; i < listAccessor.size(); i++) {
int itemOffset = listAccessor.getItemOffset(i);
ATypeTag itemType = listAccessor.getItemType(itemOffset);
if (itemType != ATypeTag.BINARY) {
- if (serializeUnknownIfAnyUnknown(itemType)) {
+ if (itemType == ATypeTag.NULL) {
+ itemIsNull = true;
+ continue;
+ }
+ if (itemType == ATypeTag.MISSING) {
+ missingSerde.serialize(AMissing.MISSING, dataOutput);
result.set(resultStorage);
return;
}
@@ -107,6 +113,11 @@
}
concatLength += ByteArrayPointable.getContentLength(data, itemOffset);
}
+ if (itemIsNull) {
+ nullSerde.serialize(ANull.NULL, dataOutput);
+ result.set(resultStorage);
+ return;
+ }
dataOutput.writeByte(ATypeTag.SERIALIZED_BINARY_TYPE_TAG);
int metaLen = VarLenIntEncoderDecoder.encode(concatLength, metaBuffer, 0);
dataOutput.write(metaBuffer, 0, metaLen);
@@ -122,20 +133,6 @@
}
result.set(resultStorage);
}
-
- private boolean serializeUnknownIfAnyUnknown(ATypeTag... tags) throws HyracksDataException {
- for (ATypeTag typeTag : tags) {
- if (typeTag == ATypeTag.NULL) {
- nullSerde.serialize(ANull.NULL, dataOutput);
- return true;
- }
- if (typeTag == ATypeTag.MISSING) {
- missingSerde.serialize(AMissing.MISSING, dataOutput);
- return true;
- }
- }
- return false;
- }
};
}
};