[ASTERIXDB-3482][STO] Fix handling NULLs
- user model changes: no
- storage format changes: yes
- interface changes: no
Details:
- Ensure the null bit (the MSB of the level) is written on merge
- Fix the ClassCastException thrown in the union node when the
originalType is a nested node
Ext-ref: MB-63167
Change-Id: I19321e6e1cd2d569ba989afc45897da7054b86bd
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18658
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
Tested-by: Wail Alkowaileet <wael.y.k@gmail.com>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.001.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.001.ddl.sqlpp
new file mode 100644
index 0000000..064d705
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.001.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+/* Dataset stored in the column format; only the primary key "id" is declared. */
+CREATE DATASET ColumnDataset
+PRIMARY KEY (id: int) WITH {
+ "storage-format": {"format" : "column"}
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.002.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.002.update.sqlpp
new file mode 100644
index 0000000..205b6a2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.002.update.sqlpp
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+/* "a" is a one-element array for ids 0-7, an integer for ids 8-15 and NULL for ids 16-23. */
+UPSERT INTO ColumnDataset (
+ {"id":0, "a": [1]},
+ {"id":1, "a": [1]},
+ {"id":2, "a": [1]},
+ {"id":3, "a": [1]},
+ {"id":4, "a": [1]},
+ {"id":5, "a": [1]},
+ {"id":6, "a": [1]},
+ {"id":7, "a": [1]}
+);
+
+UPSERT INTO ColumnDataset (
+ {"id":8, "a": 5},
+ {"id":9, "a": 5},
+ {"id":10, "a": 5},
+ {"id":11, "a": 5},
+ {"id":12, "a": 5},
+ {"id":13, "a": 5},
+ {"id":14, "a": 5},
+ {"id":15, "a": 5}
+);
+
+UPSERT INTO ColumnDataset (
+ {"id":16, "a": null},
+ {"id":17, "a": null},
+ {"id":18, "a": null},
+ {"id":19, "a": null},
+ {"id":20, "a": null},
+ {"id":21, "a": null},
+ {"id":22, "a": null},
+ {"id":23, "a": null}
+);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.003.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.003.query.sqlpp
new file mode 100644
index 0000000..05d4c05
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.003.query.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+/* Return "a" and whether it IS NULL for every record, ordered by id. */
+SELECT p.a, p.a IS NULL AS null_check
+FROM ColumnDataset p
+ORDER BY p.id
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.001.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.001.ddl.sqlpp
new file mode 100644
index 0000000..064d705
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.001.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+/* Dataset stored in the column format; only the primary key "id" is declared. */
+CREATE DATASET ColumnDataset
+PRIMARY KEY (id: int) WITH {
+ "storage-format": {"format" : "column"}
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.002.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.002.update.sqlpp
new file mode 100644
index 0000000..e1cbb81
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.002.update.sqlpp
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+/* "a" is a one-element array for ids 0-15 and NULL for ids 16-23. */
+UPSERT INTO ColumnDataset (
+ {"id":0, "a": [1]},
+ {"id":1, "a": [1]},
+ {"id":2, "a": [1]},
+ {"id":3, "a": [1]},
+ {"id":4, "a": [1]},
+ {"id":5, "a": [1]},
+ {"id":6, "a": [1]},
+ {"id":7, "a": [1]}
+);
+
+UPSERT INTO ColumnDataset (
+ {"id":8, "a": [5]},
+ {"id":9, "a": [5]},
+ {"id":10, "a": [5]},
+ {"id":11, "a": [5]},
+ {"id":12, "a": [5]},
+ {"id":13, "a": [5]},
+ {"id":14, "a": [5]},
+ {"id":15, "a": [5]}
+);
+
+UPSERT INTO ColumnDataset (
+ {"id":16, "a": null},
+ {"id":17, "a": null},
+ {"id":18, "a": null},
+ {"id":19, "a": null},
+ {"id":20, "a": null},
+ {"id":21, "a": null},
+ {"id":22, "a": null},
+ {"id":23, "a": null}
+);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.003.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.003.query.sqlpp
new file mode 100644
index 0000000..05d4c05
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.003.query.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+/* Return "a" and whether it IS NULL for every record, ordered by id. */
+SELECT p.a, p.a IS NULL AS null_check
+FROM ColumnDataset p
+ORDER BY p.id
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/007/007.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/007/007.003.adm
new file mode 100644
index 0000000..4d2d643
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/007/007.003.adm
@@ -0,0 +1,24 @@
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/008/008.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/008/008.003.adm
new file mode 100644
index 0000000..a1c54ff
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/008/008.003.adm
@@ -0,0 +1,24 @@
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
index d139bd0..2aba758 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
@@ -16369,6 +16369,16 @@
</compilation-unit>
</test-case>
<test-case FilePath="column">
+ <compilation-unit name="missing-null-values/007">
+ <output-dir compare="Text">missing-null-values/007</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="column">
+ <compilation-unit name="missing-null-values/008">
+ <output-dir compare="Text">missing-null-values/008</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="column">
<compilation-unit name="empty-array/001">
<output-dir compare="Text">empty-array/001</output-dir>
</compilation-unit>
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java
index cccac50..71b561a 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java
@@ -190,13 +190,18 @@
currentParent = unionNode;
ATypeTag childTypeTag = pointable.getTypeTag();
- AbstractSchemaNode actualNode;
+
if (childTypeTag == ATypeTag.NULL || childTypeTag == ATypeTag.MISSING) {
- actualNode = unionNode.getOriginalType();
+ /*
+ * NULL and MISSING are written to the originalType (i.e., the type in place before a union
+ * was injected between the parent and the original node), where they were tracked from the start.
+ */
+ AbstractSchemaNode actualNode = unionNode.getOriginalType();
+ acceptActualNode(pointable, actualNode);
} else {
- actualNode = unionNode.getOrCreateChild(pointable.getTypeTag(), columnMetadata);
+ AbstractSchemaNode actualNode = unionNode.getOrCreateChild(pointable.getTypeTag(), columnMetadata);
+ pointable.accept(this, actualNode);
}
- pointable.accept(this, actualNode);
currentParent = previousParent;
columnMetadata.exitNode(node);
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
index b03da57..0942a23 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
@@ -202,6 +202,15 @@
}
}
+ protected final void writeLevel(IColumnValuesWriter writer) throws HyracksDataException {
+ if (isNull()) {
+ // Use writeNull so the level is written together with the null bit mask
+ writer.writeNull(level);
+ } else {
+ writer.writeLevel(level);
+ }
+ }
+
protected void appendCommon(ObjectNode node) {
node.put("typeTag", getTypeTag().toString());
node.put("columnIndex", columnIndex);
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/PrimitiveColumnValuesReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/PrimitiveColumnValuesReader.java
index f1c2929..7b02c70 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/PrimitiveColumnValuesReader.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/PrimitiveColumnValuesReader.java
@@ -100,7 +100,7 @@
throw e;
}
- writer.writeLevel(level);
+ writeLevel(writer);
if (primaryKey || isValue()) {
try {
writer.writeValue(this);
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/RepeatedPrimitiveColumnValuesReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/RepeatedPrimitiveColumnValuesReader.java
index 3f90a4b..0f3b817 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/RepeatedPrimitiveColumnValuesReader.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/RepeatedPrimitiveColumnValuesReader.java
@@ -99,15 +99,16 @@
if (isRepeatedValue()) {
while (!isLastDelimiter()) {
- writer.writeLevel(level);
+ writeLevel(writer);
if (isValue()) {
writer.writeValue(this);
}
doNextAndCheck();
}
}
+
//Add last delimiter, or NULL/MISSING
- writer.writeLevel(level);
+ writeLevel(writer);
}
@Override
diff --git a/asterixdb/asterix-column/src/test/resources/data/211-unionArrayPrimitiveNull.json b/asterixdb/asterix-column/src/test/resources/data/211-unionArrayPrimitiveNull.json
new file mode 100644
index 0000000..041afdc
--- /dev/null
+++ b/asterixdb/asterix-column/src/test/resources/data/211-unionArrayPrimitiveNull.json
@@ -0,0 +1,3 @@
+{"a": [0]}
+{"a": 1}
+{"a": null}
diff --git a/asterixdb/asterix-column/src/test/resources/result/assembler/211-unionArrayPrimitiveNull.json b/asterixdb/asterix-column/src/test/resources/result/assembler/211-unionArrayPrimitiveNull.json
new file mode 100644
index 0000000..1b928a2
--- /dev/null
+++ b/asterixdb/asterix-column/src/test/resources/result/assembler/211-unionArrayPrimitiveNull.json
@@ -0,0 +1,3 @@
+{"a":[0]}
+{"a":1}
+{"a":null}
diff --git a/asterixdb/asterix-column/src/test/resources/result/small/211-unionArrayPrimitiveNull.json b/asterixdb/asterix-column/src/test/resources/result/small/211-unionArrayPrimitiveNull.json
new file mode 100644
index 0000000..1b928a2
--- /dev/null
+++ b/asterixdb/asterix-column/src/test/resources/result/small/211-unionArrayPrimitiveNull.json
@@ -0,0 +1,3 @@
+{"a":[0]}
+{"a":1}
+{"a":null}
diff --git a/asterixdb/asterix-column/src/test/resources/result/transformer/211-unionArrayPrimitiveNull.schema b/asterixdb/asterix-column/src/test/resources/result/transformer/211-unionArrayPrimitiveNull.schema
new file mode 100644
index 0000000..8fc4a85
--- /dev/null
+++ b/asterixdb/asterix-column/src/test/resources/result/transformer/211-unionArrayPrimitiveNull.schema
@@ -0,0 +1,7 @@
+root
+|-- a: union <level: 1>
+| |-- bigint: bigint <level: 1, index: 1>
+| | |-- Def size: 3 [(0,1),(1,1),(0,1)]
+| |-- array: array <level: 1>
+| | |-- item: bigint <level: 2, index: 0>
+| | | |-- Def size: 5 [(2,1),(1,1),(0,2),(4,1)]