[ASTERIXDB-3482][STO] Fix handling NULLs

- user model changes: no
- storage format changes: yes
- interface changes: no

Details:
- Ensure the null bit (the level's MSB) is written on merge
- Fix the ClassCastException in the union node when the
  originalType is a nested node

Ext-ref: MB-63167
Change-Id: I19321e6e1cd2d569ba989afc45897da7054b86bd
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18658
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
Tested-by: Wail Alkowaileet <wael.y.k@gmail.com>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.001.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.001.ddl.sqlpp
new file mode 100644
index 0000000..064d705
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.001.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE DATASET ColumnDataset
+PRIMARY KEY (id: int) WITH {
+    "storage-format": {"format" : "column"}
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.002.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.002.update.sqlpp
new file mode 100644
index 0000000..205b6a2
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.002.update.sqlpp
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+UPSERT INTO ColumnDataset (
+    {"id":0, "a": [1]},
+    {"id":1, "a": [1]},
+    {"id":2, "a": [1]},
+    {"id":3, "a": [1]},
+    {"id":4, "a": [1]},
+    {"id":5, "a": [1]},
+    {"id":6, "a": [1]},
+    {"id":7, "a": [1]}
+);
+
+UPSERT INTO ColumnDataset (
+    {"id":8, "a": 5},
+    {"id":9, "a": 5},
+    {"id":10, "a": 5},
+    {"id":11, "a": 5},
+    {"id":12, "a": 5},
+    {"id":13, "a": 5},
+    {"id":14, "a": 5},
+    {"id":15, "a": 5}
+);
+
+UPSERT INTO ColumnDataset (
+    {"id":16, "a": null},
+    {"id":17, "a": null},
+    {"id":18, "a": null},
+    {"id":19, "a": null},
+    {"id":20, "a": null},
+    {"id":21, "a": null},
+    {"id":22, "a": null},
+    {"id":23, "a": null}
+);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.003.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.003.query.sqlpp
new file mode 100644
index 0000000..05d4c05
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/007/006.003.query.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SELECT p.a, p.a IS NULL AS null_check
+FROM ColumnDataset p
+ORDER BY p.id
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.001.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.001.ddl.sqlpp
new file mode 100644
index 0000000..064d705
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.001.ddl.sqlpp
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+DROP DATAVERSE test IF EXISTS;
+CREATE DATAVERSE test;
+
+USE test;
+
+CREATE DATASET ColumnDataset
+PRIMARY KEY (id: int) WITH {
+    "storage-format": {"format" : "column"}
+};
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.002.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.002.update.sqlpp
new file mode 100644
index 0000000..e1cbb81
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.002.update.sqlpp
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+UPSERT INTO ColumnDataset (
+    {"id":0, "a": [1]},
+    {"id":1, "a": [1]},
+    {"id":2, "a": [1]},
+    {"id":3, "a": [1]},
+    {"id":4, "a": [1]},
+    {"id":5, "a": [1]},
+    {"id":6, "a": [1]},
+    {"id":7, "a": [1]}
+);
+
+UPSERT INTO ColumnDataset (
+    {"id":8, "a": [5]},
+    {"id":9, "a": [5]},
+    {"id":10, "a": [5]},
+    {"id":11, "a": [5]},
+    {"id":12, "a": [5]},
+    {"id":13, "a": [5]},
+    {"id":14, "a": [5]},
+    {"id":15, "a": [5]}
+);
+
+UPSERT INTO ColumnDataset (
+    {"id":16, "a": null},
+    {"id":17, "a": null},
+    {"id":18, "a": null},
+    {"id":19, "a": null},
+    {"id":20, "a": null},
+    {"id":21, "a": null},
+    {"id":22, "a": null},
+    {"id":23, "a": null}
+);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.003.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.003.query.sqlpp
new file mode 100644
index 0000000..05d4c05
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/column/missing-null-values/008/006.003.query.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+SELECT p.a, p.a IS NULL AS null_check
+FROM ColumnDataset p
+ORDER BY p.id
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/007/007.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/007/007.003.adm
new file mode 100644
index 0000000..4d2d643
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/007/007.003.adm
@@ -0,0 +1,24 @@
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": false, "a": 5 }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/008/008.003.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/008/008.003.adm
new file mode 100644
index 0000000..a1c54ff
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/column/missing-null-values/008/008.003.adm
@@ -0,0 +1,24 @@
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 1 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": false, "a": [ 5 ] }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
+{ "null_check": true, "a": null }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
index d139bd0..2aba758 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/sqlpp_queries.xml
@@ -16369,6 +16369,16 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="column">
+      <compilation-unit name="missing-null-values/007">
+        <output-dir compare="Text">missing-null-values/007</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="column">
+      <compilation-unit name="missing-null-values/008">
+        <output-dir compare="Text">missing-null-values/008</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="column">
       <compilation-unit name="empty-array/001">
         <output-dir compare="Text">empty-array/001</output-dir>
       </compilation-unit>
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java
index cccac50..71b561a 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/ColumnTransformer.java
@@ -190,13 +190,18 @@
             currentParent = unionNode;
 
             ATypeTag childTypeTag = pointable.getTypeTag();
-            AbstractSchemaNode actualNode;
+
             if (childTypeTag == ATypeTag.NULL || childTypeTag == ATypeTag.MISSING) {
-                actualNode = unionNode.getOriginalType();
+                /*
+                 * From the start, NULL and MISSING are tracked so they are written to the originalType (i.e., the
+                 * type before a union was injected between the parent and the original node).
+                 */
+                AbstractSchemaNode actualNode = unionNode.getOriginalType();
+                acceptActualNode(pointable, actualNode);
             } else {
-                actualNode = unionNode.getOrCreateChild(pointable.getTypeTag(), columnMetadata);
+                AbstractSchemaNode actualNode = unionNode.getOrCreateChild(pointable.getTypeTag(), columnMetadata);
+                pointable.accept(this, actualNode);
             }
-            pointable.accept(this, actualNode);
 
             currentParent = previousParent;
             columnMetadata.exitNode(node);
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
index b03da57..0942a23 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/AbstractColumnValuesReader.java
@@ -202,6 +202,15 @@
         }
     }
 
+    protected final void writeLevel(IColumnValuesWriter writer) throws HyracksDataException {
+        if (isNull()) {
+            // writeNull applies the nullBitMask to the level (i.e., sets the null bit, the level's MSB)
+            writer.writeNull(level);
+        } else {
+            writer.writeLevel(level);
+        }
+    }
+
     protected void appendCommon(ObjectNode node) {
         node.put("typeTag", getTypeTag().toString());
         node.put("columnIndex", columnIndex);
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/PrimitiveColumnValuesReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/PrimitiveColumnValuesReader.java
index f1c2929..7b02c70 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/PrimitiveColumnValuesReader.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/PrimitiveColumnValuesReader.java
@@ -100,7 +100,7 @@
             throw e;
         }
 
-        writer.writeLevel(level);
+        writeLevel(writer);
         if (primaryKey || isValue()) {
             try {
                 writer.writeValue(this);
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/RepeatedPrimitiveColumnValuesReader.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/RepeatedPrimitiveColumnValuesReader.java
index 3f90a4b..0f3b817 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/RepeatedPrimitiveColumnValuesReader.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/values/reader/RepeatedPrimitiveColumnValuesReader.java
@@ -99,15 +99,16 @@
 
         if (isRepeatedValue()) {
             while (!isLastDelimiter()) {
-                writer.writeLevel(level);
+                writeLevel(writer);
                 if (isValue()) {
                     writer.writeValue(this);
                 }
                 doNextAndCheck();
             }
         }
+
         //Add last delimiter, or NULL/MISSING
-        writer.writeLevel(level);
+        writeLevel(writer);
     }
 
     @Override
diff --git a/asterixdb/asterix-column/src/test/resources/data/211-unionArrayPrimitiveNull.json b/asterixdb/asterix-column/src/test/resources/data/211-unionArrayPrimitiveNull.json
new file mode 100644
index 0000000..041afdc
--- /dev/null
+++ b/asterixdb/asterix-column/src/test/resources/data/211-unionArrayPrimitiveNull.json
@@ -0,0 +1,3 @@
+{"a": [0]}
+{"a": 1}
+{"a": null}
diff --git a/asterixdb/asterix-column/src/test/resources/result/assembler/211-unionArrayPrimitiveNull.json b/asterixdb/asterix-column/src/test/resources/result/assembler/211-unionArrayPrimitiveNull.json
new file mode 100644
index 0000000..1b928a2
--- /dev/null
+++ b/asterixdb/asterix-column/src/test/resources/result/assembler/211-unionArrayPrimitiveNull.json
@@ -0,0 +1,3 @@
+{"a":[0]}
+{"a":1}
+{"a":null}
diff --git a/asterixdb/asterix-column/src/test/resources/result/small/211-unionArrayPrimitiveNull.json b/asterixdb/asterix-column/src/test/resources/result/small/211-unionArrayPrimitiveNull.json
new file mode 100644
index 0000000..1b928a2
--- /dev/null
+++ b/asterixdb/asterix-column/src/test/resources/result/small/211-unionArrayPrimitiveNull.json
@@ -0,0 +1,3 @@
+{"a":[0]}
+{"a":1}
+{"a":null}
diff --git a/asterixdb/asterix-column/src/test/resources/result/transformer/211-unionArrayPrimitiveNull.schema b/asterixdb/asterix-column/src/test/resources/result/transformer/211-unionArrayPrimitiveNull.schema
new file mode 100644
index 0000000..8fc4a85
--- /dev/null
+++ b/asterixdb/asterix-column/src/test/resources/result/transformer/211-unionArrayPrimitiveNull.schema
@@ -0,0 +1,7 @@
+root
+|-- a: union <level: 1>
+|    |-- bigint: bigint <level: 1, index: 1>
+|    |    |-- Def size: 3 [(0,1),(1,1),(0,1)]
+|    |-- array: array <level: 1>
+|    |    |-- item: bigint <level: 2, index: 0>
+|    |    |    |-- Def size: 5 [(2,1),(1,1),(0,2),(4,1)]