[ASTERIXDB-3171][OTH] Log schema
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
In order to debug future issues in columnar datasets,
we need to know the schema information.
Change-Id: If5d1b9af8f0579df5cb27a90d3456407263ed53f
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17500
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Wail Alkowaileet <wael.y.k@gmail.com>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
index 2917074..2bd2954 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/PathExtractorVisitor.java
@@ -27,10 +27,13 @@
import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
import org.apache.hyracks.api.exceptions.HyracksDataException;
+import it.unimi.dsi.fastutil.ints.IntList;
+
public class PathExtractorVisitor implements ISchemaNodeVisitor<AbstractSchemaNode, Void> {
@Override
public AbstractSchemaNode visit(ObjectSchemaNode objectNode, Void arg) throws HyracksDataException {
- int fieldNameIndex = objectNode.getChildrenFieldNameIndexes().getInt(0);
+ IntList fieldNameIndexes = objectNode.getChildrenFieldNameIndexes();
+ int fieldNameIndex = fieldNameIndexes.isEmpty() ? -1 : objectNode.getChildrenFieldNameIndexes().getInt(0);
if (fieldNameIndex < 0) {
return MissingFieldSchemaNode.INSTANCE;
}
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
index 0771ccb..351dcdf 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/metadata/schema/visitor/SchemaClipperVisitor.java
@@ -65,6 +65,10 @@
try {
for (int i = 0; i < fieldNames.length; i++) {
int fieldNameIndex = fieldNamesDictionary.getFieldNameIndex(fieldNames[i]);
+ if (fieldNameIndex == -1) {
+ // Missing child
+ continue;
+ }
AbstractSchemaNode child = objectNode.getChild(fieldNameIndex);
clippedObjectNode.addChild(fieldNameIndex, fieldTypes[i].accept(this, child));
}
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
index 8cd1e98..0d7404d 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/lsm/flush/FlushColumnMetadata.java
@@ -42,6 +42,7 @@
import org.apache.asterix.column.metadata.schema.visitor.SchemaBuilderFromIATypeVisitor;
import org.apache.asterix.column.util.ColumnValuesUtil;
import org.apache.asterix.column.util.RunLengthIntArray;
+import org.apache.asterix.column.util.SchemaStringBuilderVisitor;
import org.apache.asterix.column.values.IColumnValuesWriter;
import org.apache.asterix.column.values.IColumnValuesWriterFactory;
import org.apache.asterix.column.values.writer.AbstractColumnValuesWriter;
@@ -54,6 +55,9 @@
import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
import org.apache.hyracks.storage.am.lsm.btree.column.api.IColumnWriteMultiPageOp;
import org.apache.hyracks.storage.am.lsm.common.api.ILSMMemoryComponent;
+import org.apache.hyracks.util.LogRedactionUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
import it.unimi.dsi.fastutil.ints.IntArrayList;
@@ -62,6 +66,7 @@
* The schema here is mutable and can change according to the flushed records
*/
public final class FlushColumnMetadata extends AbstractColumnMetadata {
+ private static final Logger LOGGER = LogManager.getLogger();
private final Map<AbstractSchemaNestedNode, RunLengthIntArray> definitionLevels;
private final Mutable<IColumnWriteMultiPageOp> multiPageOpRef;
private final FieldNamesDictionary fieldNamesDictionary;
@@ -160,6 +165,7 @@
if (changed) {
try {
serializeChanges();
+ logSchema(root, metaRoot, fieldNamesDictionary);
changed = false;
} catch (IOException e) {
throw HyracksDataException.create(e);
@@ -253,6 +259,7 @@
ArrayBackedValueStorage schemaStorage = new ArrayBackedValueStorage(serializedMetadata.getLength());
schemaStorage.append(serializedMetadata);
+ logSchema(root, metaRoot, fieldNamesDictionary);
return new FlushColumnMetadata(datasetType, metaType, primaryKeys, metaContainsKeys, columnWriterFactory,
multiPageOpRef, writers, fieldNamesDictionary, root, metaRoot, definitionLevels, schemaStorage);
}
@@ -540,6 +547,22 @@
return nestedNode;
}
+ private static void logSchema(ObjectSchemaNode root, ObjectSchemaNode metaRoot,
+ FieldNamesDictionary fieldNamesDictionary) throws HyracksDataException {
+ if (!LOGGER.isDebugEnabled()) {
+ return;
+ }
+ // This should be a low frequency object creation
+ SchemaStringBuilderVisitor schemaBuilder = new SchemaStringBuilderVisitor(fieldNamesDictionary);
+ String recordSchema = LogRedactionUtil.userData(schemaBuilder.build(root));
+ LOGGER.debug("Schema for {} has changed: \n {}", SchemaStringBuilderVisitor.RECORD_SCHEMA, recordSchema);
+ if (metaRoot != null) {
+ String metaRecordSchema = LogRedactionUtil.userData(schemaBuilder.build(metaRoot));
+ LOGGER.debug("Schema for {} has changed: \n {}", SchemaStringBuilderVisitor.META_RECORD_SCHEMA,
+ metaRecordSchema);
+ }
+ }
+
public static ATypeTag getNormalizedTypeTag(ATypeTag typeTag) {
switch (typeTag) {
case TINYINT:
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
index b9babf1..54ac60c 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnMetadata.java
@@ -31,6 +31,7 @@
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
+import org.apache.asterix.column.util.SchemaStringBuilderVisitor;
import org.apache.asterix.column.values.IColumnValuesReaderFactory;
import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
import org.apache.asterix.column.values.reader.filter.FilterAccessorProvider;
@@ -178,6 +179,9 @@
LOGGER.info("Filter: {}", filterString);
}
+ // log requested schema
+ logSchema(clippedRoot, SchemaStringBuilderVisitor.RECORD_SCHEMA, fieldNamesDictionary);
+
return new QueryColumnMetadata(datasetType, null, primaryKeyReaders, serializedMetadata, fieldNamesDictionary,
clippedRoot, readerFactory, valueGetterFactory, filterEvaluator, filterValueAccessors);
}
@@ -204,4 +208,13 @@
}
return primaryKeyReaders;
}
+
+ protected static void logSchema(ObjectSchemaNode root, String schemaSource,
+ FieldNamesDictionary fieldNamesDictionary) throws HyracksDataException {
+ if (LOGGER.isDebugEnabled()) {
+ SchemaStringBuilderVisitor schemaBuilder = new SchemaStringBuilderVisitor(fieldNamesDictionary);
+ String schema = LogRedactionUtil.userData(schemaBuilder.build(root));
+ LOGGER.debug("Queried {} schema: \n {}", schemaSource, schema);
+ }
+ }
}
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
index 2de1948..2896231 100644
--- a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/operation/query/QueryColumnWithMetaMetadata.java
@@ -30,6 +30,7 @@
import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
import org.apache.asterix.column.metadata.schema.visitor.SchemaClipperVisitor;
+import org.apache.asterix.column.util.SchemaStringBuilderVisitor;
import org.apache.asterix.column.values.IColumnValuesReaderFactory;
import org.apache.asterix.column.values.reader.PrimitiveColumnValuesReader;
import org.apache.asterix.column.values.reader.filter.FilterAccessorProvider;
@@ -144,6 +145,11 @@
PrimitiveColumnValuesReader[] primaryKeyReaders =
createPrimaryKeyReaders(input, readerFactory, numberOfPrimaryKeys);
+ // log requested schema for record
+ logSchema(clippedRoot, SchemaStringBuilderVisitor.RECORD_SCHEMA, fieldNamesDictionary);
+ // log requested schema for meta-record
+ logSchema(metaClippedRoot, SchemaStringBuilderVisitor.META_RECORD_SCHEMA, fieldNamesDictionary);
+
return new QueryColumnWithMetaMetadata(datasetType, metaType, primaryKeyReaders, serializedMetadata,
fieldNamesDictionary, clippedRoot, metaClippedRoot, readerFactory, valueGetterFactory, filterEvaluator,
filterValueAccessors);
diff --git a/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
new file mode 100644
index 0000000..85aca8f
--- /dev/null
+++ b/asterixdb/asterix-column/src/main/java/org/apache/asterix/column/util/SchemaStringBuilderVisitor.java
@@ -0,0 +1,152 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.column.util;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.asterix.column.metadata.FieldNamesDictionary;
+import org.apache.asterix.column.metadata.schema.AbstractSchemaNode;
+import org.apache.asterix.column.metadata.schema.ISchemaNodeVisitor;
+import org.apache.asterix.column.metadata.schema.ObjectSchemaNode;
+import org.apache.asterix.column.metadata.schema.UnionSchemaNode;
+import org.apache.asterix.column.metadata.schema.collection.AbstractCollectionSchemaNode;
+import org.apache.asterix.column.metadata.schema.primitive.PrimitiveSchemaNode;
+import org.apache.asterix.dataflow.data.nontagged.serde.AStringSerializerDeserializer;
+import org.apache.asterix.om.base.AString;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.api.IValueReference;
+import org.apache.hyracks.data.std.util.ByteArrayAccessibleDataInputStream;
+import org.apache.hyracks.data.std.util.ByteArrayAccessibleInputStream;
+import org.apache.hyracks.util.string.UTF8StringReader;
+import org.apache.hyracks.util.string.UTF8StringWriter;
+
+import it.unimi.dsi.fastutil.ints.IntList;
+
+public class SchemaStringBuilderVisitor implements ISchemaNodeVisitor<Void, Void> {
+ public static String RECORD_SCHEMA = "record";
+ public static String META_RECORD_SCHEMA = "meta-record";
+ private final StringBuilder builder;
+ private final List<String> fieldNames;
+
+ private int level;
+ private int indent;
+
+ public SchemaStringBuilderVisitor(FieldNamesDictionary dictionary) throws HyracksDataException {
+ builder = new StringBuilder();
+ this.fieldNames = new ArrayList<>();
+ AStringSerializerDeserializer stringSerDer =
+ new AStringSerializerDeserializer(new UTF8StringWriter(), new UTF8StringReader());
+ List<IValueReference> extractedFieldNames = dictionary.getFieldNames();
+
+ //Deserialize field names
+ ByteArrayAccessibleInputStream in = new ByteArrayAccessibleInputStream(new byte[0], 0, 0);
+ ByteArrayAccessibleDataInputStream dataIn = new ByteArrayAccessibleDataInputStream(in);
+ for (IValueReference serFieldName : extractedFieldNames) {
+ in.setContent(serFieldName.getByteArray(), 0, serFieldName.getLength());
+ AString fieldName = stringSerDer.deserialize(dataIn);
+ this.fieldNames.add(fieldName.getStringValue());
+ }
+ level = 0;
+ indent = 0;
+ }
+
+ public String build(ObjectSchemaNode root) throws HyracksDataException {
+ builder.append("root\n");
+ visit(root, null);
+ return builder.toString();
+ }
+
+ @Override
+ public Void visit(ObjectSchemaNode objectNode, Void arg) throws HyracksDataException {
+ List<AbstractSchemaNode> children = objectNode.getChildren();
+ IntList fieldNameIndexes = objectNode.getChildrenFieldNameIndexes();
+ level++;
+ indent++;
+
+ for (int i = 0; i < children.size(); i++) {
+ int index = fieldNameIndexes.getInt(i);
+ String fieldName = fieldNames.get(index);
+ AbstractSchemaNode child = children.get(i);
+ append(fieldName, index, child);
+ child.accept(this, null);
+ }
+
+ level--;
+ indent--;
+ return null;
+ }
+
+ @Override
+ public Void visit(AbstractCollectionSchemaNode collectionNode, Void arg) throws HyracksDataException {
+ level++;
+ indent++;
+ AbstractSchemaNode itemNode = collectionNode.getItemNode();
+ append("item", itemNode);
+ itemNode.accept(this, null);
+ level--;
+ indent--;
+ return null;
+ }
+
+ @Override
+ public Void visit(UnionSchemaNode unionNode, Void arg) throws HyracksDataException {
+ indent++;
+ for (AbstractSchemaNode child : unionNode.getChildren().values()) {
+ append(child.getTypeTag().toString(), child);
+ child.accept(this, null);
+ }
+ indent--;
+ return null;
+ }
+
+ @Override
+ public Void visit(PrimitiveSchemaNode primitiveNode, Void arg) throws HyracksDataException {
+ return null;
+ }
+
+ private void appendDecor() {
+ builder.append("| ".repeat(Math.max(0, indent - 1)));
+ builder.append("|-- ");
+ }
+
+ private void append(String key, AbstractSchemaNode node) {
+ append(key, -1, node);
+ }
+
+ private void append(String key, int index, AbstractSchemaNode node) {
+ appendDecor();
+ builder.append(key);
+ if (index >= 0) {
+ builder.append(" (");
+ builder.append(index);
+ builder.append(')');
+ }
+ builder.append(": ");
+ builder.append(node.getTypeTag().toString());
+ builder.append(" <level: ");
+ builder.append(level);
+ if (!node.isNested()) {
+ final PrimitiveSchemaNode primitiveNode = (PrimitiveSchemaNode) node;
+ builder.append(", index: ");
+ builder.append(primitiveNode.getColumnIndex());
+ }
+ builder.append(">\n");
+ }
+}