[ASTERIXDB-3120][ASTERIXDB-3121] CBO Analyze: improve error message.
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
Improve error message if seed value is invalid.
Improve error message if sample value is invalid.
Change-Id: I599005f4f4814f719e422da4ff7d58c2253e5ed4
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17404
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Vijay Sarathy <vijay.sarathy@couchbase.com>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp
index 3190b2d..3dfad77 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp
@@ -18,9 +18,10 @@
*/
/*
- * Description: test that the sample index is dropped using "analyze dataset drop statistics" statement
+ * Description: analyze dataset with sample=high
+ * Note, there are more tuples in the dataset that the target sample size
*/
use test;
-analyze dataset ds1 drop statistics;
+analyze dataset ds1 with { "sample": "high", "sample-seed": -10 };
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
index 759fc3f..9e7988c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
@@ -18,10 +18,13 @@
*/
/*
- * Description: check that the sample was dropped
+ * Description: check that the sample was re-created
*/
+set `import-private-functions` `true`;
+
use test;
-select count(*) cnt
-from listMetadata(true, false) v;
+select * from
+ listMetadata(false, false) metadata,
+ showSampleStats("ds1", "sample_idx_2_ds1", false) stats;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp
index 7d6bf92..3190b2d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp
@@ -17,6 +17,10 @@
* under the License.
*/
+/*
+ * Description: test that the sample index is dropped using "analyze dataset drop statistics" statement
+ */
+
use test;
-analyze dataset ds1;
+analyze dataset ds1 drop statistics;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp
index a593df3..759fc3f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp
@@ -18,7 +18,7 @@
*/
/*
- * Description: check that the sample was re-created again
+ * Description: check that the sample was dropped
*/
use test;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp
index 151309a..7d6bf92 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp
@@ -17,10 +17,6 @@
* under the License.
*/
-/*
- * Description: test that the sample index is dropped when its source dataset is dropped
- */
-
use test;
-drop dataset ds1;
\ No newline at end of file
+analyze dataset ds1;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp
index 759fc3f..a593df3 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp
@@ -18,7 +18,7 @@
*/
/*
- * Description: check that the sample was dropped
+ * Description: check that the sample was re-created again
*/
use test;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.26.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.26.ddl.sqlpp
new file mode 100644
index 0000000..151309a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.26.ddl.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description: test that the sample index is dropped when its source dataset is dropped
+ */
+
+use test;
+
+drop dataset ds1;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.27.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.27.query.sqlpp
new file mode 100644
index 0000000..759fc3f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.27.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description: check that the sample was dropped
+ */
+
+use test;
+
+select count(*) cnt
+from listMetadata(true, false) v;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm
index bacb60c..49f9285 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm
@@ -1 +1 @@
-{ "cnt": 0 }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 17008, "SourceCardinality": 17100, "SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 16972, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm
index f86e66b..bacb60c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm
@@ -1 +1 @@
-{ "cnt": 1 }
\ No newline at end of file
+{ "cnt": 0 }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm
index bacb60c..f86e66b 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm
@@ -1 +1 @@
-{ "cnt": 0 }
\ No newline at end of file
+{ "cnt": 1 }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.27.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.27.adm
new file mode 100644
index 0000000..bacb60c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.27.adm
@@ -0,0 +1 @@
+{ "cnt": 0 }
\ No newline at end of file
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index 44471a2..b0826e8 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -267,6 +267,9 @@
INVALID_TIMEZONE(1172),
INVALID_PARAM_VALUE_ALLOWED_VALUE(1173),
SAMPLE_HAS_ZERO_ROWS(1174),
+ INVALID_SAMPLE_SIZE(1175),
+ OUT_OF_RANGE_SAMPLE_SIZE(1176),
+ INVALID_SAMPLE_SEED(1177),
// Feed errors
DATAFLOW_ILLEGAL_STATE(3001),
diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index c9ab080..0bf523a 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -269,7 +269,9 @@
1172 = Provided timezone is invalid: '%1$s'
1173 = Invalid value for parameter '%1$s', allowed value(s): %2$s
1174 = Sample has zero rows
-
+1175 = Sample size options are "low", "medium", "high", or a number
+1176 = Sample size has to be between %1$s and %2$s
+1177 = Sample seed has to be a number or a string convertible to a number
# Feed Errors
3001 = Illegal state.
3002 = Tuple is too large for a frame
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
index cbf2c07..a719c34 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
@@ -19,14 +19,18 @@
package org.apache.asterix.lang.common.statement;
+import java.util.List;
import java.util.Locale;
import org.apache.asterix.common.exceptions.CompilationException;
import org.apache.asterix.common.exceptions.ErrorCode;
import org.apache.asterix.common.metadata.DataverseName;
import org.apache.asterix.lang.common.base.AbstractStatement;
+import org.apache.asterix.lang.common.base.Expression;
+import org.apache.asterix.lang.common.expression.FieldBinding;
import org.apache.asterix.lang.common.expression.RecordConstructor;
import org.apache.asterix.lang.common.util.ExpressionUtils;
+import org.apache.asterix.lang.common.util.LangRecordParseUtil;
import org.apache.asterix.lang.common.visitor.base.ILangVisitor;
import org.apache.asterix.object.base.AdmBigIntNode;
import org.apache.asterix.object.base.AdmDoubleNode;
@@ -57,20 +61,32 @@
throws CompilationException {
this.dataverseName = dataverseName;
this.datasetName = datasetName;
- this.options = options == null ? null : validateOptions(ExpressionUtils.toNode(options));
+ this.options = options == null ? null : validateOptions(options);
}
- private static AdmObjectNode validateOptions(AdmObjectNode options) throws CompilationException {
- for (String fieldName : options.getFieldNames()) {
- switch (fieldName) {
+ private static AdmObjectNode validateOptions(RecordConstructor options) throws CompilationException {
+ final List<FieldBinding> fbList = options.getFbList();
+ for (int i = 0; i < fbList.size(); i++) {
+ FieldBinding binding = fbList.get(i);
+ String key = LangRecordParseUtil.exprToStringLiteral(binding.getLeftExpr()).getStringValue();
+ Expression value = binding.getRightExpr();
+ switch (key) {
case SAMPLE_FIELD_NAME:
+ if (value.getKind() != Expression.Kind.LITERAL_EXPRESSION) {
+ throw new CompilationException(ErrorCode.INVALID_SAMPLE_SIZE);
+ }
+ break;
case SAMPLE_SEED_FIELD_NAME:
+ if (value.getKind() != Expression.Kind.LITERAL_EXPRESSION
+ && value.getKind() != Expression.Kind.UNARY_EXPRESSION) {
+ throw new CompilationException(ErrorCode.INVALID_SAMPLE_SEED);
+ }
break;
default:
- throw new CompilationException(ErrorCode.INVALID_PARAM, fieldName);
+ throw new CompilationException(ErrorCode.INVALID_PARAM, key);
}
}
- return options;
+ return (ExpressionUtils.toNode(options));
}
@Override
@@ -102,18 +118,20 @@
case SAMPLE_HIGH:
return SAMPLE_HIGH_SIZE;
default:
- throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_FIELD_NAME);
+ throw new CompilationException(ErrorCode.INVALID_SAMPLE_SIZE);
}
case BIGINT:
int v = (int) ((AdmBigIntNode) n).get();
if (!isValidSampleSize(v)) {
- throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_FIELD_NAME);
+ throw new CompilationException(ErrorCode.OUT_OF_RANGE_SAMPLE_SIZE, SAMPLE_LOW_SIZE,
+ SAMPLE_HIGH_SIZE);
}
return v;
case DOUBLE:
v = (int) ((AdmDoubleNode) n).get();
if (!isValidSampleSize(v)) {
- throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_FIELD_NAME);
+ throw new CompilationException(ErrorCode.OUT_OF_RANGE_SAMPLE_SIZE, SAMPLE_LOW_SIZE,
+ SAMPLE_HIGH_SIZE);
}
return v;
default:
@@ -138,7 +156,7 @@
try {
return Long.parseLong(s);
} catch (NumberFormatException e) {
- throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_SEED_FIELD_NAME);
+ throw new CompilationException(ErrorCode.INVALID_SAMPLE_SEED);
}
default:
throw new CompilationException(ErrorCode.WITH_FIELD_MUST_BE_OF_TYPE, SAMPLE_SEED_FIELD_NAME,
@@ -167,4 +185,4 @@
public byte getCategory() {
return Category.DDL;
}
-}
\ No newline at end of file
+}
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java
index cab1aca..5c91aef 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java
@@ -42,6 +42,7 @@
import org.apache.asterix.lang.common.expression.ListConstructor;
import org.apache.asterix.lang.common.expression.LiteralExpr;
import org.apache.asterix.lang.common.expression.RecordConstructor;
+import org.apache.asterix.lang.common.expression.UnaryExpr;
import org.apache.asterix.lang.common.literal.DoubleLiteral;
import org.apache.asterix.lang.common.literal.FloatLiteral;
import org.apache.asterix.lang.common.literal.IntegerLiteral;
@@ -50,6 +51,7 @@
import org.apache.asterix.lang.common.statement.FunctionDecl;
import org.apache.asterix.lang.common.statement.Query;
import org.apache.asterix.lang.common.statement.ViewDecl;
+import org.apache.asterix.lang.common.struct.UnaryExprType;
import org.apache.asterix.lang.common.visitor.GatherFunctionCallsVisitor;
import org.apache.asterix.object.base.AdmArrayNode;
import org.apache.asterix.object.base.AdmBigIntNode;
@@ -80,6 +82,26 @@
return toNode((LiteralExpr) expr);
case RECORD_CONSTRUCTOR_EXPRESSION:
return toNode((RecordConstructor) expr);
+ case UNARY_EXPRESSION:
+ UnaryExpr unaryExpr = (UnaryExpr) expr;
+ UnaryExprType unaryExprType = unaryExpr.getExprType();
+ if (unaryExprType == UnaryExprType.POSITIVE || unaryExprType == UnaryExprType.NEGATIVE) {
+ Expression uexpr = unaryExpr.getExpr();
+ if (uexpr.getKind() == Expression.Kind.LITERAL_EXPRESSION) {
+ if (unaryExprType == UnaryExprType.POSITIVE) {
+ return toNode(uexpr);
+ } else {
+ Literal lit = ((LiteralExpr) uexpr).getValue();
+ return toNode(new LiteralExpr(reverseSign(lit)));
+ }
+ } else {
+ throw new CompilationException(ErrorCode.LITERAL_TYPE_NOT_SUPPORTED_IN_CONSTANT_RECORD,
+ uexpr.getKind());
+ }
+ } else {
+ throw new CompilationException(ErrorCode.EXPRESSION_NOT_SUPPORTED_IN_CONSTANT_RECORD,
+ unaryExprType);
+ }
default:
throw new CompilationException(ErrorCode.EXPRESSION_NOT_SUPPORTED_IN_CONSTANT_RECORD, expr.getKind());
}