[ASTERIXDB-3120][ASTERIXDB-3121] CBO Analyze: improve error message.

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
Improve error message if seed value is invalid.
Improve error message if sample value is invalid.

Change-Id: I599005f4f4814f719e422da4ff7d58c2253e5ed4
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17404
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Vijay Sarathy <vijay.sarathy@couchbase.com>
Reviewed-by: Ali Alsuliman <ali.al.solaiman@gmail.com>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp
index 3190b2d..3dfad77 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.20.ddl.sqlpp
@@ -18,9 +18,10 @@
  */
 
 /*
- * Description: test that the sample index is dropped using "analyze dataset drop statistics" statement
+ * Description: analyze dataset with sample=high
+ * Note, there are more tuples in the dataset that the target sample size
  */
 
 use test;
 
-analyze dataset ds1 drop statistics;
+analyze dataset ds1 with { "sample": "high", "sample-seed": -10 };
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
index 759fc3f..9e7988c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.21.query.sqlpp
@@ -18,10 +18,13 @@
  */
 
 /*
- * Description: check that the sample was dropped
+ * Description: check that the sample was re-created
  */
 
+set `import-private-functions` `true`;
+
 use test;
 
-select count(*) cnt
-from listMetadata(true, false) v;
+select * from
+  listMetadata(false, false) metadata,
+  showSampleStats("ds1", "sample_idx_2_ds1", false) stats;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp
index 7d6bf92..3190b2d 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.22.ddl.sqlpp
@@ -17,6 +17,10 @@
  * under the License.
  */
 
+/*
+ * Description: test that the sample index is dropped using "analyze dataset drop statistics" statement
+ */
+
 use test;
 
-analyze dataset ds1;
+analyze dataset ds1 drop statistics;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp
index a593df3..759fc3f 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.23.query.sqlpp
@@ -18,7 +18,7 @@
  */
 
 /*
- * Description: check that the sample was re-created again
+ * Description: check that the sample was dropped
  */
 
 use test;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp
index 151309a..7d6bf92 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.24.ddl.sqlpp
@@ -17,10 +17,6 @@
  * under the License.
  */
 
-/*
- * Description: test that the sample index is dropped when its source dataset is dropped
- */
-
 use test;
 
-drop dataset ds1;
\ No newline at end of file
+analyze dataset ds1;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp
index 759fc3f..a593df3 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.25.query.sqlpp
@@ -18,7 +18,7 @@
  */
 
 /*
- * Description: check that the sample was dropped
+ * Description: check that the sample was re-created again
  */
 
 use test;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.26.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.26.ddl.sqlpp
new file mode 100644
index 0000000..151309a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.26.ddl.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description: test that the sample index is dropped when its source dataset is dropped
+ */
+
+use test;
+
+drop dataset ds1;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.27.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.27.query.sqlpp
new file mode 100644
index 0000000..759fc3f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/analyze-dataset-1/analyze-dataset-1.27.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description: check that the sample was dropped
+ */
+
+use test;
+
+select count(*) cnt
+from listMetadata(true, false) v;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm
index bacb60c..49f9285 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.21.adm
@@ -1 +1 @@
-{ "cnt": 0 }
\ No newline at end of file
+{ "metadata": { "DatasetName": "ds1", "IndexName": "sample_idx_2_ds1", "SampleCardinalityTarget": 17008, "SourceCardinality": 17100, "SourceAvgItemSize": true, "SampleSeed": true }, "stats": { "cnt": 16972, "min_pk": true, "max_pk": true, "min_x": true, "max_x": true } }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm
index f86e66b..bacb60c 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.23.adm
@@ -1 +1 @@
-{ "cnt": 1 }
\ No newline at end of file
+{ "cnt": 0 }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm
index bacb60c..f86e66b 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.25.adm
@@ -1 +1 @@
-{ "cnt": 0 }
\ No newline at end of file
+{ "cnt": 1 }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.27.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.27.adm
new file mode 100644
index 0000000..bacb60c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/analyze-dataset-1/analyze-dataset-1.27.adm
@@ -0,0 +1 @@
+{ "cnt": 0 }
\ No newline at end of file
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index 44471a2..b0826e8 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -267,6 +267,9 @@
     INVALID_TIMEZONE(1172),
     INVALID_PARAM_VALUE_ALLOWED_VALUE(1173),
     SAMPLE_HAS_ZERO_ROWS(1174),
+    INVALID_SAMPLE_SIZE(1175),
+    OUT_OF_RANGE_SAMPLE_SIZE(1176),
+    INVALID_SAMPLE_SEED(1177),
 
     // Feed errors
     DATAFLOW_ILLEGAL_STATE(3001),
diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index c9ab080..0bf523a 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -269,7 +269,9 @@
 1172 = Provided timezone is invalid: '%1$s'
 1173 = Invalid value for parameter '%1$s', allowed value(s): %2$s
 1174 = Sample has zero rows
-
+1175 = Sample size options are "low", "medium", "high", or a number
+1176 = Sample size has to be between %1$s and %2$s
+1177 = Sample seed has to be a number or a string convertible to a number
 # Feed Errors
 3001 = Illegal state.
 3002 = Tuple is too large for a frame
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
index cbf2c07..a719c34 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/AnalyzeStatement.java
@@ -19,14 +19,18 @@
 
 package org.apache.asterix.lang.common.statement;
 
+import java.util.List;
 import java.util.Locale;
 
 import org.apache.asterix.common.exceptions.CompilationException;
 import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.common.metadata.DataverseName;
 import org.apache.asterix.lang.common.base.AbstractStatement;
+import org.apache.asterix.lang.common.base.Expression;
+import org.apache.asterix.lang.common.expression.FieldBinding;
 import org.apache.asterix.lang.common.expression.RecordConstructor;
 import org.apache.asterix.lang.common.util.ExpressionUtils;
+import org.apache.asterix.lang.common.util.LangRecordParseUtil;
 import org.apache.asterix.lang.common.visitor.base.ILangVisitor;
 import org.apache.asterix.object.base.AdmBigIntNode;
 import org.apache.asterix.object.base.AdmDoubleNode;
@@ -57,20 +61,32 @@
             throws CompilationException {
         this.dataverseName = dataverseName;
         this.datasetName = datasetName;
-        this.options = options == null ? null : validateOptions(ExpressionUtils.toNode(options));
+        this.options = options == null ? null : validateOptions(options);
     }
 
-    private static AdmObjectNode validateOptions(AdmObjectNode options) throws CompilationException {
-        for (String fieldName : options.getFieldNames()) {
-            switch (fieldName) {
+    private static AdmObjectNode validateOptions(RecordConstructor options) throws CompilationException {
+        final List<FieldBinding> fbList = options.getFbList();
+        for (int i = 0; i < fbList.size(); i++) {
+            FieldBinding binding = fbList.get(i);
+            String key = LangRecordParseUtil.exprToStringLiteral(binding.getLeftExpr()).getStringValue();
+            Expression value = binding.getRightExpr();
+            switch (key) {
                 case SAMPLE_FIELD_NAME:
+                    if (value.getKind() != Expression.Kind.LITERAL_EXPRESSION) {
+                        throw new CompilationException(ErrorCode.INVALID_SAMPLE_SIZE);
+                    }
+                    break;
                 case SAMPLE_SEED_FIELD_NAME:
+                    if (value.getKind() != Expression.Kind.LITERAL_EXPRESSION
+                            && value.getKind() != Expression.Kind.UNARY_EXPRESSION) {
+                        throw new CompilationException(ErrorCode.INVALID_SAMPLE_SEED);
+                    }
                     break;
                 default:
-                    throw new CompilationException(ErrorCode.INVALID_PARAM, fieldName);
+                    throw new CompilationException(ErrorCode.INVALID_PARAM, key);
             }
         }
-        return options;
+        return (ExpressionUtils.toNode(options));
     }
 
     @Override
@@ -102,18 +118,20 @@
                     case SAMPLE_HIGH:
                         return SAMPLE_HIGH_SIZE;
                     default:
-                        throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_FIELD_NAME);
+                        throw new CompilationException(ErrorCode.INVALID_SAMPLE_SIZE);
                 }
             case BIGINT:
                 int v = (int) ((AdmBigIntNode) n).get();
                 if (!isValidSampleSize(v)) {
-                    throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_FIELD_NAME);
+                    throw new CompilationException(ErrorCode.OUT_OF_RANGE_SAMPLE_SIZE, SAMPLE_LOW_SIZE,
+                            SAMPLE_HIGH_SIZE);
                 }
                 return v;
             case DOUBLE:
                 v = (int) ((AdmDoubleNode) n).get();
                 if (!isValidSampleSize(v)) {
-                    throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_FIELD_NAME);
+                    throw new CompilationException(ErrorCode.OUT_OF_RANGE_SAMPLE_SIZE, SAMPLE_LOW_SIZE,
+                            SAMPLE_HIGH_SIZE);
                 }
                 return v;
             default:
@@ -138,7 +156,7 @@
                 try {
                     return Long.parseLong(s);
                 } catch (NumberFormatException e) {
-                    throw new CompilationException(ErrorCode.INVALID_PROPERTY_FORMAT, SAMPLE_SEED_FIELD_NAME);
+                    throw new CompilationException(ErrorCode.INVALID_SAMPLE_SEED);
                 }
             default:
                 throw new CompilationException(ErrorCode.WITH_FIELD_MUST_BE_OF_TYPE, SAMPLE_SEED_FIELD_NAME,
@@ -167,4 +185,4 @@
     public byte getCategory() {
         return Category.DDL;
     }
-}
\ No newline at end of file
+}
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java
index cab1aca..5c91aef 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/util/ExpressionUtils.java
@@ -42,6 +42,7 @@
 import org.apache.asterix.lang.common.expression.ListConstructor;
 import org.apache.asterix.lang.common.expression.LiteralExpr;
 import org.apache.asterix.lang.common.expression.RecordConstructor;
+import org.apache.asterix.lang.common.expression.UnaryExpr;
 import org.apache.asterix.lang.common.literal.DoubleLiteral;
 import org.apache.asterix.lang.common.literal.FloatLiteral;
 import org.apache.asterix.lang.common.literal.IntegerLiteral;
@@ -50,6 +51,7 @@
 import org.apache.asterix.lang.common.statement.FunctionDecl;
 import org.apache.asterix.lang.common.statement.Query;
 import org.apache.asterix.lang.common.statement.ViewDecl;
+import org.apache.asterix.lang.common.struct.UnaryExprType;
 import org.apache.asterix.lang.common.visitor.GatherFunctionCallsVisitor;
 import org.apache.asterix.object.base.AdmArrayNode;
 import org.apache.asterix.object.base.AdmBigIntNode;
@@ -80,6 +82,26 @@
                 return toNode((LiteralExpr) expr);
             case RECORD_CONSTRUCTOR_EXPRESSION:
                 return toNode((RecordConstructor) expr);
+            case UNARY_EXPRESSION:
+                UnaryExpr unaryExpr = (UnaryExpr) expr;
+                UnaryExprType unaryExprType = unaryExpr.getExprType();
+                if (unaryExprType == UnaryExprType.POSITIVE || unaryExprType == UnaryExprType.NEGATIVE) {
+                    Expression uexpr = unaryExpr.getExpr();
+                    if (uexpr.getKind() == Expression.Kind.LITERAL_EXPRESSION) {
+                        if (unaryExprType == UnaryExprType.POSITIVE) {
+                            return toNode(uexpr);
+                        } else {
+                            Literal lit = ((LiteralExpr) uexpr).getValue();
+                            return toNode(new LiteralExpr(reverseSign(lit)));
+                        }
+                    } else {
+                        throw new CompilationException(ErrorCode.LITERAL_TYPE_NOT_SUPPORTED_IN_CONSTANT_RECORD,
+                                uexpr.getKind());
+                    }
+                } else {
+                    throw new CompilationException(ErrorCode.EXPRESSION_NOT_SUPPORTED_IN_CONSTANT_RECORD,
+                            unaryExprType);
+                }
             default:
                 throw new CompilationException(ErrorCode.EXPRESSION_NOT_SUPPORTED_IN_CONSTANT_RECORD, expr.getKind());
         }