[ASTERIXDB-3174][COMP] Support CREATE DATASET AS SELECT Syntax

- user model changes: yes
- storage format changes: no
- interface changes: no

Details:
With this change we add support for CREATE DATASET AS SELECT syntax
that can be used to initialize a dataset during it creation by
upserting data from the result of a query.

Change-Id: I4d49b944ed11532ff45b6862345b11b9fae236c1
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17508
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Peeyush Gupta <peeyush.gupta@couchbase.com>
Reviewed-by: Murtadha Hubail <mhubail@apache.org>
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
index 43218d0..d2fd4be 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/translator/QueryTranslator.java
@@ -151,6 +151,7 @@
 import org.apache.asterix.lang.common.statement.SynonymDropStatement;
 import org.apache.asterix.lang.common.statement.TypeDecl;
 import org.apache.asterix.lang.common.statement.TypeDropStatement;
+import org.apache.asterix.lang.common.statement.UpsertStatement;
 import org.apache.asterix.lang.common.statement.ViewDecl;
 import org.apache.asterix.lang.common.statement.ViewDropStatement;
 import org.apache.asterix.lang.common.struct.Identifier;
@@ -730,9 +731,22 @@
             doCreateDatasetStatement(metadataProvider, dd, dataverseName, datasetName, itemTypeDataverseName,
                     itemTypeExpr, itemTypeName, metaItemTypeExpr, metaItemTypeDataverseName, metaItemTypeName, hcc,
                     requestParameters);
+            if (dd.getQuery() != null) {
+                final IResultSet resultSet = requestParameters.getResultSet();
+                final ResultDelivery resultDelivery = requestParameters.getResultProperties().getDelivery();
+                final Stats stats = requestParameters.getStats();
+                IStatementRewriter stmtRewriter = rewriterFactory.createStatementRewriter();
+                final ResultMetadata outMetadata = requestParameters.getOutMetadata();
+                final Map<String, IAObject> stmtParams = requestParameters.getStatementParameters();
+                UpsertStatement upsertStmt =
+                        new UpsertStatement(dataverseName, datasetName, dd.getQuery(), -1, null, null);
+                handleInsertUpsertStatement(metadataProvider, upsertStmt, hcc, resultSet, resultDelivery, outMetadata,
+                        stats, requestParameters, stmtParams, stmtRewriter);
+            }
         } finally {
             metadataProvider.getLocks().unlock();
         }
+
     }
 
     protected Optional<? extends Dataset> doCreateDatasetStatement(MetadataProvider metadataProvider, DatasetDecl dd,
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.01.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.01.ddl.sqlpp
new file mode 100644
index 0000000..11b6f9b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.01.ddl.sqlpp
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+create dataset orders primary key (oid: int64);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.02.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.02.update.sqlpp
new file mode 100644
index 0000000..ab30804
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.02.update.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use test;
+
+copy orders
+using localfs
+(("path"="asterix_nc1://data/nontagged/orderData.json"),("format"="adm"));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.03.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.03.ddl.sqlpp
new file mode 100644
index 0000000..b61d2fa
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.03.ddl.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+create dataset orders_copy primary key (oid: int64) as
+select value x from orders x
+
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.04.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.04.update.sqlpp
new file mode 100644
index 0000000..4d8193f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.04.update.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+upsert into orders_copy ([
+    {  "oid": 10,  "cid": 775,  "orderstatus": "ORDER_DELIVERED", "orderpriority": "HIGH",  "clerk": "NEW1",  "total": 14.2326f,  "items": [24, 15]},
+    {  "oid": 10000,  "cid": 775,  "orderstatus": "ORDER_DELIVERED", "orderpriority": "HIGH",  "clerk": "NEW2",  "total": 14.2326f,  "items": [24, 15]}
+]);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.05.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.05.query.sqlpp
new file mode 100644
index 0000000..e5f669b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.05.query.sqlpp
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+select value count(distinct clerk) from orders_copy;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.06.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.06.ddl.sqlpp
new file mode 100644
index 0000000..ec3de13
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.06.ddl.sqlpp
@@ -0,0 +1,23 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+create dataset orders_copy2 primary key (oid: int64) as
+select value x from non_existent x
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.07.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.07.ddl.sqlpp
new file mode 100644
index 0000000..0d784ea
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/ddl/create-dataset-3/create-dataset-3.07.ddl.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use test;
+
+drop dataset orders;
+drop dataset orders_copy;
+
+drop dataverse test;
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/create-dataset-3/create-dataset-3.05.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/create-dataset-3/create-dataset-3.05.adm
new file mode 100644
index 0000000..7813681
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/ddl/create-dataset-3/create-dataset-3.05.adm
@@ -0,0 +1 @@
+5
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index 7bcb473..08f71f7 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -4153,6 +4153,12 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="ddl">
+      <compilation-unit name="create-dataset-3">
+        <output-dir compare="Clean-JSON">create-dataset-3</output-dir>
+        <expected-error>ASX1077: Cannot find dataset non_existent in dataverse test nor an alias with name non_existent (in line 23, at column 21)</expected-error>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="ddl">
       <compilation-unit name="analyze-dataset-1">
         <output-dir compare="Text">analyze-dataset-1</output-dir>
       </compilation-unit>
diff --git a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
index b8d1bfd..5454409 100644
--- a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
+++ b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/statement/DatasetDecl.java
@@ -47,10 +47,17 @@
     protected final Map<String, String> hints;
     private final AdmObjectNode withObjectNode;
     protected final boolean ifNotExists;
+    protected final Query query;
 
     public DatasetDecl(DataverseName dataverse, Identifier name, TypeExpression itemType, TypeExpression metaItemType,
             Map<String, String> hints, DatasetType datasetType, IDatasetDetailsDecl idd, RecordConstructor withRecord,
             boolean ifNotExists) throws CompilationException {
+        this(dataverse, name, itemType, metaItemType, hints, datasetType, idd, withRecord, ifNotExists, null);
+    }
+
+    public DatasetDecl(DataverseName dataverse, Identifier name, TypeExpression itemType, TypeExpression metaItemType,
+            Map<String, String> hints, DatasetType datasetType, IDatasetDetailsDecl idd, RecordConstructor withRecord,
+            boolean ifNotExists, Query query) throws CompilationException {
         this.dataverse = dataverse;
         this.name = name;
         this.itemType = itemType;
@@ -60,6 +67,7 @@
         this.ifNotExists = ifNotExists;
         this.datasetType = datasetType;
         this.datasetDetailsDecl = idd;
+        this.query = query;
     }
 
     public boolean getIfNotExists() {
@@ -86,6 +94,10 @@
         return metaItemType;
     }
 
+    public Query getQuery() {
+        return query;
+    }
+
     public String getNodegroupName() {
         AdmObjectNode nodeGroupObj = (AdmObjectNode) withObjectNode.get(DatasetDeclParametersUtil.NODE_GROUP_NAME);
         if (nodeGroupObj == null) {
diff --git a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
index 01f72aa..9e3dea4 100644
--- a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
+++ b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
@@ -1124,6 +1124,8 @@
   boolean autogenerated = false;
   Pair<Integer, List<String>> filterField = null;
   RecordConstructor withRecord = null;
+  SelectExpression selectExpr = null;
+  Query query = null;
 }
 {
   nameComponents = QualifiedName()
@@ -1147,8 +1149,15 @@
   ( <HINTS> hints = Properties() )?
   ( LOOKAHEAD(2) <WITH> <FILTER> <ON>  filterField = NestedField() )?
   ( <WITH> withRecord = RecordConstructor() )?
+  ( <AS> selectExpr = SelectExpression(false) )?
   {
     try {
+      if (selectExpr != null) {
+        query = new Query();
+        query.setBody(selectExpr);
+        query.setSourceLocation(selectExpr.getSourceLocation());
+      }
+
       if (typeExpr == null) {
         InternalDetailsDecl idd = new InternalDetailsDecl(primaryKeyFieldsWithTypes.second,
           primaryKeyFieldsWithTypes.first, autogenerated, filterField == null? null : filterField.first,
@@ -1157,7 +1166,7 @@
           new Pair(MetadataBuiltinEntities.ANY_OBJECT_DATATYPE.getDataverseName(),
             new Identifier(MetadataBuiltinEntities.ANY_OBJECT_DATATYPE.getDatatypeName())));
         stmt = new DatasetDecl(nameComponents.first, nameComponents.second, anyObjectReference, null, hints,
-          DatasetType.INTERNAL, idd, withRecord, ifNotExists);
+          DatasetType.INTERNAL, idd, withRecord, ifNotExists, query);
         return addSourceLocation(stmt, startStmtToken);
       } else {
         InternalDetailsDecl idd = new InternalDetailsDecl(primaryKeyFields.second, primaryKeyFields.first, autogenerated,
@@ -1168,7 +1177,7 @@
             true);
         }
         stmt = new DatasetDecl(nameComponents.first, nameComponents.second, typeExpr, metaTypeExpr, hints,
-          DatasetType.INTERNAL, idd, withRecord, ifNotExists);
+          DatasetType.INTERNAL, idd, withRecord, ifNotExists, query);
         return addSourceLocation(stmt, startStmtToken);
       }
     } catch (CompilationException e) {