blob: 1831c23fc2409ace9817f625e64b3ef21ce2757a [file] [log] [blame]
<!DOCTYPE html>
<!--
| Generated by Apache Maven Doxia Site Renderer 1.8.1 from target/generated-site/markdown/sqlpp/manual.md at 2022-05-12
| Rendered using Apache Maven Fluido Skin 1.7
-->
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<meta name="Date-Revision-yyyymmdd" content="20220512" />
<meta http-equiv="Content-Language" content="en" />
<title>AsterixDB &#x2013; The SQL++ Query Language</title>
<link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
<link rel="stylesheet" href="../css/site.css" />
<link rel="stylesheet" href="../css/print.css" media="print" />
<script type="text/javascript" src="../js/apache-maven-fluido-1.7.min.js"></script>
</head>
<body class="topBarDisabled">
<div class="container-fluid">
<div id="banner">
<div class="pull-left"><a href=".././" id="bannerLeft"><img src="../images/asterixlogo.png" alt="AsterixDB"/></a></div>
<div class="pull-right"></div>
<div class="clear"><hr/></div>
</div>
<div id="breadcrumbs">
<ul class="breadcrumb">
<li id="publishDate">Last Published: 2022-05-12</li>
<li id="projectVersion" class="pull-right">Version: 0.9.8</li>
<li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
</ul>
</div>
<div class="row-fluid">
<div id="leftColumn" class="span2">
<div class="well sidebar-nav">
<ul class="nav nav-list">
<li class="nav-header">Get Started - Installation</li>
<li><a href="../ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
<li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
<li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
<li class="nav-header">AsterixDB Primer</li>
<li><a href="../sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
<li class="nav-header">Data Model</li>
<li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
<li class="nav-header">Queries</li>
<li class="active"><a href="#"><span class="none"></span>The SQL++ Query Language</a></li>
<li><a href="../SQLPP.html" title="Raw SQL++ Grammar"><span class="none"></span>Raw SQL++ Grammar</a></li>
<li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
<li class="nav-header">API/SDK</li>
<li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
<li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
<li class="nav-header">Advanced Features</li>
<li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
<li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
<li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
<li><a href="../sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
<li><a href="../sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
<li><a href="../sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
<li><a href="../geo/quickstart.html" title="GIS Support Overview"><span class="none"></span>GIS Support Overview</a></li>
<li><a href="../geo/functions.html" title="GIS Functions"><span class="none"></span>GIS Functions</a></li>
<li><a href="../interval_join.html" title="Support of Interval Joins"><span class="none"></span>Support of Interval Joins</a></li>
<li><a href="../spatial_join.html" title="Support of Spatial Joins"><span class="none"></span>Support of Spatial Joins</a></li>
<li><a href="../sqlpp/arrayindex.html" title="Support of Array Indexes"><span class="none"></span>Support of Array Indexes</a></li>
<li class="nav-header">Deprecated</li>
<li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
<li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
<li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
</ul>
<hr />
<div id="poweredBy">
<div class="clear"></div>
<div class="clear"></div>
<div class="clear"></div>
<div class="clear"></div>
<a href=".././" title="AsterixDB" class="builtBy"><img class="builtBy" alt="AsterixDB" src="../images/asterixlogo.png" /></a>
</div>
</div>
</div>
<div id="bodyColumn" class="span10" >
<!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<h1>The SQL++ Query Language</h1>
<ul>
<li><a href="#Introduction">1. Introduction</a></li>
<li><a href="#Expressions">2. Expressions</a>
<ul>
<li><a href="#Operator_expressions">Operator Expressions</a>
<ul>
<li><a href="#Arithmetic_operators">Arithmetic Operators</a></li>
<li><a href="#Collection_operators">Collection Operators</a></li>
<li><a href="#Comparison_operators">Comparison Operators</a></li>
<li><a href="#Logical_operators">Logical Operators</a></li>
</ul>
</li>
<li><a href="#Quantified_expressions">Quantified Expressions</a></li>
<li><a href="#Path_expressions">Path Expressions</a></li>
<li><a href="#Primary_expressions">Primary Expressions</a>
<ul>
<li><a href="#Literals">Literals</a></li>
<li><a href="#Variable_references">Identifiers and Variable References</a></li>
<li><a href="#Parameter_references">Parameter References</a></li>
<li><a href="#Parenthesized_expressions">Parenthesized Expressions</a></li>
<li><a href="#Function_call_expressions">Function Calls</a></li>
<li><a href="#Case_expressions">Case Expressions</a></li>
<li><a href="#Constructors">Constructors</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#Queries">3. Queries</a>
<ul>
<li><a href="#Select_clauses">SELECT Clauses</a>
<ul>
<li><a href="#Select_element">Select Value</a></li>
<li><a href="#SQL_select">SQL-style Select</a></li>
<li><a href="#Select_star">Select *</a></li>
<li><a href="#Select_distinct">Select Distinct</a></li>
<li><a href="#Unnamed_projections">Unnamed Projections</a></li>
<li><a href="#Abbreviated_field_access_expressions">Abbreviated Field Access Expressions</a></li>
</ul>
</li>
<li><a href="#From_clauses">FROM clauses</a>
<ul>
<li><a href="#Joins">Joins</a></li>
</ul>
</li>
<li><a href="#Let_clauses">LET Clauses</a></li>
<li><a href="#WHERE_Clause">WHERE Clause</a></li>
<li><a href="#Grouping">Grouping</a>
<ul>
<li><a href="#GROUP_BY_Clause">GROUP BY Clause</a></li>
<li><a href="#HAVING_Clause">HAVING Clause</a></li>
<li><a href="#Aggregation_PseudoFunctions">Aggregation Pseudo-functions</a></li>
<li><a href="#GROUP_AS_Clause">GROUP AS Clause</a></li>
</ul>
</li>
<li><a href="#Union_all">Selection and UNION ALL</a></li>
<li><a href="#With_clauses">WITH Clauses</a></li>
<li><a href="#Order_By_clauses">ORDER BY, LIMIT, and OFFSET Clauses</a></li>
<li><a href="#Subqueries">Subqueries</a></li>
</ul>
</li>
<li><a href="#Over_clauses">4. Window Functions</a>
<ul>
<li><a href="#Window_function_call">Window Function Call</a>
<ul>
<li><a href="#Window_function_arguments">Window Function Arguments</a></li>
<li><a href="#Window_function_options">Window Function Options</a></li>
<li><a href="#Window_frame_variable">Window Frame Variable</a></li>
<li><a href="#Window_definition">Window Definition</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#Errors">5. Errors</a>
<ul>
<li><a href="#Syntax_errors">Syntax Errors</a></li>
<li><a href="#Identifier_resolution_errors">Identifier Resolution Errors</a></li>
<li><a href="#Type_errors">Type Errors</a></li>
<li><a href="#Resource_errors">Resource Errors</a></li>
</ul>
</li>
<li><a href="#Vs_SQL-92">6.Differences from SQL-92</a></li>
<li><a href="#DDL_and_DML_statements">7. DDL and DML Statements</a>
<ul>
<li><a href="#Lifecycle_management_statements">Lifecycle Management Statements</a>
<ul>
<li><a href="#Use">Use Statement</a></li>
<li><a href="#Sets">Set Statement</a></li>
<li><a href="#Functions">Function Declaration</a></li>
<li><a href="#Create">Create Statement</a>
<ul>
<li><a href="#Dataverses">Create Dataverse</a></li>
<li><a href="#Types">Create Type</a></li>
<li><a href="#Datasets">Create Dataset</a></li>
<li><a href="#Indices">Create Index</a></li>
<li><a href="#Synonyms">Create Synonym</a></li>
<li><a href="#Create_function">Create Function</a></li>
<li><a href="#Create_view">Create View</a></li>
</ul>
</li>
<li><a href="#Removal">Drop Statement</a></li>
<li><a href="#Load_statement">Load Statement</a></li>
</ul>
</li>
<li><a href="#Modification_statements">Modification Statements</a>
<ul>
<li><a href="#Inserts">Insert Statement</a></li>
<li><a href="#Upserts">Upsert Statement</a></li>
<li><a href="#Deletes">Delete Statement</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#Reserved_keywords">Appendix 1. Reserved Keywords</a></li>
<li><a href="#Performance_tuning">Appendix 2. Performance Tuning</a>
<ul>
<li><a href="#Parallelism_parameter">Parallelism Parameter</a></li>
<li><a href="#Memory_parameters">Memory Parameters</a></li>
<li><a href="#Query_hints">Query Hints</a></li>
</ul>
</li>
<li><a href="#Variable_bindings_and_name_resolution">Appendix 3. Variable Bindings and Name Resolution</a></li>
<li><a href="#Manual_data">Appendix 4. Example Data</a>
<ul>
<li><a href="#definition_statements">Data Definitions</a></li>
<li><a href="#customers_data">Customers Dataset</a></li>
<li><a href="#orders_data">Orders Dataset</a></li>
</ul>
</li>
</ul><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<h1><a name="Introduction" id="Introduction">1. Introduction</a></h1>
<p>This document is intended as a reference guide to the full syntax and semantics of AsterixDB&#x2019;s query language, a SQL-based language for working with semistructured data. The language is a derivative of SQL++, a declarative query language for JSON data which is largely backwards compatible with SQL. SQL++ originated from research in the FORWARD project at UC San Diego, and it has much in common with SQL; some differences exist due to the different data models that the two languages were designed to serve. SQL was designed for interacting with the flat, schema-ified world of relational databases, while SQL++ generalizes SQL to also handle nested data formats (like JSON) and the schema-optional (or even schema-less) data models of modern NoSQL and BigData systems.</p>
<p>In the context of Apache AsterixDB, SQL++ is intended for working with the Asterix Data Model (<a href="../datamodel.html">ADM</a>), a data model based on a superset of JSON with an enriched and flexible type system. New AsterixDB users are encouraged to read and work through the (much friendlier) guide &#x201c;<a href="primer-sqlpp.html">AsterixDB 101: An ADM and SQL++ Primer</a>&#x201d; before attempting to make use of this document. In addition, readers are advised to read through the <a href="../datamodel.html">Asterix Data Model (ADM) reference guide</a> first as well, as an understanding of the data model is a prerequisite to understanding SQL++.</p>
<p>In what follows, we detail the features of the SQL++ language in a grammar-guided manner. We list and briefly explain each of the productions in the query grammar, offering examples (and results) for clarity. In this manual, we will explain how to use the various features of SQL++ using two datasets named <tt>customers</tt> and <tt>orders</tt>. Each dataset is a collection of objects. The contents of the example datasets can be found at the end of this manual in <a href="#Manual_data">Appendix 4</a>.</p>
<p>For additional reading on SQL++ and more examples, refer to <a class="externalLink" href="https://asterixdb.apache.org/files/SQL_Book.pdf">SQL++ for SQL Users: A Tutorial</a>.</p><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<h1><a name="Expressions" id="Expressions">2. Expressions</a></h1><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<p>An expression is a language fragment that can be evaluated to return a value. For example, the expression 2 + 3 returns the value 5. Expressions are the building blocks from which queries are constructed. SQL++ supports nearly all of the kinds of expressions in SQL, and adds some new kinds as well.</p>
<p>SQL++ is an orthogonal language, which means that expressions can serve as operands of higher level expressions. By nesting expressions inside other expressions, complex queries can be built up. Any expression can be enclosed in parentheses to establish operator precedence.</p>
<p>In this section, we&#x2019;ll discuss the various kinds of SQL++ expressions.</p>
<div class="section">
<div class="section">
<div class="section">
<div class="section">
<h5><a name="Expr"></a>Expr</h5>
<p><img src="../images/diagrams/Expr.png" alt="" /></p></div></div></div></div>
<div class="section">
<h2><a name="Operator_Expressions"></a><a name="Operator_expressions" id="Operator_expressions">Operator Expressions</a></h2>
<p>Operators perform a specific operation on the input values or expressions. The syntax of an operator expression is as follows:</p>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="OperatorExpr"></a>OperatorExpr</h5>
<p><img src="../images/diagrams/OperatorExpr.png" alt="" /></p>
<p>The language provides a full set of operators that you can use within its statements. Here are the categories of operators:</p>
<ul>
<li><a href="#Arithmetic_operators">Arithmetic Operators</a>, to perform basic mathematical operations;</li>
<li><a href="#Collection_operators">Collection Operators</a>, to evaluate expressions on collections or objects;</li>
<li><a href="#Comparison_operators">Comparison Operators</a>, to compare two expressions;</li>
<li><a href="#Logical_operators">Logical Operators</a>, to combine operators using Boolean logic.</li>
</ul>
<p>The following table summarizes the precedence order (from higher to lower) of the major unary and binary operators:</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> Operator </th>
<th> Operation </th></tr>
</thead><tbody>
<tr class="b">
<td> EXISTS, NOT EXISTS </td>
<td> Collection emptiness testing </td></tr>
<tr class="a">
<td> ^ </td>
<td> Exponentiation </td></tr>
<tr class="b">
<td> *, /, DIV, MOD (%) </td>
<td> Multiplication, division, modulo </td></tr>
<tr class="a">
<td> +, - </td>
<td> Addition, subtraction </td></tr>
<tr class="b">
<td> || </td>
<td> String concatenation </td></tr>
<tr class="a">
<td> IS NULL, IS NOT NULL, IS MISSING, IS NOT MISSING, <br />IS UNKNOWN, IS NOT UNKNOWN, IS VALUED, IS NOT VALUED </td>
<td> Unknown value comparison </td></tr>
<tr class="b">
<td> BETWEEN, NOT BETWEEN </td>
<td> Range comparison (inclusive on both sides) </td></tr>
<tr class="a">
<td> =, !=, &lt;&gt;, &lt;, &gt;, &lt;=, &gt;=, LIKE, NOT LIKE, IN, NOT IN, IS DISTINCT FROM, IS NOT DISTINCT FROM </td>
<td> Comparison </td></tr>
<tr class="b">
<td> NOT </td>
<td> Logical negation </td></tr>
<tr class="a">
<td> AND </td>
<td> Conjunction </td></tr>
<tr class="b">
<td> OR </td>
<td> Disjunction </td></tr>
</tbody>
</table>
<p>In general, if any operand evaluates to a <tt>MISSING</tt> value, the enclosing operator will return <tt>MISSING</tt>; if none of the operands evaluates to a <tt>MISSING</tt> value but there is an operand which evaluates to a <tt>NULL</tt> value, the enclosing operator will return <tt>NULL</tt>. However, there are a few exceptions listed in <a href="#Comparison_operators">comparison operators</a> and <a href="#Logical_operators">logical operators</a>.</p></div></div></div>
<div class="section">
<h3><a name="Arithmetic_Operators"></a><a name="Arithmetic_operators" id="Arithmetic_operators">Arithmetic Operators</a></h3>
<p>Arithmetic operators are used to exponentiate, add, subtract, multiply, and divide numeric values, or concatenate string values.</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> Operator </th>
<th> Purpose </th>
<th> Example </th></tr>
</thead><tbody>
<tr class="b">
<td> +, - </td>
<td> As unary operators, they denote a <br />positive or negative expression </td>
<td> SELECT VALUE -1; </td></tr>
<tr class="a">
<td> +, - </td>
<td> As binary operators, they add or subtract </td>
<td> SELECT VALUE 1 + 2; </td></tr>
<tr class="b">
<td> * </td>
<td> Multiply </td>
<td> SELECT VALUE 4 * 2; </td></tr>
<tr class="a">
<td> / </td>
<td> Divide (returns a value of type <tt>double</tt> if both operands are integers)</td>
<td> SELECT VALUE 5 / 2; </td></tr>
<tr class="b">
<td> DIV </td>
<td> Divide (returns an integer value if both operands are integers) </td>
<td> SELECT VALUE 5 DIV 2; </td></tr>
<tr class="a">
<td> MOD (%) </td>
<td> Modulo </td>
<td> SELECT VALUE 5 % 2; </td></tr>
<tr class="b">
<td> ^ </td>
<td> Exponentiation </td>
<td> SELECT VALUE 2^3; </td></tr>
<tr class="a">
<td> || </td>
<td> String concatenation </td>
<td> SELECT VALUE &#x201c;ab&#x201d;||&#x201c;c&#x201d;||&#x201c;d&#x201d;; </td></tr>
</tbody>
</table></div>
<div class="section">
<h3><a name="Collection_Operators"></a><a name="Collection_operators" id="Collection_operators">Collection Operators</a></h3>
<p>Collection operators are used for membership tests (IN, NOT IN) or empty collection tests (EXISTS, NOT EXISTS).</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> Operator </th>
<th> Purpose </th>
<th> Example </th></tr>
</thead><tbody>
<tr class="b">
<td> IN </td>
<td> Membership test </td>
<td> FROM customers AS c <br />WHERE c.address.zipcode IN [&#x201c;02340&#x201d;, &#x201c;02115&#x201d;] <br /> SELECT *; </td></tr>
<tr class="a">
<td> NOT IN </td>
<td> Non-membership test </td>
<td> FROM customers AS c <br />WHERE c.address.zipcode NOT IN [&#x201c;02340&#x201d;, &#x201c;02115&#x201d;] <br /> SELECT *;</td></tr>
<tr class="b">
<td> EXISTS </td>
<td> Check whether a collection is not empty </td>
<td> FROM orders AS o <br />WHERE EXISTS o.items <br /> SELECT *;</td></tr>
<tr class="a">
<td> NOT EXISTS </td>
<td> Check whether a collection is empty </td>
<td> FROM orders AS o <br />WHERE NOT EXISTS o.items <br /> SELECT *; </td></tr>
</tbody>
</table></div>
<div class="section">
<h3><a name="Comparison_Operators"></a><a name="Comparison_operators" id="Comparison_operators">Comparison Operators</a></h3>
<p>Comparison operators are used to compare values.</p>
<p>The comparison operators fall into one of two sub-categories: missing value comparisons and regular value comparisons. SQL++ (and JSON) has two ways of representing missing information in an object &#x2014; the presence of the field with a NULL for its value (as in SQL), and the absence of the field (which JSON permits). For example, the first of the following objects represents Jack, whose friend is Jill. In the other examples, Jake is friendless &#xe0; la SQL, with a friend field that is NULL, while Joe is friendless in a more natural (for JSON) way, i.e., by not having a friend field.</p>
<div class="section">
<div class="section">
<h5><a name="Examples"></a>Examples</h5>
<div>
<div>
<pre class="source">{&quot;name&quot;: &quot;Jack&quot;, &quot;friend&quot;: &quot;Jill&quot;}
{&quot;name&quot;: &quot;Jake&quot;, &quot;friend&quot;: NULL}
{&quot;name&quot;: &quot;Joe&quot;}
</pre></div></div>
<p>The following table enumerates all of the comparison operators available in SQL++.</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> Operator </th>
<th> Purpose </th>
<th> Example </th></tr>
</thead><tbody>
<tr class="b">
<td> IS NULL </td>
<td> Test if a value is NULL </td>
<td>FROM customers AS c <br />WHERE c.name IS NULL <br /> SELECT *; </td></tr>
<tr class="a">
<td> IS NOT NULL </td>
<td> Test if a value is not NULL </td>
<td> FROM customers AS c <br />WHERE c.name IS NOT NULL <br /> SELECT *; </td></tr>
<tr class="b">
<td> IS MISSING </td>
<td> Test if a value is MISSING </td>
<td> FROM customers AS c <br />WHERE c.name IS MISSING <br /> SELECT *; </td></tr>
<tr class="a">
<td> IS NOT MISSING </td>
<td> Test if a value is not MISSING </td>
<td> FROM customers AS c <br />WHERE c.name IS NOT MISSING <br /> SELECT *; </td></tr>
<tr class="b">
<td> IS UNKNOWN </td>
<td> Test if a value is NULL or MISSING </td>
<td> FROM customers AS c <br />WHERE c.name IS UNKNOWN <br /> SELECT *; </td></tr>
<tr class="a">
<td> IS NOT UNKNOWN </td>
<td> Test if a value is neither NULL nor MISSING </td>
<td> FROM customers AS c <br />WHERE c.name IS NOT UNKNOWN <br /> SELECT *; </td></tr>
<tr class="b">
<td> IS KNOWN (IS VALUED) </td>
<td> Test if a value is neither NULL nor MISSING </td>
<td> FROM customers AS c <br />WHERE c.name IS KNOWN <br /> SELECT *; </td></tr>
<tr class="a">
<td> IS NOT KNOWN (IS NOT VALUED) </td>
<td> Test if a value is NULL or MISSING </td>
<td> FROM customers AS c <br />WHERE c.name IS NOT KNOWN <br /> SELECT *; </td></tr>
<tr class="b">
<td> BETWEEN </td>
<td> Test if a value is between a start value and a end value. The comparison is inclusive of both the start and end values. </td>
<td> FROM customers AS c WHERE c.rating BETWEEN 600 AND 700 SELECT *;</td></tr>
<tr class="a">
<td> = </td>
<td> Equality test </td>
<td> FROM customers AS c <br /> WHERE c.rating = 640 <br /> SELECT *; </td></tr>
<tr class="b">
<td> != </td>
<td> Inequality test </td>
<td> FROM customers AS c <br /> WHERE c.rating != 640 <br /> SELECT *;</td></tr>
<tr class="a">
<td> &lt;&gt; </td>
<td> Inequality test </td>
<td> FROM customers AS c <br /> WHERE c.rating &lt;&gt; 640 <br /> SELECT *;</td></tr>
<tr class="b">
<td> &lt; </td>
<td> Less than </td>
<td> FROM customers AS c <br /> WHERE c.rating &lt; 640 <br /> SELECT *; </td></tr>
<tr class="a">
<td> &gt; </td>
<td> Greater than </td>
<td> FROM customers AS c <br /> WHERE c.rating &gt; 640 <br /> SELECT *; </td></tr>
<tr class="b">
<td> &lt;= </td>
<td> Less than or equal to </td>
<td> FROM customers AS c <br /> WHERE c.rating &lt;= 640 <br /> SELECT *; </td></tr>
<tr class="a">
<td> &gt;= </td>
<td> Greater than or equal to </td>
<td> FROM customers AS c <br /> WHERE c.rating &gt;= 640 <br /> SELECT *; </td></tr>
<tr class="b">
<td> LIKE </td>
<td> Test if the left side matches a pattern defined on the right side; in the pattern, &#x201c;%&#x201d; matches any string while &#x201c;_&#x201d; matches any character. </td>
<td> FROM customers AS c WHERE c.name LIKE &#x201c;%Dodge%&#x201d; SELECT *;</td></tr>
<tr class="a">
<td> NOT LIKE </td>
<td> Test if the left side does not match a pattern defined on the right side; in the pattern, &#x201c;%&#x201d; matches any string while &#x201c;_&#x201d; matches any character. </td>
<td> FROM customers AS c WHERE c.name NOT LIKE &#x201c;%Dodge%&#x201d; SELECT *;</td></tr>
<tr class="b">
<td> IS DISTINCT FROM </td>
<td> Inequality test that that treats NULL values as equal to each other and MISSING values as equal to each other </td>
<td> FROM orders AS o <br /> WHERE o.order_date IS DISTINCT FROM o.ship_date <br /> SELECT *; </td>
<td> </td></tr>
<tr class="a">
<td> IS NOT DISTINCT FROM </td>
<td> Equality test that treats NULL values as equal to each other and MISSING values as equal to each other </td>
<td> FROM orders AS o <br /> WHERE o.order_date IS NOT DISTINCT FROM o.ship_date <br /> SELECT *; </td></tr>
</tbody>
</table>
<p>The following table summarizes how the missing value comparison operators work.</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> Operator </th>
<th> Non-NULL/Non-MISSING value </th>
<th> NULL value</th>
<th> MISSING value</th></tr>
</thead><tbody>
<tr class="b">
<td> IS NULL </td>
<td> FALSE </td>
<td> TRUE </td>
<td> MISSING </td></tr>
<tr class="a">
<td> IS NOT NULL </td>
<td> TRUE </td>
<td> FALSE </td>
<td> MISSING </td></tr>
<tr class="b">
<td> IS MISSING </td>
<td> FALSE </td>
<td> FALSE </td>
<td> TRUE </td></tr>
<tr class="a">
<td> IS NOT MISSING </td>
<td> TRUE </td>
<td> TRUE </td>
<td> FALSE </td></tr>
<tr class="b">
<td> IS UNKNOWN </td>
<td> FALSE </td>
<td> TRUE </td>
<td> TRUE </td></tr>
<tr class="a">
<td> IS NOT UNKNOWN </td>
<td> TRUE </td>
<td> FALSE </td>
<td> FALSE</td></tr>
<tr class="b">
<td> IS KNOWN (IS VALUED) </td>
<td> TRUE </td>
<td> FALSE </td>
<td> FALSE </td></tr>
<tr class="a">
<td> IS NOT KNOWN (IS NOT VALUED) </td>
<td> FALSE </td>
<td> TRUE </td>
<td> TRUE </td></tr>
</tbody>
</table></div></div></div>
<div class="section">
<h3><a name="Logical_Operators"></a><a name="Logical_operators" id="Logical_operators">Logical Operators</a></h3>
<p>Logical operators perform logical <tt>NOT</tt>, <tt>AND</tt>, and <tt>OR</tt> operations over Boolean values (<tt>TRUE</tt> and <tt>FALSE</tt>) plus <tt>NULL</tt> and <tt>MISSING</tt>.</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> Operator </th>
<th> Purpose </th>
<th> Example </th></tr>
</thead><tbody>
<tr class="b">
<td> NOT </td>
<td> Returns true if the following condition is false, otherwise returns false </td>
<td> SELECT VALUE NOT 1 = 1; <br /> Returns FALSE </td></tr>
<tr class="a">
<td> AND </td>
<td> Returns true if both branches are true, otherwise returns false </td>
<td> SELECT VALUE 1 = 2 AND 1 = 1; <br /> Returns FALSE</td></tr>
<tr class="b">
<td> OR </td>
<td> Returns true if one branch is true, otherwise returns false </td>
<td> SELECT VALUE 1 = 2 OR 1 = 1; <br /> Returns TRUE </td></tr>
</tbody>
</table>
<p>The following table is the truth table for <tt>AND</tt> and <tt>OR</tt>.</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> A </th>
<th> B </th>
<th> A AND B </th>
<th> A OR B </th></tr>
</thead><tbody>
<tr class="b">
<td> TRUE </td>
<td> TRUE </td>
<td> TRUE </td>
<td> TRUE </td></tr>
<tr class="a">
<td> TRUE </td>
<td> FALSE </td>
<td> FALSE </td>
<td> TRUE </td></tr>
<tr class="b">
<td> TRUE </td>
<td> NULL </td>
<td> NULL </td>
<td> TRUE </td></tr>
<tr class="a">
<td> TRUE </td>
<td> MISSING </td>
<td> MISSING </td>
<td> TRUE </td></tr>
<tr class="b">
<td> FALSE </td>
<td> FALSE </td>
<td> FALSE </td>
<td> FALSE </td></tr>
<tr class="a">
<td> FALSE </td>
<td> NULL </td>
<td> FALSE </td>
<td> NULL </td></tr>
<tr class="b">
<td> FALSE </td>
<td> MISSING </td>
<td> FALSE </td>
<td> MISSING </td></tr>
<tr class="a">
<td> NULL </td>
<td> NULL </td>
<td> NULL </td>
<td> NULL </td></tr>
<tr class="b">
<td> NULL </td>
<td> MISSING </td>
<td> MISSING </td>
<td> NULL </td></tr>
<tr class="a">
<td> MISSING </td>
<td> MISSING </td>
<td> MISSING </td>
<td> MISSING </td></tr>
</tbody>
</table>
<p>The following table demonstrates the results of <tt>NOT</tt> on all possible inputs.</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> A </th>
<th> NOT A </th></tr>
</thead><tbody>
<tr class="b">
<td> TRUE </td>
<td> FALSE </td></tr>
<tr class="a">
<td> FALSE </td>
<td> TRUE </td></tr>
<tr class="b">
<td> NULL </td>
<td> NULL </td></tr>
<tr class="a">
<td> MISSING </td>
<td> MISSING </td></tr>
</tbody>
</table></div></div>
<div class="section">
<h2><a name="Quantified_Expressions"></a><a name="Quantified_expressions" id="Quantified_expressions">Quantified Expressions</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="QuantifiedExpr"></a>QuantifiedExpr</h5>
<p><img src="../images/diagrams/QuantifiedExpr.png" alt="" /></p>
<p>Synonym for <tt>SOME</tt>: <tt>ANY</tt></p>
<p>Quantified expressions are used for expressing existential or universal predicates involving the elements of a collection.</p>
<p>The following pair of examples illustrate the use of a quantified expression to test that every (or some) element in the set [1, 2, 3] of integers is less than three. The first example yields <tt>FALSE</tt> and second example yields <tt>TRUE</tt>.</p>
<p>It is useful to note that if the set were instead the empty set, the first expression would yield <tt>TRUE</tt> (&#x201c;every&#x201d; value in an empty set satisfies the condition) while the second expression would yield <tt>FALSE</tt> (since there isn&#x2019;t &#x201c;some&#x201d; value, as there are no values in the set, that satisfies the condition). To express a universal predicate that yields <tt>FALSE</tt> with the empty set, we would use the quantifier <tt>SOME AND EVERY</tt> in lieu of <tt>EVERY</tt>.</p>
<p>A quantified expression will return a <tt>NULL</tt> (or <tt>MISSING</tt>) if the first expression in it evaluates to <tt>NULL</tt> (or <tt>MISSING</tt>). Otherwise, a type error will be raised if the first expression in a quantified expression does not return a collection.</p></div>
<div class="section">
<h5><a name="Examples"></a>Examples</h5>
<div>
<div>
<pre class="source">EVERY x IN [ 1, 2, 3 ] SATISFIES x &lt; 3 -- &#x278a;
SOME x IN [ 1, 2, 3 ] SATISFIES x &lt; 3 -- &#x278b;
</pre></div></div>
<p>&#x2780; Returns <tt>FALSE</tt><br />
&#x2781; Returns <tt>TRUE</tt></p></div></div></div></div>
<div class="section">
<h2><a name="Path_Expressions"></a><a name="Path_expressions" id="Path_expressions">Path Expressions</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="PathExpr"></a>PathExpr</h5>
<p><img src="../images/diagrams/PathExpr.png" alt="" /></p>
<p>Components of complex types in the data model are accessed via path expressions. Path access can be applied to the result of a query expression that yields an instance of a complex type, for example, an object or an array instance.</p>
<p>For objects, path access is based on field names, and it accesses the field whose name was specified.</p>
<p>For arrays, path access is based on (zero-based) array-style indexing. Array indices can be used to retrieve either a single element from an array, or a whole subset of an array. Accessing a single element is achieved by providing a single index argument (zero-based element position), while obtaining a subset of an array is achieved by providing the <tt>start</tt> and <tt>end</tt> (zero-based) index positions; the returned subset is from position <tt>start</tt> to position <tt>end - 1</tt>; the <tt>end</tt> position argument is optional. If a position argument is negative then the element position is counted from the end of the array (<tt>-1</tt> addresses the last element, <tt>-2</tt> next to last, and so on).</p>
<p>Multisets have similar behavior to arrays, except for retrieving arbitrary items as the order of items is not fixed in multisets.</p>
<p>Attempts to access non-existent fields or out-of-bound array elements produce the special value <tt>MISSING</tt>. Type errors will be raised for inappropriate use of a path expression, such as applying a field accessor to a numeric value.</p>
<p>The following examples illustrate field access for an object, index-based element access or subset retrieval of an array, and also a composition thereof.</p></div>
<div class="section">
<h5><a name="Examples"></a>Examples</h5>
<div>
<div>
<pre class="source">({&quot;name&quot;: &quot;MyABCs&quot;, &quot;array&quot;: [ &quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}).array -- &#x278a;
([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[2] -- &#x278b;
([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[-1] -- &#x278c;
({&quot;name&quot;: &quot;MyABCs&quot;, &quot;array&quot;: [ &quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}).array[2] -- &#x278d;
([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[0:2] -- &#x278e;
([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[0:] -- &#x278f;
([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[-2:-1] -- &#x2790;
</pre></div></div>
<p>&#x2780; Returns <tt>[[&quot;a&quot;, &quot;b&quot;, &quot;c&quot;]]</tt><br />
&#x2781; Returns <tt>[&quot;c&quot;]</tt><br />
&#x2782; Returns <tt>[&quot;c&quot;]</tt><br />
&#x2783; Returns <tt>[&quot;c&quot;]</tt><br />
&#x2784; Returns <tt>[[&quot;a&quot;, &quot;b&quot;]]</tt><br />
&#x2785; Returns <tt>[[&quot;a&quot;, &quot;b&quot;, &quot;c&quot;]]</tt><br />
&#x2786; Returns <tt>[[&quot;b&quot;]]</tt></p></div></div></div></div>
<div class="section">
<h2><a name="Primary_Expressions"></a><a name="Primary_expressions" id="Primary_expressions">Primary Expressions</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="PrimaryExpr"></a>PrimaryExpr</h5>
<p><img src="../images/diagrams/PrimaryExpr.png" alt="" /></p>
<p>The most basic building block for any expression in SQL++ is Primary Expression. This can be a simple literal (constant) value, a reference to a query variable that is in scope, a parenthesized expression, a function call, or a newly constructed instance of the data model (such as a newly constructed object, array, or multiset of data model instances).</p></div></div></div>
<div class="section">
<h3><a name="Literals" id="Literals">Literals</a></h3>
<div class="section">
<div class="section">
<h5><a name="Literal"></a>Literal</h5>
<p><img src="../images/diagrams/Literal.png" alt="" /></p>
<p>The simplest kind of expression is a literal that directly represents a value in JSON format. Here are some examples:</p>
<div>
<div>
<pre class="source">-42
&quot;Hello&quot;
true
false
null
</pre></div></div>
<p>Numeric literals may include a sign and an optional decimal point. They may also be written in exponential notation, like this:</p>
<div>
<div>
<pre class="source">5e2
-4.73E-2
</pre></div></div>
<p>String literals may be enclosed in either single quotes or double quotes. Inside a string literal, the delimiter character for that string must be &#x201c;escaped&#x201d; by a backward slash, as in these examples:</p>
<div>
<div>
<pre class="source">&quot;I read \&quot;War and Peace\&quot; today.&quot;
'I don\'t believe everything I read.'
</pre></div></div>
<p>The table below shows how to escape characters in SQL++.</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th>Character Name </th>
<th>Escape Method</th></tr>
</thead><tbody>
<tr class="b">
<td>Single Quote</td>
<td> <tt>\'</tt></td></tr>
<tr class="a">
<td>Double Quote</td>
<td><tt>\&quot;</tt></td></tr>
<tr class="b">
<td>Backslash</td>
<td><tt>\\</tt></td></tr>
<tr class="a">
<td>Slash</td>
<td><tt>\/</tt></td></tr>
<tr class="b">
<td>Backspace</td>
<td><tt>\b</tt></td></tr>
<tr class="a">
<td>Formfeed</td>
<td><tt>\f</tt></td></tr>
<tr class="b">
<td>Newline</td>
<td><tt>\n</tt></td></tr>
<tr class="a">
<td>CarriageReturn</td>
<td><tt>\r</tt></td></tr>
<tr class="b">
<td>EscapeTab</td>
<td><tt>\t</tt></td></tr>
</tbody>
</table></div></div></div>
<div class="section">
<h3><a name="Identifiers_and_Variable_References"></a><a name="Variable_references" id="Variable_references">Identifiers and Variable References</a></h3>
<p>Like SQL, SQL++ makes use of a language construct called an <i>identifier</i>. An identifier starts with an alphabetic character or the underscore character _ , and contains only case-sensitive alphabetic characters, numeric digits, or the special characters _ and $. It is also possible for an identifier to include other special characters, or to be the same as a reserved word, by enclosing the identifier in back-ticks (it&#x2019;s then called a <i>delimited identifier</i>). Identifiers are used in variable names and in certain other places in SQL++ syntax, such as in path expressions, which we&#x2019;ll discuss soon. Here are some examples of identifiers:</p>
<div>
<div>
<pre class="source">X
customer_name
`SELECT`
`spaces in here`
`@&amp;#`
</pre></div></div>
<p>A very simple kind of SQL++ expression is a variable, which is simply an identifier. As in SQL, a variable can be bound to a value, which may be an input dataset, some intermediate result during processing of a query, or the final result of a query. We&#x2019;ll learn more about variables when we discuss queries.</p>
<p>Note that the SQL++ rules for delimiting strings and identifiers are different from the SQL rules. In SQL, strings are always enclosed in single quotes, and double quotes are used for delimited identifiers.</p></div>
<div class="section">
<h3><a name="Parameter_References"></a><a name="Parameter_references" id="Parameter_references">Parameter References</a></h3>
<p>A parameter reference is an external variable. Its value is provided using the <a href="../api.html#queryservice">statement execution API</a>.</p>
<p>Parameter references come in two forms, <i>Named Parameter References</i> and <i>Positional Parameter References</i>.</p>
<p>Named parameter references consist of the &#x201c;$&#x201d; symbol followed by an identifier or delimited identifier.</p>
<p>Positional parameter references can be either a &#x201c;$&#x201d; symbol followed by one or more digits or a &#x201c;?&#x201d; symbol. If numbered, positional parameters start at 1. &#x201c;?&#x201d; parameters are interpreted as $1 to $N based on the order in which they appear in the statement.</p>
<p>Parameter references may appear as shown in the below examples:</p>
<div class="section">
<div class="section">
<h5><a name="Examples"></a>Examples</h5>
<div>
<div>
<pre class="source">$id
$1
?
</pre></div></div>
<p>An error will be raised in the parameter is not bound at query execution time.</p></div></div></div>
<div class="section">
<h3><a name="Parenthesized_Expressions"></a><a name="Parenthesized_expressions" id="Parenthesized_expressions">Parenthesized Expressions</a></h3>
<div class="section">
<div class="section">
<h5><a name="ParenthesizedExpr"></a>ParenthesizedExpr</h5>
<p><img src="../images/diagrams/ParenthesizedExpr.png" alt="" /></p></div>
<div class="section">
<h5><a name="Subquery"></a>Subquery</h5>
<p><img src="../images/diagrams/Subquery.png" alt="" /></p>
<p>An expression can be parenthesized to control the precedence order or otherwise clarify a query. A <a href="#Subqueries">subquery</a> (nested <a href="#Union_all">selection</a>) may also be enclosed in parentheses. For more on these topics please see their respective sections.</p>
<p>The following expression evaluates to the value 2.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">( 1 + 1 )
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="Function_Calls"></a><a name="Function_call_expressions" id="Function_call_expressions">Function Calls</a></h3>
<div class="section">
<div class="section">
<h5><a name="FunctionCall"></a>FunctionCall</h5>
<p><img src="../images/diagrams/FunctionCall.png" alt="" /></p></div>
<div class="section">
<h5><a name="OrdinaryFunctionCall"></a>OrdinaryFunctionCall</h5>
<p><img src="../images/diagrams/OrdinaryFunctionCall.png" alt="" /></p></div>
<div class="section">
<h5><a name="AggregateFunctionCall"></a>AggregateFunctionCall</h5>
<p><img src="../images/diagrams/AggregateFunctionCall.png" alt="" /></p></div>
<div class="section">
<h5><a name="DataverseName"></a>DataverseName</h5>
<p><img src="../images/diagrams/DataverseName.png" alt="" /></p>
<p>Functions are included in SQL++, like most languages, as a way to package useful functionality or to componentize complicated or reusable computations. A function call is a legal query expression that represents the value resulting from the evaluation of its body expression with the given parameter bindings; the parameter value bindings can themselves be any expressions in SQL++.</p>
<p>Note that Window functions, and aggregate functions used as window functions, have a more complex syntax. Window function calls are described in the section on <a href="#Over_clauses">Window Queries</a>.</p>
<p>Also note that FILTER expressions can only be specified when calling <a href="#Aggregation_PseudoFunctions">Aggregation Pseudo-Functions</a>.</p>
<p>The following example is a function call expression whose value is 8.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">length('a string')
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="Case_Expressions"></a><a name="Case_expressions" id="Case_expressions">Case Expressions</a></h3>
<div class="section">
<div class="section">
<h5><a name="CaseExpr"></a>CaseExpr</h5>
<p><img src="../images/diagrams/CaseExpr.png" alt="" /></p></div>
<div class="section">
<h5><a name="SimpleCaseExpr"></a>SimpleCaseExpr</h5>
<p><img src="../images/diagrams/SimpleCaseExpr.png" alt="" /></p></div>
<div class="section">
<h5><a name="SearchedCaseExpr"></a>SearchedCaseExpr</h5>
<p><img src="../images/diagrams/SearchedCaseExpr.png" alt="" /></p>
<p>In a simple <tt>CASE</tt> expression, the query evaluator searches for the first <tt>WHEN</tt> &#x2026; <tt>THEN</tt> pair in which the <tt>WHEN</tt> expression is equal to the expression following <tt>CASE</tt> and returns the expression following <tt>THEN</tt>. If none of the <tt>WHEN</tt> &#x2026; <tt>THEN</tt> pairs meet this condition, and an <tt>ELSE</tt> branch exists, it returns the <tt>ELSE</tt> expression. Otherwise, <tt>NULL</tt> is returned.</p>
<p>In a searched CASE expression, the query evaluator searches from left to right until it finds a <tt>WHEN</tt> expression that is evaluated to <tt>TRUE</tt>, and then returns its corresponding <tt>THEN</tt> expression. If no condition is found to be <tt>TRUE</tt>, and an <tt>ELSE</tt> branch exists, it returns the <tt>ELSE</tt> expression. Otherwise, it returns <tt>NULL</tt>.</p>
<p>The following example illustrates the form of a case expression.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CASE (2 &lt; 3) WHEN true THEN &quot;yes&quot; ELSE &quot;no&quot; END
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="Constructors" id="Constructors">Constructors</a></h3>
<div class="section">
<div class="section">
<h5><a name="Constructor"></a>Constructor</h5>
<p><img src="../images/diagrams/Constructor.png" alt="" /></p></div>
<div class="section">
<h5><a name="ObjectConstructor"></a>ObjectConstructor</h5>
<p><img src="../images/diagrams/ObjectConstructor.png" alt="" /></p></div>
<div class="section">
<h5><a name="ArrayConstructor"></a>ArrayConstructor</h5>
<p><img src="../images/diagrams/ArrayConstructor.png" alt="" /></p></div>
<div class="section">
<h5><a name="ParenthesizedArrayConstructor"></a>ParenthesizedArrayConstructor</h5>
<p><img src="../images/diagrams/ParenthesizedArrayConstructor.png" alt="" /></p></div>
<div class="section">
<h5><a name="MultisetConstructor"></a>MultisetConstructor</h5>
<p><img src="../images/diagrams/MultisetConstructor.png" alt="" /></p>
<p>Structured JSON values can be represented by constructors, as in these examples:</p>
<div>
<div>
<pre class="source">{ &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 42 } -- &#x278a;
[ 1, 2, &quot;Hello&quot;, null ] -- &#x278b;
</pre></div></div>
<p>&#x2780; An object<br />
&#x2781; An array</p>
<p>In a constructed object, the names of the fields must be strings (either literal strings or computed strings), and an object may not contain any duplicate names. Of course, structured literals can be nested, as in this example:</p>
<div>
<div>
<pre class="source">[ {&quot;name&quot;: &quot;Bill&quot;,
&quot;address&quot;:
{&quot;street&quot;: &quot;25 Main St.&quot;,
&quot;city&quot;: &quot;Cincinnati, OH&quot;
}
},
{&quot;name&quot;: &quot;Mary&quot;,
&quot;address&quot;:
{&quot;street&quot;: &quot;107 Market St.&quot;,
&quot;city&quot;: &quot;St. Louis, MO&quot;
}
}
]
</pre></div></div>
<p>The array items in an array constructor, and the field-names and field-values in an object constructor, may be represented by expressions. For example, suppose that the variables firstname, lastname, salary, and bonus are bound to appropriate values. Then structured values might be constructed by the following expressions:</p>
<p>An object:</p>
<div>
<div>
<pre class="source">{
&quot;name&quot;: firstname || &quot; &quot; || lastname,
&quot;income&quot;: salary + bonus
}
</pre></div></div>
<p>An array:</p>
<div>
<div>
<pre class="source">[&quot;1984&quot;, lastname, salary + bonus, null]
</pre></div></div>
<p>If only one expression is specified instead of the field-name/field-value pair in an object constructor then this expression is supposed to provide the field value. The field name is then automatically generated based on the kind of the value expression as in Q2.1:</p>
<ul>
<li>If it is a variable reference expression then the generated field name is the name of that variable.</li>
<li>If it is a field access expression then the generated field name is the last identifier in that expression.</li>
<li>For all other cases, a compilation error will be raised.</li>
</ul></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q2.1)</p>
<div>
<div>
<pre class="source">FROM customers AS c
WHERE c.custid = &quot;C47&quot;
SELECT VALUE {c.name, c.rating}
</pre></div></div>
<p>This query outputs:</p>
<div>
<div>
<pre class="source">[
{
&quot;name&quot;: &quot;S. Logan&quot;,
&quot;rating&quot;: 625
}
]
</pre></div></div>
<!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<h1><a name="Queries" id="Queries">3. Queries</a></h1><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<p>A <i>query</i> can be an expression, or it can be constructed from blocks of code called <i>query blocks</i>. A query block may contain several clauses, including <tt>SELECT</tt>, <tt>FROM</tt>, <tt>LET</tt>, <tt>WHERE</tt>, <tt>GROUP BY</tt>, and <tt>HAVING</tt>.</p></div>
<div class="section">
<h5><a name="Query"></a>Query</h5>
<p><img src="../images/diagrams/Query.png" alt="" /></p></div>
<div class="section">
<h5><a name="Selection"></a>Selection</h5>
<p><img src="../images/diagrams/Selection.png" alt="" /></p></div>
<div class="section">
<h5><a name="QueryBlock"></a>QueryBlock</h5>
<p><img src="../images/diagrams/QueryBlock.png" alt="" /></p></div>
<div class="section">
<h5><a name="StreamGenerator"></a>StreamGenerator</h5>
<p><img src="../images/diagrams/StreamGenerator.png" alt="" /></p>
<p>Note that, unlike SQL, SQL++ allows the <tt>SELECT</tt> clause to appear either at the beginning or at the end of a query block. For some queries, placing the <tt>SELECT</tt> clause at the end may make a query block easier to understand, because the <tt>SELECT</tt> clause refers to variables defined in the other clauses.</p></div></div></div></div>
<div class="section">
<h2><a name="SELECT_Clause"></a><a name="Select_clauses" id="Select_clauses">SELECT Clause</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="SelectClause"></a>SelectClause</h5>
<p><img src="../images/diagrams/SelectClause.png" alt="" /></p></div>
<div class="section">
<h5><a name="Projection"></a>Projection</h5>
<p><img src="../images/diagrams/Projection.png" alt="" /></p>
<p>Synonyms for <tt>VALUE</tt>: <tt>ELEMENT</tt>, <tt>RAW</tt></p>
<p>In a query block, the <tt>FROM</tt>, <tt>WHERE</tt>, <tt>GROUP BY</tt>, and <tt>HAVING</tt> clauses (if present) are collectively called the Stream Generator. All these clauses, taken together, generate a stream of tuples of bound variables. The <tt>SELECT</tt> clause then uses these bound variables to generate the output of the query block.</p>
<p>For example, the clause <tt>FROM customers AS c</tt> scans over the <tt>customers</tt> collection, binding the variable <tt>c</tt> to each <tt>customer</tt> object in turn, producing a stream of bindings.</p>
<p>Here&#x2019;s a slightly more complex example of a stream generator:</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">FROM customers AS c, orders AS o
WHERE c.custid = o.custid
</pre></div></div>
<p>In this example, the <tt>FROM</tt> clause scans over the customers and orders collections, producing a stream of variable pairs (<tt>c</tt>, <tt>o</tt>) in which <tt>c</tt> is bound to a <tt>customer</tt> object and <tt>o</tt> is bound to an <tt>order</tt> object. The <tt>WHERE</tt> clause then retains only those pairs in which the custid values of the two objects match.</p>
<p>The output of the query block is a collection containing one output item for each tuple produced by the stream generator. If the stream generator produces no tuples, the output of the query block is an empty collection. Depending on the <tt>SELECT</tt> clause, each output item may be an object or some other kind of value.</p>
<p>In addition to using the variables bound by previous clauses, the <tt>SELECT</tt> clause may create and bind some additional variables. For example, the clause <tt>SELECT salary + bonus AS pay</tt> creates the variable <tt>pay</tt> and binds it to the value of <tt>salary + bonus</tt>. This variable may then be used in a later <tt>ORDER BY</tt> clause.</p>
<p>In SQL++, the <tt>SELECT</tt> clause may appear either at the beginning or at the end of a query block. Since the <tt>SELECT</tt> clause depends on variables that are bound in the other clauses, the examples in this section place <tt>SELECT</tt> at the end of the query blocks.</p></div></div></div>
<div class="section">
<h3><a name="SELECT_VALUE"></a><a name="Select_element" id="Select_element">SELECT VALUE</a></h3>
<p>The <tt>SELECT VALUE</tt> clause returns an array or multiset that contains the results of evaluating the <tt>VALUE</tt> expression, with one evaluation being performed per &#x201c;binding tuple&#x201d; (i.e., per <tt>FROM</tt> clause item) satisfying the statement&#x2019;s selection criteria. If there is no <tt>FROM</tt> clause, the expression after <tt>VALUE</tt> is evaluated once with no binding tuples (except those inherited from an outer environment).</p>
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.1)</p>
<div>
<div>
<pre class="source">SELECT VALUE 1;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
1
]
</pre></div></div>
</div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.2) The following query returns the names of all customers whose rating is above 650.</p>
<div>
<div>
<pre class="source">FROM customers AS c
WHERE c.rating &gt; 650
SELECT VALUE name;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
&quot;T. Cody&quot;,
&quot;M. Sinclair&quot;,
&quot;T. Henry&quot;
]
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="SQL-style_SELECT"></a><a name="SQL_select" id="SQL_select">SQL-style SELECT</a></h3>
<p>Traditional SQL-style <tt>SELECT</tt> syntax is also supported in SQL++, however the result of a query is not guaranteed to preserve the order of expressions in the <tt>SELECT</tt> clause.</p>
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.3) The following query returns the names and customers ids of any customers whose rating is 750.</p>
<div>
<div>
<pre class="source">FROM customers AS c
WHERE c.rating = 750
SELECT c.name AS customer_name, c.custid AS customer_id;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;customer_id&quot;: &quot;C13&quot;,
&quot;customer_name&quot;: &quot;T. Cody&quot;
},
{
&quot;customer_id&quot;: &quot;C37&quot;,
&quot;customer_name&quot;: &quot;T. Henry&quot;
}
]
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="SELECT_.2A"></a><a name="Select_star" id="Select_star">SELECT *</a></h3>
<p>As in SQL, the phrase <tt>SELECT *</tt> suggests, &#x201c;select everything.&#x201d;</p>
<p>For each binding tuple in the stream, <tt>SELECT *</tt> produces an output object. For each variable in the binding tuple, the output object contains a field: the name of the field is the name of the variable, and the value of the field is the value of the variable. Essentially, <tt>SELECT *</tt> means, &#x201c;return all the bound variables, with their names and values.&#x201d;</p>
<p>The effect of <tt>SELECT *</tt> can be illustrated by an example based on two small collections named <tt>ages</tt> and <tt>eyes</tt>. The contents of the two collections are as follows:</p>
<p><tt>ages</tt>:</p>
<div>
<div>
<pre class="source">[
{ &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 },
{ &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32 }
]
</pre></div></div>
<p><tt>eyes</tt>:</p>
<div>
<div>
<pre class="source">[
{ &quot;name&quot;: &quot;Bill&quot;, &quot;eyecolor&quot;: &quot;brown&quot; },
{ &quot;name&quot;: &quot;Sue&quot;, &quot;eyecolor&quot;: &quot;blue&quot; }
]
</pre></div></div>
<p>The following example applies <tt>SELECT *</tt> to a single collection.</p>
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.4a) Return all the information in the <tt>ages</tt> collection.</p>
<div>
<div>
<pre class="source">FROM ages AS a
SELECT * ;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{ &quot;a&quot;: { &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 },
},
{ &quot;a&quot;: { &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32}
}
]
</pre></div></div>
<p>Note that the variable-name <tt>a</tt> appears in the query result. If the <tt>FROM</tt> clause had been simply <tt>FROM ages</tt> (omitting <tt>AS a</tt>), the variable-name in the query result would have been <tt>ages</tt>.</p>
<p>The next example applies <tt>SELECT *</tt> to a join of two collections.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.4b) Return all the information in a join of <tt>ages</tt> and <tt>eyes</tt> on matching name fields.</p>
<div>
<div>
<pre class="source">FROM ages AS a, eyes AS e
WHERE a.name = e.name
SELECT * ;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{ &quot;a&quot;: { &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 },
&quot;e&quot;: { &quot;name&quot;: &quot;Bill&quot;, &quot;eyecolor&quot;: &quot;Brown&quot; }
},
{ &quot;a&quot;: { &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32 },
&quot;e&quot;: { &quot;name&quot;: &quot;Sue&quot;, &quot;eyecolor&quot;: &quot;Blue&quot; }
}
]
</pre></div></div>
<p>Note that the result of <tt>SELECT *</tt> in SQL++ is more complex than the result of <tt>SELECT *</tt> in SQL.</p></div></div></div>
<div class="section">
<h3><a name="SELECT_variable..2A"></a><a name="Select_variable_star" id="Select_variable_star">SELECT <i>variable</i>.*</a></h3>
<p>SQL++ has an alternative version of <tt>SELECT *</tt> in which the star is preceded by a variable.</p>
<p>Whereas the version without a named variable means, &#x201c;return all the bound variables, with their names and values,&#x201d; <tt>SELECT</tt> <i>variable</i> <tt>.*</tt> means &#x201c;return only the named variable, and return only its value, not its name.&#x201d;</p>
<p>The following example can be compared with (Q3.4a) to see the difference between the two versions of <tt>SELECT *</tt>:</p>
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.4c) Return all information in the <tt>ages</tt> collection.</p>
<div>
<div>
<pre class="source">FROM ages AS a
SELECT a.*
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{ &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 },
{ &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32 }
]
</pre></div></div>
<p>Note that, for queries over a single collection, <tt>SELECT</tt> <i>variable</i> <tt>.*</tt> returns a simpler result and therefore may be preferable to <tt>SELECT *</tt>.</p>
<p>In fact, <tt>SELECT</tt> <i>variable</i> <tt>.*</tt>, like <tt>SELECT *</tt> in SQL, is equivalent to a <tt>SELECT</tt> clause that enumerates all the fields of the collection, as in (Q3.4d):</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.4d) Return all the information in the <tt>ages</tt> collection.</p>
<div>
<div>
<pre class="source">FROM ages AS a
SELECT a.name, a.age
</pre></div></div>
<p>(same result as (Q3.4c))</p>
<p><tt>SELECT</tt> <i>variable</i> <tt>.*</tt> has an additional application. It can be used to return all the fields of a nested object. To illustrate this use, we will use the <tt>customers</tt> dataset in the example database &#x2014; see <a href="#Manual_data">Appendix 4</a>.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.4e) In the <tt>customers</tt> dataset, return all the fields of the <tt>address</tt> objects that have zipcode &#x201c;02340&#x201d;.</p>
<div>
<div>
<pre class="source">FROM customers AS c
WHERE c.address.zipcode = &quot;02340&quot;
SELECT address.* ;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;street&quot;: &quot;690 River St.&quot;,
&quot;city&quot;: &quot;Hanover, MA&quot;,
&quot;zipcode&quot;: &quot;02340&quot;
}
]
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="SELECT_DISTINCT"></a><a name="Select_distinct" id="Select_distinct">SELECT DISTINCT</a></h3>
<p>The <tt>DISTINCT</tt> keyword is used to eliminate duplicate items from the results of a query block.</p>
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.5) Returns all of the different cities in the <tt>customers</tt> dataset.</p>
<div>
<div>
<pre class="source">FROM customers AS c
SELECT DISTINCT c.address.city;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;city&quot;: &quot;Boston, MA&quot;
},
{
&quot;city&quot;: &quot;Hanover, MA&quot;
},
{
&quot;city&quot;: &quot;St. Louis, MO&quot;
},
{
&quot;city&quot;: &quot;Rome, Italy&quot;
}
]
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="Unnamed_Projections"></a><a name="Unnamed_projections" id="Unnamed_projections">Unnamed Projections</a></h3>
<p>Similar to standard SQL, the query language supports unnamed projections (a.k.a, unnamed <tt>SELECT</tt> clause items), for which names are generated rather than user-provided. Name generation has three cases:</p>
<ul>
<li>If a projection expression is a variable reference expression, its generated name is the name of the variable.</li>
<li>If a projection expression is a field access expression, its generated name is the last identifier in the expression.</li>
<li>For all other cases, the query processor will generate a unique name.</li>
</ul>
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.6) Returns the last digit and the order date of all orders for the customer whose ID is &#x201c;C41&#x201d;.</p>
<div>
<div>
<pre class="source">FROM orders AS o
WHERE o.custid = &quot;C41&quot;
SELECT o.orderno % 1000, o.order_date;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;$1&quot;: 1,
&quot;order_date&quot;: &quot;2020-04-29&quot;
},
{
&quot;$1&quot;: 6,
&quot;order_date&quot;: &quot;2020-09-02&quot;
}
]
</pre></div></div>
<p>In the result, <tt>$1</tt> is the generated name for <tt>o.orderno % 1000</tt>, while <tt>order_date</tt> is the generated name for <tt>o.order_date</tt>. It is good practice, however, to not rely on the randomly generated names which can be confusing and irrelevant. Instead, practice good naming conventions by providing a meaningful and concise name which properly describes the selected item.</p></div></div></div>
<div class="section">
<h3><a name="Abbreviated_Field_Access_Expressions"></a><a name="Abbreviated_field_access_expressions" id="Abbreviated_field_access_expressions">Abbreviated Field Access Expressions</a></h3>
<p>As in standard SQL, field access expressions can be abbreviated when there is no ambiguity. In the next example, the variable <tt>o</tt> is the only possible variable reference for fields <tt>orderno</tt> and <tt>order_date</tt> and thus could be omitted in the query. This practice is not recommended, however, as queries may have fields (such as <tt>custid</tt>) which can be present in multiple datasets. More information on abbreviated field access can be found in the appendix section on Variable Resolution.</p>
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.7) Same as Q3.6, omitting the variable reference for the order number and date and providing custom names for <tt>SELECT</tt> clause items.</p>
<div>
<div>
<pre class="source">FROM orders AS o
WHERE o.custid = &quot;C41&quot;
SELECT orderno % 1000 AS last_digit, order_date;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;last_digit&quot;: 1,
&quot;order_date&quot;: &quot;2020-04-29&quot;
},
{
&quot;last_digit&quot;: 6,
&quot;order_date&quot;: &quot;2020-09-02&quot;
}
]
</pre></div></div>
</div></div></div></div>
<div class="section">
<h2><a name="FROM_Clause"></a><a name="From_clauses" id="From_clauses">FROM Clause</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="FromClause"></a>FromClause</h5>
<p><img src="../images/diagrams/FromClause.png" alt="" /></p></div>
<div class="section">
<h5><a name="FromTerm"></a>FromTerm</h5>
<p><img src="../images/diagrams/FromTerm.png" alt="" /></p></div>
<div class="section">
<h5><a name="NamedExpr"></a>NamedExpr</h5>
<p><img src="../images/diagrams/NamedExpr.png" alt="" /></p></div>
<div class="section">
<h5><a name="JoinStep"></a>JoinStep</h5>
<p><img src="../images/diagrams/JoinStep.png" alt="" /></p></div>
<div class="section">
<h5><a name="UnnestStep"></a>UnnestStep</h5>
<p><img src="../images/diagrams/UnnestStep.png" alt="" /></p>
<p>Synonyms for <tt>UNNEST</tt>: <tt>CORRELATE</tt>, <tt>FLATTEN</tt></p>
<p>The purpose of a <tt>FROM</tt> clause is to iterate over a collection, binding a variable to each item in turn. Here&#x2019;s a query that iterates over the <tt>customers</tt> dataset, choosing certain customers and returning some of their attributes.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.8) List the customer ids and names of the customers in zipcode 63101, in order by their customer IDs.</p>
<div>
<div>
<pre class="source">FROM customers
WHERE address.zipcode = &quot;63101&quot;
SELECT custid AS customer_id, name
ORDER BY customer_id;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;customer_id&quot;: &quot;C13&quot;,
&quot;name&quot;: &quot;T. Cody&quot;
},
{
&quot;customer_id&quot;: &quot;C31&quot;,
&quot;name&quot;: &quot;B. Pruitt&quot;
},
{
&quot;customer_id&quot;: &quot;C41&quot;,
&quot;name&quot;: &quot;R. Dodge&quot;
}
]
</pre></div></div>
<p>Let&#x2019;s take a closer look at what this <tt>FROM</tt> clause is doing. A <tt>FROM</tt> clause always produces a stream of bindings, in which an iteration variable is bound in turn to each item in a collection. In Q3.8, since no explicit iteration variable is provided, the <tt>FROM</tt> clause defines an implicit variable named <tt>customers</tt>, the same name as the dataset that is being iterated over. The implicit iteration variable serves as the object-name for all field-names in the query block that do not have explicit object-names. Thus, <tt>address.zipcode</tt> really means <tt>customers.address.zipcode</tt>, <tt>custid</tt> really means <tt>customers.custid</tt>, and <tt>name</tt> really means <tt>customers.name</tt>.</p>
<p>You may also provide an explicit iteration variable, as in this version of the same query:</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.9) Alternative version of Q3.8 (same result).</p>
<div>
<div>
<pre class="source">FROM customers AS c
WHERE c.address.zipcode = &quot;63101&quot;
SELECT c.custid AS customer_id, c.name
ORDER BY customer_id;
</pre></div></div>
<p>In Q3.9, the variable <tt>c</tt> is bound to each <tt>customer</tt> object in turn as the query iterates over the <tt>customers</tt> dataset. An explicit iteration variable can be used to identify the fields of the referenced object, as in <tt>c.name</tt> in the <tt>SELECT</tt> clause of Q3.9. When referencing a field of an object, the iteration variable can be omitted when there is no ambiguity. For example, <tt>c.name</tt> could be replaced by <tt>name</tt> in the <tt>SELECT</tt> clause of Q3.9. That&#x2019;s why field-names like <tt>name</tt> and <tt>custid</tt> could stand by themselves in the Q3.8 version of this query.</p>
<p>In the examples above, the <tt>FROM</tt> clause iterates over the objects in a dataset. But in general, a <tt>FROM</tt> clause can iterate over any collection. For example, the objects in the <tt>orders</tt> dataset each contain a field called <tt>items</tt>, which is an array of nested objects. In some cases, you will write a <tt>FROM</tt> clause that iterates over a nested array like <tt>items</tt>.</p>
<p>The stream of objects (more accurately, variable bindings) that is produced by the <tt>FROM</tt> clause does not have any particular order. The system will choose the most efficient order for the iteration. If you want your query result to have a specific order, you must use an <tt>ORDER BY</tt> clause.</p>
<p>It&#x2019;s good practice to specify an explicit iteration variable for each collection in the <tt>FROM</tt> clause, and to use these variables to qualify the field-names in other clauses. Here are some reasons for this convention:</p>
<ul>
<li>
<p>It&#x2019;s nice to have different names for the collection as a whole and an object in the collection. For example, in the clause <tt>FROM customers AS c</tt>, the name <tt>customers</tt> represents the dataset and the name <tt>c</tt> represents one object in the dataset.</p>
</li>
<li>
<p>In some cases, iteration variables are required. For example, when joining a dataset to itself, distinct iteration variables are required to distinguish the left side of the join from the right side.</p>
</li>
<li>
<p>In a subquery it&#x2019;s sometimes necessary to refer to an object in an outer query block (this is called a <i>correlated subquery</i>). To avoid confusion in correlated subqueries, it&#x2019;s best to use explicit variables.</p>
</li>
</ul></div></div></div>
<div class="section">
<h3><a name="Joins"></a><a name="Join_clauses" id="Join_clauses">Joins</a></h3>
<p>A <tt>FROM</tt> clause gets more interesting when there is more than one collection involved. The following query iterates over two collections: <tt>customers</tt> and <tt>orders</tt>. The <tt>FROM</tt> clause produces a stream of binding tuples, each containing two variables, <tt>c</tt> and <tt>o</tt>. In each binding tuple, <tt>c</tt> is bound to an object from <tt>customers</tt>, and <tt>o</tt> is bound to an object from <tt>orders</tt>. Conceptually, at this point, the binding tuple stream contains all possible pairs of a customer and an order (this is called the <i>Cartesian product</i> of <tt>customers</tt> and <tt>orders</tt>). Of course, we are interested only in pairs where the <tt>custid</tt> fields match, and that condition is expressed in the <tt>WHERE</tt> clause, along with the restriction that the order number must be 1001.</p>
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.10) Create a packing list for order number 1001, showing the customer name and address and all the items in the order.</p>
<div>
<div>
<pre class="source">FROM customers AS c, orders AS o
WHERE c.custid = o.custid
AND o.orderno = 1001
SELECT o.orderno,
c.name AS customer_name,
c.address,
o.items AS items_ordered;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;orderno&quot;: 1001,
&quot;customer_name&quot;: &quot;R. Dodge&quot;,
&quot;address&quot;: {
&quot;street&quot;: &quot;150 Market St.&quot;,
&quot;city&quot;: &quot;St. Louis, MO&quot;,
&quot;zipcode&quot;: &quot;63101&quot;
},
&quot;items_ordered&quot;: [
{
&quot;itemno&quot;: 347,
&quot;qty&quot;: 5,
&quot;price&quot;: 19.99
},
{
&quot;itemno&quot;: 193,
&quot;qty&quot;: 2,
&quot;price&quot;: 28.89
}
]
}
]
</pre></div></div>
<p>Q3.10 is called a <i>join query</i> because it joins the <tt>customers</tt> collection and the <tt>orders</tt> collection, using the join condition <tt>c.custid = o.custid</tt>. In SQL++, as in SQL, you can express this query more explicitly by a <tt>JOIN</tt> clause that includes the join condition, as follows:</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.11) Alternative statement of Q3.10 (same result).</p>
<div>
<div>
<pre class="source">FROM customers AS c JOIN orders AS o
ON c.custid = o.custid
WHERE o.orderno = 1001
SELECT o.orderno,
c.name AS customer_name,
c.address,
o.items AS items_ordered;
</pre></div></div>
<p>Whether you express the join condition in a <tt>JOIN</tt> clause or in a <tt>WHERE</tt> clause is a matter of taste; the result is the same. This manual will generally use a comma-separated list of collection-names in the <tt>FROM</tt> clause, leaving the join condition to be expressed elsewhere. As we&#x2019;ll soon see, in some query blocks the join condition can be omitted entirely.</p>
<p>There is, however, one case in which an explicit <tt>JOIN</tt> clause is necessary. That is when you need to join collection A to collection B, and you want to make sure that every item in collection A is present in the query result, even if it doesn&#x2019;t match any item in collection B. This kind of query is called a <i>left outer join</i>, and it is illustrated by the following example.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.12) List the customer ID and name, together with the order numbers and dates of their orders (if any) of customers T. Cody and M. Sinclair.</p>
<div>
<div>
<pre class="source">FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
WHERE c.name = &quot;T. Cody&quot;
OR c.name = &quot;M. Sinclair&quot;
SELECT c.custid, c.name, o.orderno, o.order_date
ORDER BY c.custid, o.order_date;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;custid&quot;: &quot;C13&quot;,
&quot;orderno&quot;: 1002,
&quot;name&quot;: &quot;T. Cody&quot;,
&quot;order_date&quot;: &quot;2020-05-01&quot;
},
{
&quot;custid&quot;: &quot;C13&quot;,
&quot;orderno&quot;: 1007,
&quot;name&quot;: &quot;T. Cody&quot;,
&quot;order_date&quot;: &quot;2020-09-13&quot;
},
{
&quot;custid&quot;: &quot;C13&quot;,
&quot;orderno&quot;: 1008,
&quot;name&quot;: &quot;T. Cody&quot;,
&quot;order_date&quot;: &quot;2020-10-13&quot;
},
{
&quot;custid&quot;: &quot;C13&quot;,
&quot;orderno&quot;: 1009,
&quot;name&quot;: &quot;T. Cody&quot;,
&quot;order_date&quot;: &quot;2020-10-13&quot;
},
{
&quot;custid&quot;: &quot;C25&quot;,
&quot;name&quot;: &quot;M. Sinclair&quot;
}
]
</pre></div></div>
<p>As you can see from the result of this left outer join, our data includes four orders from customer T. Cody, but no orders from customer M. Sinclair. The behavior of left outer join in SQL++ is different from that of SQL. SQL would have provided M. Sinclair with an order in which all the fields were <tt>null</tt>. SQL++, on the other hand, deals with schemaless data, which permits it to simply omit the order fields from the outer join.</p>
<p>Now we&#x2019;re ready to look at a new kind of join that was not provided (or needed) in original SQL. Consider this query:</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.13) For every case in which an item is ordered in a quantity greater than 100, show the order number, date, item number, and quantity.</p>
<div>
<div>
<pre class="source">FROM orders AS o, o.items AS i
WHERE i.qty &gt; 100
SELECT o.orderno, o.order_date, i.itemno AS item_number,
i.qty AS quantity
ORDER BY o.orderno, item_number;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;orderno&quot;: 1002,
&quot;order_date&quot;: &quot;2020-05-01&quot;,
&quot;item_number&quot;: 680,
&quot;quantity&quot;: 150
},
{
&quot;orderno&quot;: 1005,
&quot;order_date&quot;: &quot;2020-08-30&quot;,
&quot;item_number&quot;: 347,
&quot;quantity&quot;: 120
},
{
&quot;orderno&quot;: 1006,
&quot;order_date&quot;: &quot;2020-09-02&quot;,
&quot;item_number&quot;: 460,
&quot;quantity&quot;: 120
}
]
</pre></div></div>
<p>Q3.13 illustrates a feature called <i>left-correlation</i> in the <tt>FROM</tt> clause. Notice that we are joining <tt>orders</tt>, which is a dataset, to <tt>items</tt>, which is an array nested inside each order. In effect, for each order, we are unnesting the <tt>items</tt> array and joining it to the <tt>order</tt> as though it were a separate collection. For this reason, this kind of query is sometimes called an <i>unnesting query</i>. The keyword <tt>UNNEST</tt> may be used whenever left-correlation is used in a <tt>FROM</tt> clause, as shown in this example:</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.14) Alternative statement of Q3.13 (same result).</p>
<div>
<div>
<pre class="source">FROM orders AS o UNNEST o.items AS i
WHERE i.qty &gt; 100
SELECT o.orderno, o.order_date, i.itemno AS item_number,
i.qty AS quantity
ORDER BY o.orderno, item_number;
</pre></div></div>
<p>The results of Q3.13 and Q3.14 are exactly the same. <tt>UNNEST</tt> serves as a reminder that left-correlation is being used to join an object with its nested items. The join condition in Q3.14 is expressed by the left-correlation: each order <tt>o</tt> is joined to its own items, referenced as <tt>o.items</tt>. The result of the <tt>FROM</tt> clause is a stream of binding tuples, each containing two variables, <tt>o</tt> and <tt>i</tt>. The variable <tt>o</tt> is bound to an order and the variable <tt>i</tt> is bound to one item inside that order.</p>
<p>Like <tt>JOIN</tt>, <tt>UNNEST</tt> has a <tt>LEFT OUTER</tt> option. Q3.14 could have specified:</p>
<div>
<div>
<pre class="source">FROM orders AS o LEFT OUTER UNNEST o.items AS i
</pre></div></div>
<p>In this case, orders that have no nested items would appear in the query result.</p></div></div></div></div>
<div class="section">
<h2><a name="LET_Clause"></a><a name="Let_clauses" id="Let_clauses">LET Clause</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="LetClause"></a>LetClause</h5>
<p><img src="../images/diagrams/LetClause.png" alt="" /></p>
<p>Synonyms for <tt>LET</tt>: <tt>LETTING</tt></p>
<p><tt>LET</tt> clauses can be useful when a (complex) expression is used several times within a query, allowing it to be written once to make the query more concise. The word <tt>LETTING</tt> can also be used, although this is not as common. The next query shows an example.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.15) For each item in an order, the revenue is defined as the quantity times the price of that item. Find individual items for which the revenue is greater than 5000. For each of these, list the order number, item number, and revenue, in descending order by revenue.</p>
<div>
<div>
<pre class="source">FROM orders AS o, o.items AS i
LET revenue = i.qty * i.price
WHERE revenue &gt; 5000
SELECT o.orderno, i.itemno, revenue
ORDER by revenue desc;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;orderno&quot;: 1006,
&quot;itemno&quot;: 460,
&quot;revenue&quot;: 11997.6
},
{
&quot;orderno&quot;: 1002,
&quot;itemno&quot;: 460,
&quot;revenue&quot;: 9594.05
},
{
&quot;orderno&quot;: 1006,
&quot;itemno&quot;: 120,
&quot;revenue&quot;: 5525
}
]
</pre></div></div>
<p>The expression for computing revenue is defined once in the <tt>LET</tt> clause and then used three times in the remainder of the query. Avoiding repetition of the revenue expression makes the query shorter and less prone to errors.</p></div></div></div></div>
<div class="section">
<h2><a name="WHERE_Clause"></a><a name="Where_having_clauses" id="Where_having_clauses">WHERE Clause</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="WhereClause"></a>WhereClause</h5>
<p><img src="../images/diagrams/WhereClause.png" alt="" /></p>
<p>The purpose of a <tt>WHERE</tt> clause is to operate on the stream of binding tuples generated by the <tt>FROM</tt> clause, filtering out the tuples that do not satisfy a certain condition. The condition is specified by an expression based on the variable names in the binding tuples. If the expression evaluates to true, the tuple remains in the stream; if it evaluates to anything else, including <tt>null</tt> or <tt>missing</tt>, it is filtered out. The surviving tuples are then passed along to the next clause to be processed (usually either <tt>GROUP BY</tt> or <tt>SELECT</tt>).</p>
<p>Often, the expression in a <tt>WHERE</tt> clause is some kind of comparison like <tt>quantity &gt; 100</tt>. However, any kind of expression is allowed in a <tt>WHERE</tt> clause. The only thing that matters is whether the expression returns <tt>true</tt> or not.</p></div></div></div></div>
<div class="section">
<h2><a name="Grouping" id="Grouping">Grouping</a></h2>
<p>Grouping is especially important when manipulating hierarchies like the ones that are often found in JSON data. Often you will want to generate output data that includes both summary data and line items within the summaries. For this purpose, SQL++ supports several important extensions to the traditional grouping features of SQL. The familiar <tt>GROUP BY</tt> and <tt>HAVING</tt> clauses are still there, and they are joined by a new clause called <tt>GROUP AS</tt>. We&#x2019;ll illustrate these clauses by a series of examples.</p>
<div class="section">
<h3><a name="GROUP_BY_Clause"></a><a name="Group_By_clauses" id="Group_By_clauses">GROUP BY Clause</a></h3>
<div class="section">
<div class="section">
<h5><a name="GroupByClause"></a>GroupByClause</h5>
<p><img src="../images/diagrams/GroupByClause.png" alt="" /></p></div>
<div class="section">
<h5><a name="GroupingElement"></a>GroupingElement</h5>
<p><img src="../images/diagrams/GroupingElement.png" alt="" /></p></div>
<div class="section">
<h5><a name="OrdinaryGroupingSet"></a>OrdinaryGroupingSet</h5>
<p><img src="../images/diagrams/OrdinaryGroupingSet.png" alt="" /></p></div>
<div class="section">
<h5><a name="NamedExpr"></a>NamedExpr</h5>
<p><img src="../images/diagrams/NamedExpr.png" alt="" /></p>
<p>We&#x2019;ll begin our discussion of grouping with an example from ordinary SQL.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.16) List the number of orders placed by each customer who has placed an order.</p>
<div>
<div>
<pre class="source">SELECT o.custid, COUNT(o.orderno) AS `order count`
FROM orders AS o
GROUP BY o.custid
ORDER BY o.custid;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;order count&quot;: 4,
&quot;custid&quot;: &quot;C13&quot;
},
{
&quot;order count&quot;: 1,
&quot;custid&quot;: &quot;C31&quot;
},
{
&quot;order count&quot;: 1,
&quot;custid&quot;: &quot;C35&quot;
},
{
&quot;order count&quot;: 1,
&quot;custid&quot;: &quot;C37&quot;
},
{
&quot;order count&quot;: 2,
&quot;custid&quot;: &quot;C41&quot;
}
]
</pre></div></div>
<p>The input to a <tt>GROUP BY</tt> clause is the stream of binding tuples generated by the <tt>FROM</tt> and <tt>WHERE</tt>clauses. In this query, before grouping, the variable <tt>o</tt> is bound to each object in the <tt>orders</tt> collection in turn.</p>
<p>SQL++ evaluates the expression in the <tt>GROUP BY</tt> clause, called the grouping expression, once for each of the binding tuples. It then organizes the results into groups in which the grouping expression has a common value (as defined by the <tt>=</tt> operator). In this example, the grouping expression is <tt>o.custid</tt>, and each of the resulting groups is a set of <tt>orders</tt> that have the same <tt>custid</tt>. If necessary, a group is formed for <tt>orders</tt> in which <tt>custid</tt> is <tt>null</tt>, and another group is formed for <tt>orders</tt> that have no <tt>custid</tt>. This query uses the aggregating function <tt>COUNT(o.orderno)</tt>, which counts how many order numbers are in each group. If we are sure that each order object has a distinct <tt>orderno</tt>, we could also simply count the order objects in each group by using <tt>COUNT(*)</tt> in place of <tt>COUNT(o.orderno)</tt>.</p>
<p>In the <tt>GROUP BY</tt>clause, you may optionally define an alias for the grouping expression. For example, in Q3.16, you could have written <tt>GROUP BY o.custid AS cid</tt>. The alias <tt>cid</tt> could then be used in place of the grouping expression in later clauses. In cases where the grouping expression contains an operator, it is especially helpful to define an alias (for example, <tt>GROUP BY salary + bonus AS pay)</tt>.</p>
<p>Q3.16 had a single grouping expression, <tt>o.custid</tt>. If a query has multiple grouping expressions, the combination of grouping expressions is evaluated for every binding tuple, and the stream of binding tuples is partitioned into groups that have values in common for all of the grouping expressions. We&#x2019;ll see an example of such a query in Q3.18.</p>
<p>After grouping, the number of binding tuples is reduced: instead of a binding tuple for each of the input objects, there is a binding tuple for each group. The grouping expressions (identified by their aliases, if any) are bound to the results of their evaluations. However, all the non-grouping fields (that is, fields that were not named in the grouping expressions), are accessible only in a special way: as an argument of one of the special aggregation pseudo-functions such as: <tt>SUM</tt>, <tt>AVG</tt>, <tt>MAX</tt>, <tt>MIN</tt>, <tt>STDEV</tt> and <tt>COUNT</tt>. The clauses that come after grouping can access only properties of groups, including the grouping expressions and aggregate properties of the groups such as <tt>COUNT(o.orderno)</tt> or <tt>COUNT(*)</tt>. (We&#x2019;ll see an exception when we discuss the new <tt>GROUP AS</tt> clause.)</p>
<p>You may notice that the results of Q3.16 do not include customers who have no <tt>orders</tt>. If we want to include these <tt>customers</tt>, we need to use an outer join between the <tt>customers</tt> and <tt>orders</tt> collections. This is illustrated by the following example, which also includes the name of each customer.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.17) List the number of orders placed by each customer including those customers who have placed no orders.</p>
<div>
<div>
<pre class="source">SELECT c.custid, c.name, COUNT(o.orderno) AS `order count`
FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
GROUP BY c.custid, c.name
ORDER BY c.custid;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;custid&quot;: &quot;C13&quot;,
&quot;order count&quot;: 4,
&quot;name&quot;: &quot;T. Cody&quot;
},
{
&quot;custid&quot;: &quot;C25&quot;,
&quot;order count&quot;: 0,
&quot;name&quot;: &quot;M. Sinclair&quot;
},
{
&quot;custid&quot;: &quot;C31&quot;,
&quot;order count&quot;: 1,
&quot;name&quot;: &quot;B. Pruitt&quot;
},
{
&quot;custid&quot;: &quot;C35&quot;,
&quot;order count&quot;: 1,
&quot;name&quot;: &quot;J. Roberts&quot;
},
{
&quot;custid&quot;: &quot;C37&quot;,
&quot;order count&quot;: 1,
&quot;name&quot;: &quot;T. Henry&quot;
},
{
&quot;custid&quot;: &quot;C41&quot;,
&quot;order count&quot;: 2,
&quot;name&quot;: &quot;R. Dodge&quot;
},
{
&quot;custid&quot;: &quot;C47&quot;,
&quot;order count&quot;: 0,
&quot;name&quot;: &quot;S. Logan&quot;
}
]
</pre></div></div>
<p>Notice in Q3.17 what happens when the special aggregation function <tt>COUNT</tt> is applied to a collection that does not exist, such as the orders of M. Sinclair: it returns zero. This behavior is unlike that of the other special aggregation functions <tt>SUM</tt>, <tt>AVG</tt>, <tt>MAX</tt>, and <tt>MIN</tt>, which return <tt>null</tt> if their operand does not exist. This should make you cautious about the <tt>COUNT</tt> function: If it returns zero, that may mean that the collection you are counting has zero members, or that it does not exist, or that you have misspelled the collection&#x2019;s name.</p>
<p>Q3.17 also shows how a query block can have more than one grouping expression. In general, the <tt>GROUP BY</tt>clause produces a binding tuple for each different combination of values for the grouping expressions. In Q3.17, the <tt>c.custid</tt> field uniquely identifies a customer, so adding <tt>c.name</tt> as a grouping expression does not result in any more groups. Nevertheless, <tt>c.name</tt> must be included as a grouping expression if it is to be referenced outside (after) the <tt>GROUP BY</tt> clause. If <tt>c.name</tt> were not included in the <tt>GROUP BY</tt> clause, it would not be a group property and could not be used in the <tt>SELECT</tt> clause.</p>
<p>Of course, a grouping expression need not be a simple field-name. In Q3.18, orders are grouped by month, using a temporal function to extract the month component of the order dates. In cases like this, it is helpful to define an alias for the grouping expression so that it can be referenced elsewhere in the query e.g. in the <tt>SELECT</tt> clause.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.18) Find the months in 2020 that had the largest numbers of orders; list the months and their numbers of orders. (Return the top three.)</p>
<div>
<div>
<pre class="source">FROM orders AS o
WHERE get_year(date(o.order_date)) = 2020
GROUP BY get_month(date(o.order_date)) AS month
SELECT month, COUNT(*) AS order_count
ORDER BY order_count DESC, month DESC
LIMIT 3;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;month&quot;: 10,
&quot;order_count&quot;: 2
},
{
&quot;month&quot;: 9,
&quot;order_count&quot;: 2
},
{
&quot;month&quot;: 8,
&quot;order_count&quot;: 1
}
]
</pre></div></div>
<p>Groups are commonly formed from named collections like <tt>customers</tt> and <tt>orders</tt>. But in some queries you need to form groups from a collection that is nested inside another collection, such as <tt>items</tt> inside <tt>orders</tt>. In SQL++ you can do this by using left-correlation in the <tt>FROM</tt> clause to unnest the inner collection, joining the inner collection with the outer collection, and then performing the grouping on the join, as illustrated in Q3.19.</p>
<p>Q3.19 also shows how a <tt>LET</tt> clause can be used after a <tt>GROUP BY</tt> clause to define an expression that is referenced multiple times in later clauses.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.19) For each order, define the total revenue of the order as the sum of quantity times price for all the items in that order. List the total revenue for all the orders placed by the customer with id &#x201c;C13&#x201d;, in descending order by total revenue.</p>
<div>
<div>
<pre class="source">FROM orders as o, o.items as i
WHERE o.custid = &quot;C13&quot;
GROUP BY o.orderno
LET total_revenue = sum(i.qty * i.price)
SELECT o.orderno, total_revenue
ORDER BY total_revenue desc;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;orderno&quot;: 1002,
&quot;total_revenue&quot;: 10906.55
},
{
&quot;orderno&quot;: 1008,
&quot;total_revenue&quot;: 1999.8
},
{
&quot;orderno&quot;: 1007,
&quot;total_revenue&quot;: 130.45
}
]
</pre></div></div>
</div></div>
<div class="section">
<h4><a name="ROLLUP"></a><a name="Rollup" id="Rollup">ROLLUP</a></h4>
<p>The <tt>ROLLUP</tt> subclause is an aggregation feature that extends the functionality of the <tt>GROUP BY</tt> clause. It returns extra <i>super-aggregate</i> items in the query results, giving subtotals and a grand total for the aggregate functions in the query. To illustrate, first consider the following query.</p>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.R1) List the number of orders, grouped by customer region and city.</p>
<div>
<div>
<pre class="source">SELECT customer_region AS Region,
customer_city AS City,
COUNT(o.orderno) AS `Order Count`
FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
LET address_line = SPLIT(c.address.city, &quot;,&quot;),
customer_city = TRIM(address_line[0]),
customer_region = TRIM(address_line[1])
GROUP BY customer_region, customer_city
ORDER BY customer_region ASC, customer_city ASC, `Order Count` DESC;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;Region&quot;: &quot;Italy&quot;,
&quot;City&quot;: &quot;Rome&quot;,
&quot;Order Count&quot;: 0
},
{
&quot;Region&quot;: &quot;MA&quot;,
&quot;City&quot;: &quot;Boston&quot;,
&quot;Order Count&quot;: 2
},
{
&quot;Region&quot;: &quot;MA&quot;,
&quot;City&quot;: &quot;Hanover&quot;,
&quot;Order Count&quot;: 0
},
{
&quot;Region&quot;: &quot;MO&quot;,
&quot;City&quot;: &quot;St. Louis&quot;,
&quot;Order Count&quot;: 7
}
]
</pre></div></div>
<p>This query uses string functions to split each customer&#x2019;s address into city and region. The query then counts the total number of orders placed by each customer, and groups the results first by customer region, then by customer city. The aggregate results (labeled <tt>Order Count</tt>) are only shown by city, and there are no subtotals or grand total. We can add these using the <tt>ROLLUP</tt> subclause, as in the following example.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.R2) List the number of orders by customer region and city, including subtotals and a grand total.</p>
<div>
<div>
<pre class="source">SELECT customer_region AS Region,
customer_city AS City,
COUNT(o.orderno) AS `Order Count`
FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
LET address_line = SPLIT(c.address.city, &quot;,&quot;),
customer_city = TRIM(address_line[0]),
customer_region = TRIM(address_line[1])
GROUP BY ROLLUP(customer_region, customer_city)
ORDER BY customer_region ASC, customer_city ASC, `Order Count` DESC;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;Region&quot;: null,
&quot;City&quot;: null,
&quot;Order Count&quot;: 9
},
{
&quot;Region&quot;: &quot;Italy&quot;,
&quot;City&quot;: null,
&quot;Order Count&quot;: 0
},
{
&quot;Region&quot;: &quot;Italy&quot;,
&quot;City&quot;: &quot;Rome&quot;,
&quot;Order Count&quot;: 0
},
{
&quot;Region&quot;: &quot;MA&quot;,
&quot;City&quot;: null,
&quot;Order Count&quot;: 2
},
{
&quot;Region&quot;: &quot;MA&quot;,
&quot;City&quot;: &quot;Boston&quot;,
&quot;Order Count&quot;: 2
},
{
&quot;Region&quot;: &quot;MA&quot;,
&quot;City&quot;: &quot;Hanover&quot;,
&quot;Order Count&quot;: 0
},
{
&quot;Region&quot;: &quot;MO&quot;,
&quot;City&quot;: null,
&quot;Order Count&quot;: 7
},
{
&quot;Region&quot;: &quot;MO&quot;,
&quot;City&quot;: &quot;St. Louis&quot;,
&quot;Order Count&quot;: 7
}
]
</pre></div></div>
<p>With the addition of the <tt>ROLLUP</tt> subclause, the results now include an extra item at the start of each region, giving the subtotal for that region. There is also another extra item at the very start of the results, giving the grand total for all regions.</p>
<p>The order of the fields specified by the <tt>ROLLUP</tt> subclause determines the hierarchy of the super-aggregate items. The customer region is specified first, followed by the customer city; so the results are aggregated by region first, and then by city within each region.</p>
<p>The grand total returns <tt>null</tt> as a value for the city and the region, and the subtotals return <tt>null</tt> as the value for the city, which may make the results hard to understand at first glance. A workaround for this is given in the next example.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.R3) List the number of orders by customer region and city, with meaningful subtotals and grand total.</p>
<div>
<div>
<pre class="source">SELECT IFNULL(customer_region, &quot;All regions&quot;) AS Region,
IFNULL(customer_city, &quot;All cities&quot;) AS City,
COUNT(o.orderno) AS `Order Count`
FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
LET address_line = SPLIT(c.address.city, &quot;,&quot;),
customer_city = TRIM(address_line[0]),
customer_region = TRIM(address_line[1])
GROUP BY ROLLUP(customer_region, customer_city)
ORDER BY customer_region ASC, customer_city ASC, `Order Count` DESC;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;Region&quot;: &quot;All regions&quot;,
&quot;City&quot;: &quot;All cities&quot;,
&quot;Order Count&quot;: 9
},
{
&quot;Region&quot;: &quot;Italy&quot;,
&quot;City&quot;: &quot;All cities&quot;,
&quot;Order Count&quot;: 0
},
{
&quot;Region&quot;: &quot;Italy&quot;,
&quot;City&quot;: &quot;Rome&quot;,
&quot;Order Count&quot;: 0
},
{
&quot;Region&quot;: &quot;MA&quot;,
&quot;City&quot;: &quot;All cities&quot;,
&quot;Order Count&quot;: 2
},
{
&quot;Region&quot;: &quot;MA&quot;,
&quot;City&quot;: &quot;Boston&quot;,
&quot;Order Count&quot;: 2
},
{
&quot;Region&quot;: &quot;MA&quot;,
&quot;City&quot;: &quot;Hanover&quot;,
&quot;Order Count&quot;: 0
},
{
&quot;Region&quot;: &quot;MO&quot;,
&quot;City&quot;: &quot;All cities&quot;,
&quot;Order Count&quot;: 7
},
{
&quot;Region&quot;: &quot;MO&quot;,
&quot;City&quot;: &quot;St. Louis&quot;,
&quot;Order Count&quot;: 7
}
]
</pre></div></div>
<p>This query uses the <tt>IFNULL</tt> function to populate the region and city fields with meaningful values for the super-aggregate items. This makes the results clearer and more readable.</p></div></div>
<div class="section">
<h4><a name="CUBE"></a><a name="Cube" id="Cube">CUBE</a></h4>
<p>The <tt>CUBE</tt> subclause is similar to the <tt>ROLLUP</tt> subclause, in that it returns extra super-aggregate items in the query results, giving subtotals and a grand total for the aggregate functions. Whereas <tt>ROLLUP</tt> returns a grand total and a hierarchy of subtotals based on the specified fields, the <tt>CUBE</tt> subclause returns a grand total and subtotals for every possible combination of the specified fields.</p>
<p>The following example is a modification of Q3.R3 which illustrates the <tt>CUBE</tt> subclause.</p>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.C) List the number of orders by customer region and order date, with all possible subtotals and a grand total.</p>
<div>
<div>
<pre class="source">SELECT IFNULL(customer_region, &quot;All regions&quot;) AS Region,
IFNULL(order_month, &quot;All months&quot;) AS Month,
COUNT(o.orderno) AS `Order Count`
FROM customers AS c INNER JOIN orders AS o ON c.custid = o.custid
LET address_line = SPLIT(c.address.city, &quot;,&quot;),
customer_region = TRIM(address_line[1]),
order_month = get_month(date(o.order_date))
GROUP BY CUBE(customer_region, order_month)
ORDER BY customer_region ASC, order_month ASC;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;Region&quot;: &quot;All regions&quot;,
&quot;Order Count&quot;: 9,
&quot;Month&quot;: &quot;All months&quot;
},
{
&quot;Region&quot;: &quot;All regions&quot;,
&quot;Order Count&quot;: 1,
&quot;Month&quot;: 4
},
{
&quot;Region&quot;: &quot;All regions&quot;,
&quot;Order Count&quot;: 1,
&quot;Month&quot;: 5
},
{
&quot;Region&quot;: &quot;All regions&quot;,
&quot;Order Count&quot;: 1,
&quot;Month&quot;: 6
},
{
&quot;Region&quot;: &quot;All regions&quot;,
&quot;Order Count&quot;: 1,
&quot;Month&quot;: 7
},
{
&quot;Region&quot;: &quot;All regions&quot;,
&quot;Order Count&quot;: 1,
&quot;Month&quot;: 8
},
{
&quot;Region&quot;: &quot;All regions&quot;,
&quot;Order Count&quot;: 2,
&quot;Month&quot;: 9
},
{
&quot;Region&quot;: &quot;All regions&quot;,
&quot;Order Count&quot;: 2,
&quot;Month&quot;: 10
},
{
&quot;Region&quot;: &quot;MA&quot;,
&quot;Order Count&quot;: 2,
&quot;Month&quot;: &quot;All months&quot;
},
{
&quot;Region&quot;: &quot;MA&quot;,
&quot;Order Count&quot;: 1,
&quot;Month&quot;: 7
},
{
&quot;Region&quot;: &quot;MA&quot;,
&quot;Order Count&quot;: 1,
&quot;Month&quot;: 8
},
{
&quot;Region&quot;: &quot;MO&quot;,
&quot;Order Count&quot;: 7,
&quot;Month&quot;: &quot;All months&quot;
},
{
&quot;Region&quot;: &quot;MO&quot;,
&quot;Order Count&quot;: 1,
&quot;Month&quot;: 4
},
{
&quot;Region&quot;: &quot;MO&quot;,
&quot;Order Count&quot;: 1,
&quot;Month&quot;: 5
},
{
&quot;Region&quot;: &quot;MO&quot;,
&quot;Order Count&quot;: 1,
&quot;Month&quot;: 6
},
{
&quot;Region&quot;: &quot;MO&quot;,
&quot;Order Count&quot;: 2,
&quot;Month&quot;: 9
},
{
&quot;Region&quot;: &quot;MO&quot;,
&quot;Order Count&quot;: 2,
&quot;Month&quot;: 10
}
]
</pre></div></div>
<p>To simplify the results, this query uses an inner join, so that customers who have not placed an order are not included in the totals. The query uses string functions to extract the region from each customer&#x2019;s address, and a temporal function to extract the year from the order date.</p>
<p>The query uses the <tt>CUBE</tt> subclause with customer region and order month. This means that there are four possible aggregates to calculate:</p>
<ul>
<li>All regions, all months</li>
<li>All regions, each month</li>
<li>Each region, all months</li>
<li>Each region, each month</li>
</ul>
<p>The results start with the grand total, showing the total number of orders across all regions for all months. This is followed by date subtotals, showing the number of orders across all regions for each month. Following that are the regional subtotals, showing the total number of orders for all months in each region; and the result items, giving the number of orders for each month in each region.</p>
<p>The query also uses the <tt>IFNULL</tt> function to populate the region and date fields with meaningful values for the super-aggregate items. This makes the results clearer and more readable.</p></div></div></div>
<div class="section">
<h3><a name="HAVING_Clause"></a><a name="Having_clauses" id="Having_clauses">HAVING Clause</a></h3>
<div class="section">
<div class="section">
<h5><a name="HavingClause"></a>HavingClause</h5>
<p><img src="../images/diagrams/HavingClause.png" alt="" /></p>
<p>The <tt>HAVING</tt> clause is very similar to the <tt>WHERE</tt> clause, except that it comes after <tt>GROUP BY</tt> and applies a filter to groups rather than to individual objects. Here&#x2019;s an example of a <tt>HAVING</tt> clause that filters orders by applying a condition to their nested arrays of <tt>items</tt>.</p>
<p>By adding a <tt>HAVING</tt> clause to Q3.19, we can filter the results to include only those orders whose total revenue is greater than 1000, as shown in Q3.22.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.20) Modify Q3.19 to include only orders whose total revenue is greater than 5000.</p>
<div>
<div>
<pre class="source">FROM orders AS o, o.items as i
WHERE o.custid = &quot;C13&quot;
GROUP BY o.orderno
LET total_revenue = sum(i.qty * i.price)
HAVING total_revenue &gt; 5000
SELECT o.orderno, total_revenue
ORDER BY total_revenue desc;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;orderno&quot;: 1002,
&quot;total_revenue&quot;: 10906.55
}
]
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="Aggregation_Pseudo-Functions"></a><a name="Aggregation_PseudoFunctions" id="Aggregation_PseudoFunctions">Aggregation Pseudo-Functions</a></h3>
<p>SQL provides several special functions for performing aggregations on groups including: <tt>SUM</tt>, <tt>AVG</tt>, <tt>MAX</tt>, <tt>MIN</tt>, and <tt>COUNT</tt> (some implementations provide more). These same functions are supported in SQL++. However, it&#x2019;s worth spending some time on these special functions because they don&#x2019;t behave like ordinary functions. They are called &#x201c;pseudo-functions&#x201d; here because they don&#x2019;t evaluate their operands in the same way as ordinary functions. To see the difference, consider these two examples, which are syntactically similar:</p>
<div class="section">
<div class="section">
<h5><a name="Example_1"></a>Example 1</h5>
<div>
<div>
<pre class="source">SELECT LENGTH(name) FROM customers
</pre></div></div>
<p>In Example 1, <tt>LENGTH</tt> is an ordinary function. It simply evaluates its operand (name) and then returns a result computed from the operand.</p></div>
<div class="section">
<h5><a name="Example_2"></a>Example 2</h5>
<div>
<div>
<pre class="source">SELECT AVG(rating) FROM customers
</pre></div></div>
<p>The effect of <tt>AVG</tt> in Example 2 is quite different. Rather than performing a computation on an individual rating value, <tt>AVG</tt> has a global effect: it effectively restructures the query. As a pseudo-function, <tt>AVG</tt> requires its operand to be a group; therefore, it automatically collects all the rating values from the query block and forms them into a group.</p>
<p>The aggregation pseudo-functions always require their operand to be a group. In some queries, the group is explicitly generated by a <tt>GROUP BY</tt> clause, as in Q3.21:</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.21) List the average credit rating of customers by zipcode.</p>
<div>
<div>
<pre class="source">FROM customers AS c
GROUP BY c.address.zipcode AS zip
SELECT zip, AVG(c.rating) AS `avg credit rating`
ORDER BY zip;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;avg credit rating&quot;: 625
},
{
&quot;avg credit rating&quot;: 657.5,
&quot;zip&quot;: &quot;02115&quot;
},
{
&quot;avg credit rating&quot;: 690,
&quot;zip&quot;: &quot;02340&quot;
},
{
&quot;avg credit rating&quot;: 695,
&quot;zip&quot;: &quot;63101&quot;
}
]
</pre></div></div>
<p>Note in the result of Q3.21 that one or more customers had no zipcode. These customers were formed into a group for which the value of the grouping key is missing. When the query results were returned in JSON format, the <tt>missing</tt> key simply does not appear. Also note that the group whose key is <tt>missing</tt> appears first because <tt>missing</tt> is considered to be smaller than any other value. If some customers had had <tt>null</tt> as a zipcode, they would have been included in another group, appearing after the <tt>missing</tt> group but before the other groups.</p>
<p>When an aggregation pseudo-function is used without an explicit <tt>GROUP BY</tt> clause, it implicitly forms the entire query block into a single group, as in Q3.22:</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.22) Find the average credit rating among all customers.</p>
<div>
<div>
<pre class="source">FROM customers AS c
SELECT AVG(c.rating) AS `avg credit rating`;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;avg credit rating&quot;: 670
}
]
</pre></div></div>
<p>The aggregation pseudo-function <tt>COUNT</tt> has a special form in which its operand is <tt>*</tt> instead of an expression.</p>
<p>For example, <tt>SELECT COUNT(*) FROM customers</tt> simply returns the total number of customers, whereas <tt>SELECT COUNT(rating) FROM customers</tt> returns the number of customers who have known ratings (that is, their ratings are not <tt>null</tt> or <tt>missing</tt>).</p>
<p>Because the aggregation pseudo-functions sometimes restructure their operands, they can be used only in query blocks where (explicit or implicit) grouping is being done. Therefore the pseudo-functions cannot operate directly on arrays or multisets. For operating directly on JSON collections, SQL++ provides a set of ordinary functions for computing aggregations. Each ordinary aggregation function (except the ones corresponding to <tt>COUNT</tt> and <tt>ARRAY_AGG</tt>) has two versions: one that ignores <tt>null</tt> and <tt>missing</tt> values and one that returns <tt>null</tt> if a <tt>null</tt> or <tt>missing</tt> value is encountered anywhere in the collection. The names of the aggregation functions are as follows:</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> Aggregation pseudo-function; operates on groups only </th>
<th> Ordinary function: Ignores NULL or MISSING values </th>
<th> Ordinary function: Returns NULL if NULL or MISSING are encountered</th></tr>
</thead><tbody>
<tr class="b">
<td>SUM</td>
<td> ARRAY_SUM</td>
<td> STRICT_SUM </td></tr>
<tr class="a">
<td> AVG </td>
<td>ARRAY_MAX</td>
<td> STRICT_MAX </td></tr>
<tr class="b">
<td> MAX </td>
<td> ARRAY_MIN</td>
<td> STRICT_MIN </td></tr>
<tr class="a">
<td> MIN </td>
<td> ARRAY_AVG</td>
<td> STRICT_AVG </td></tr>
<tr class="b">
<td> COUNT </td>
<td>ARRAY_COUNT</td>
<td>STRICT_COUNT (see exception below) </td></tr>
<tr class="a">
<td>STDDEV_SAMP</td>
<td>ARRAY_STDDEV_SAMP</td>
<td> STRICT_STDDEV_SAMP </td></tr>
<tr class="b">
<td>STDDEV_POP</td>
<td>ARRAY_STDDEV_POP</td>
<td> STRICT_STDDEV_POP </td></tr>
<tr class="a">
<td>VAR_SAMP</td>
<td>ARRAY_VAR_SAMP</td>
<td> STRICT_VAR_SAMP </td></tr>
<tr class="b">
<td>VAR_POP</td>
<td>ARRAY_VAR_POP</td>
<td> STRICT_VAR_POP </td></tr>
<tr class="a">
<td>SKEWENESS</td>
<td>ARRAY_SKEWNESS</td>
<td> STRICT_SKEWNESS </td></tr>
<tr class="b">
<td>KURTOSIS</td>
<td>ARRAY_KURTOSIS</td>
<td> STRICT_KURTOSIS </td></tr>
<tr class="a">
<td> </td>
<td>ARRAY_AGG</td>
<td> </td></tr>
</tbody>
</table>
<p>Exception: the ordinary aggregation function STRICT_COUNT operates on any collection, and returns a count of its items, including null values in the count. In this respect, STRICT_COUNT is more similar to COUNT(*) than to COUNT(expression).</p>
<p>Note that the ordinary aggregation functions that ignore <tt>null</tt> have names beginning with &#x201c;ARRAY&#x201d;. This naming convention has historical roots. Despite their names, the functions operate on both arrays and multisets.</p>
<p>Because of the special properties of the aggregation pseudo-functions, SQL (and therefore SQL++) is not a pure functional language. But every query that uses a pseudo-function can be expressed as an equivalent query that uses an ordinary function. Q3.23 is an example of how queries can be expressed without pseudo-functions. A more detailed explanation of all of the functions is also available in the section on <a href="builtins.html#AggregateFunctions">Aggregate Functions</a>.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.23) Alternative form of Q3.22, using the ordinary function <tt>ARRAY_AVG</tt> rather than the aggregating pseudo-function <tt>AVG</tt>.</p>
<div>
<div>
<pre class="source">SELECT ARRAY_AVG(
(SELECT VALUE c.rating
FROM customers AS c) ) AS `avg credit rating`;
</pre></div></div>
<p>Result (same as Q3.22):</p>
<div>
<div>
<pre class="source">[
{
&quot;avg credit rating&quot;: 670
}
]
</pre></div></div>
<p>If the function <tt>STRICT_AVG</tt> had been used in Q3.23 in place of <tt>ARRAY_AVG</tt>, the average credit rating returned by the query would have been <tt>null</tt>, because at least one customer has no credit rating.</p></div></div></div>
<div class="section">
<h3><a name="GROUP_AS_Clause"></a><a name="Group_As_clauses" id="Group_As_clauses">GROUP AS Clause</a></h3>
<div class="section">
<div class="section">
<h5><a name="GroupAsClause"></a>GroupAsClause</h5>
<p><img src="../images/diagrams/GroupAsClause.png" alt="" /></p>
<p>JSON is a hierarchical format, and a fully featured JSON query language needs to be able to produce hierarchies of its own, with computed data at every level of the hierarchy. The key feature of SQL++ that makes this possible is the <tt>GROUP AS</tt> clause.</p>
<p>A query may have a <tt>GROUP AS</tt> clause only if it has a <tt>GROUP BY</tt> clause. The <tt>GROUP BY</tt> clause &#x201c;hides&#x201d; the original objects in each group, exposing only the grouping expressions and special aggregation functions on the non-grouping fields. The purpose of the <tt>GROUP AS</tt> clause is to make the original objects in the group visible to subsequent clauses. Thus the query can generate output data both for the group as a whole and for the individual objects inside the group.</p>
<p>For each group, the <tt>GROUP AS</tt> clause preserves all the objects in the group, just as they were before grouping, and gives a name to this preserved group. The group name can then be used in the <tt>FROM</tt> clause of a subquery to process and return the individual objects in the group.</p>
<p>To see how this works, we&#x2019;ll write some queries that investigate the customers in each zipcode and their credit ratings. This would be a good time to review the sample database in <a href="#Manual_data">Appendix 4</a>. A part of the data is summarized below.</p>
<div>
<div>
<pre class="source">Customers in zipcode 02115:
C35, J. Roberts, rating 565
C37, T. Henry, rating 750
Customers in zipcode 02340:
C25, M. Sinclair, rating 690
Customers in zipcode 63101:
C13, T. Cody, rating 750
C31, B. Pruitt, (no rating)
C41, R. Dodge, rating 640
Customers with no zipcode:
C47, S. Logan, rating 625
</pre></div></div>
<p>Now let&#x2019;s consider the effect of the following clauses:</p>
<div>
<div>
<pre class="source">FROM customers AS c
GROUP BY c.address.zipcode
GROUP AS g
</pre></div></div>
<p>This query fragment iterates over the <tt>customers</tt> objects, using the iteration variable <tt>c</tt>. The <tt>GROUP BY</tt> clause forms the objects into groups, each with a common zipcode (including one group for customers with no zipcode). After the <tt>GROUP BY</tt> clause, we can see the grouping expression, <tt>c.address.zipcode</tt>, but other fields such as <tt>c.custid</tt> and <tt>c.name</tt> are visible only to special aggregation functions.</p>
<p>The clause <tt>GROUP AS g</tt> now makes the original objects visible again. For each group in turn, the variable <tt>g</tt> is bound to a multiset of objects, each of which has a field named <tt>c</tt>, which in turn contains one of the original objects. Thus after <tt>GROUP AS g</tt>, for the group with zipcode 02115, <tt>g</tt> is bound to the following multiset:</p>
<div>
<div>
<pre class="source">[
{ &quot;c&quot;:
{ &quot;custid&quot;: &quot;C35&quot;,
&quot;name&quot;: &quot;J. Roberts&quot;,
&quot;address&quot;:
{ &quot;street&quot;: &quot;420 Green St.&quot;,
&quot;city&quot;: &quot;Boston, MA&quot;,
&quot;zipcode&quot;: &quot;02115&quot;
},
&quot;rating&quot;: 565
}
},
{ &quot;c&quot;:
{ &quot;custid&quot;: &quot;C37&quot;,
&quot;name&quot;: &quot;T. Henry&quot;,
&quot;address&quot;:
{ &quot;street&quot;: &quot;120 Harbor Blvd.&quot;,
&quot;city&quot;: &quot;St. Louis, MO&quot;,
&quot;zipcode&quot;: &quot;02115&quot;
},
&quot;rating&quot;: 750
}
}
]
</pre></div></div>
<p>Thus, the clauses following <tt>GROUP AS</tt> can see the original objects by writing subqueries that iterate over the multiset <tt>g</tt>.</p>
<p>The extra level named <tt>c</tt> was introduced into this multiset because the groups might have been formed from a join of two or more collections. Suppose that the <tt>FROM</tt> clause looked like <tt>FROM customers AS c, orders AS o</tt>. Then each item in the group would contain both a <tt>customers</tt> object and an <tt>orders</tt> object, and these two objects might both have a field with the same name. To avoid ambiguity, each of the original objects is wrapped in an &#x201c;outer&#x201d; object that gives it the name of its iteration variable in the <tt>FROM</tt> clause. Consider this fragment:</p>
<div>
<div>
<pre class="source">FROM customers AS c, orders AS o
WHERE c.custid = o.custid
GROUP BY c.address.zipcode
GROUP AS g
</pre></div></div>
<p>In this case, following <tt>GROUP AS g</tt>, the variable <tt>g</tt> would be bound to the following collection:</p>
<div>
<div>
<pre class="source">[
{ &quot;c&quot;: { an original customers object },
&quot;o&quot;: { an original orders object }
},
{ &quot;c&quot;: { another customers object },
&quot;o&quot;: { another orders object }
},
...
]
</pre></div></div>
<p>After using <tt>GROUP AS</tt> to make the content of a group accessible, you will probably want to write a subquery to access that content. A subquery for this purpose is written in exactly the same way as any other subquery. The name specified in the <tt>GROUP AS</tt> clause (<tt>g</tt> in the above example) is the name of a collection of objects. You can write a <tt>FROM</tt> clause to iterate over the objects in the collection, and you can specify an iteration variable to represent each object in turn. For <tt>GROUP AS</tt> queries in this manual, we&#x2019;ll use <tt>g</tt>as the name of the reconstituted group, and <tt>gi</tt> as an iteration variable representing one object inside the group. Of course, you can use any names you like for these purposes.</p>
<p>Now we are ready to take a look at how <tt>GROUP AS</tt> might be used in a query. Suppose that we want to group customers by zipcode, and for each group we want to see the average credit rating and a list of the individual customers in the group. Here&#x2019;s a query that does that:</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.24) For each zipcode, list the average credit rating in that zipcode, followed by the customer numbers and names in numeric order.</p>
<div>
<div>
<pre class="source">FROM customers AS c
GROUP BY c.address.zipcode AS zip
GROUP AS g
SELECT zip, AVG(c.rating) AS `avg credit rating`,
(FROM g AS gi
SELECT gi.c.custid, gi.c.name
ORDER BY gi.c.custid) AS `local customers`
ORDER BY zip;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;avg credit rating&quot;: 625,
&quot;local customers&quot;: [
{
&quot;custid&quot;: &quot;C47&quot;,
&quot;name&quot;: &quot;S. Logan&quot;
}
]
},
{
&quot;avg credit rating&quot;: 657.5,
&quot;local customers&quot;: [
{
&quot;custid&quot;: &quot;C35&quot;,
&quot;name&quot;: &quot;J. Roberts&quot;
},
{
&quot;custid&quot;: &quot;C37&quot;,
&quot;name&quot;: &quot;T. Henry&quot;
}
],
&quot;zip&quot;: &quot;02115&quot;
},
{
&quot;avg credit rating&quot;: 690,
&quot;local customers&quot;: [
{
&quot;custid&quot;: &quot;C25&quot;,
&quot;name&quot;: &quot;M. Sinclair&quot;
}
],
&quot;zip&quot;: &quot;02340&quot;
},
{
&quot;avg credit rating&quot;: 695,
&quot;local customers&quot;: [
{
&quot;custid&quot;: &quot;C13&quot;,
&quot;name&quot;: &quot;T. Cody&quot;
},
{
&quot;custid&quot;: &quot;C31&quot;,
&quot;name&quot;: &quot;B. Pruitt&quot;
},
{
&quot;custid&quot;: &quot;C41&quot;,
&quot;name&quot;: &quot;R. Dodge&quot;
}
],
&quot;zip&quot;: &quot;63101&quot;
}
]
</pre></div></div>
<p>Note that this query contains two <tt>ORDER BY</tt> clauses: one in the outer query and one in the subquery. These two clauses govern the ordering of the outer-level list of zipcodes and the inner-level lists of customers, respectively. Also note that the group of customers with no zipcode comes first in the output list.</p></div></div></div></div>
<div class="section">
<h2><a name="Selection_and_UNION_ALL"></a><a name="Union_all" id="Union_all">Selection and UNION ALL</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="Selection"></a>Selection</h5>
<p><img src="../images/diagrams/Selection.png" alt="" /></p></div>
<div class="section">
<h5><a name="UnionOption"></a>UnionOption</h5>
<p><img src="../images/diagrams/UnionOption.png" alt="" /></p>
<p>In a SQL++ query, two or more query blocks can be connected by the operator <tt>UNION ALL</tt>. The result of a <tt>UNION ALL</tt> between two query blocks contains all the items returned by the first query block, and all the items returned by the second query block. Duplicate items are not eliminated from the query result.</p>
<p>As in SQL, there is no ordering guarantee on the contents of the output stream. However, unlike SQL, the query language does not constrain what the data looks like on the input streams; in particular, it allows heterogeneity on the input and output streams. A type error will be raised if one of the inputs is not a collection.</p>
<p>When two or more query blocks are connected by <tt>UNION ALL</tt>, they can be followed by <tt>ORDER BY</tt>, <tt>LIMIT</tt>, and <tt>OFFSET</tt> clauses that apply to the <tt>UNION</tt> query as a whole. For these clauses to be meaningful, the field-names returned by the two query blocks should match. The following example shows a <tt>UNION ALL</tt> of two query blocks, with an ordering specified for the result.</p>
<p>In this example, a customer might be selected because he has ordered more than two different items (first query block) or because he has a high credit rating (second query block). By adding an explanatory string to each query block, the query writer can cause the output objects to be labeled to distinguish these two cases.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.25a) Find customer ids for customers who have placed orders for more than two different items or who have a credit rating greater than 700, with labels to distinguish these cases.</p>
<div>
<div>
<pre class="source">FROM orders AS o, o.items AS i
GROUP BY o.orderno, o.custid
HAVING COUNT(*) &gt; 2
SELECT DISTINCT o.custid AS customer_id, &quot;Big order&quot; AS reason
UNION ALL
FROM customers AS c
WHERE rating &gt; 700
SELECT c.custid AS customer_id, &quot;High rating&quot; AS reason
ORDER BY customer_id;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;reason&quot;: &quot;High rating&quot;,
&quot;customer_id&quot;: &quot;C13&quot;
},
{
&quot;reason&quot;: &quot;Big order&quot;,
&quot;customer_id&quot;: &quot;C37&quot;
},
{
&quot;reason&quot;: &quot;High rating&quot;,
&quot;customer_id&quot;: &quot;C37&quot;
},
{
&quot;reason&quot;: &quot;Big order&quot;,
&quot;customer_id&quot;: &quot;C41&quot;
}
]
</pre></div></div>
<p>If, on the other hand, you simply want a list of the customer ids and you don&#x2019;t care to preserve the reasons, you can simplify your output by using <tt>SELECT VALUE</tt>, as follows:</p>
<p>(Q3.25b) Simplify Q3.25a to return a simple list of unlabeled customer ids.</p>
<div>
<div>
<pre class="source">FROM orders AS o, o.items AS i
GROUP BY o.orderno, o.custid
HAVING COUNT(*) &gt; 2
SELECT VALUE o.custid
UNION ALL
FROM customers AS c
WHERE rating &gt; 700
SELECT VALUE c.custid;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
&quot;C37&quot;,
&quot;C41&quot;,
&quot;C13&quot;,
&quot;C37&quot;
]
</pre></div></div>
</div></div></div></div>
<div class="section">
<h2><a name="WITH_Clause"></a><a name="With_clauses" id="With_clauses">WITH Clause</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="WithClause"></a>WithClause</h5>
<p><img src="../images/diagrams/WithClause.png" alt="" /></p>
<p>As in standard SQL, a <tt>WITH</tt> clause can be used to improve the modularity of a query. A <tt>WITH</tt> clause often contains a subquery that is needed to compute some result that is used later in the main query. In cases like this, you can think of the <tt>WITH</tt> clause as computing a &#x201c;temporary view&quot; of the input data. The next example uses a <tt>WITH</tt> clause to compute the total revenue of each order in 2020; then the main part of the query finds the minimum, maximum, and average revenue for orders in that year.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.26) Find the minimum, maximum, and average revenue among all orders in 2020, rounded to the nearest integer.</p>
<div>
<div>
<pre class="source">WITH order_revenue AS
(FROM orders AS o, o.items AS i
WHERE get_year(date(o.order_date)) = 2020
GROUP BY o.orderno
SELECT o.orderno, SUM(i.qty * i.price) AS revenue
)
FROM order_revenue
SELECT AVG(revenue) AS average,
MIN(revenue) AS minimum,
MAX(revenue) AS maximum;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;average&quot;: 4669.99,
&quot;minimum&quot;: 130.45,
&quot;maximum&quot;: 18847.58
}
]
</pre></div></div>
<p><tt>WITH</tt> can be particularly useful when a value needs to be used several times in a query.</p></div></div></div></div>
<div class="section">
<h2><a name="ORDER_BY.2C_LIMIT.2C_and_OFFSET_Clauses"></a><a name="Order_By_clauses" id="Order_By_clauses">ORDER BY, LIMIT, and OFFSET Clauses</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="OrderbyClause"></a>OrderbyClause</h5>
<p><img src="../images/diagrams/OrderbyClause.png" alt="" /></p></div>
<div class="section">
<h5><a name="LimitClause"></a>LimitClause</h5>
<p><img src="../images/diagrams/LimitClause.png" alt="" /></p></div>
<div class="section">
<h5><a name="OffsetClause"></a>OffsetClause</h5>
<p><img src="../images/diagrams/OffsetClause.png" alt="" /></p>
<p>The last three (optional) clauses to be processed in a query are <tt>ORDER BY</tt>, <tt>LIMIT</tt>, and <tt>OFFSET</tt>.</p>
<p>The <tt>ORDER BY</tt> clause is used to globally sort data in either ascending order (i.e., <tt>ASC</tt>) or descending order (i.e., <tt>DESC</tt>). During ordering (if the <tt>NULLS</tt> modifier is not specified), <tt>MISSING</tt> and <tt>NULL</tt> are treated as being smaller than any other value if they are encountered in the ordering key(s). <tt>MISSING</tt> is treated as smaller than <tt>NULL</tt> if both occur in the data being sorted. The <tt>NULLS</tt> modifier determines how <tt>MISSING</tt> and <tt>NULL</tt> are ordered relative to all other values: first (<tt>NULLS</tt> <tt>FIRST</tt>) or last (<tt>NULLS</tt> <tt>LAST</tt>). The relative order between <tt>MISSING</tt> and <tt>NULL</tt> is not affected by the <tt>NULLS</tt> modifier (i.e. <tt>MISSING</tt> is still treated as smaller than <tt>NULL</tt>). The ordering of values of a given type is consistent with its type&#x2019;s <tt>&lt;=</tt> ordering; the ordering of values across types is implementation-defined but stable.</p>
<p>The <tt>LIMIT</tt> clause is used to limit the result set to a specified maximum size. The optional <tt>OFFSET</tt> clause is used to specify a number of items in the output stream to be discarded before the query result begins. The <tt>OFFSET</tt> can also be used as a standalone clause, without the <tt>LIMIT</tt>.</p>
<p>The following example illustrates use of the <tt>ORDER BY</tt> and <tt>LIMIT</tt> clauses.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.27) Return the top three customers by rating.</p>
<div>
<div>
<pre class="source">FROM customers AS c
SELECT c.custid, c.name, c.rating
ORDER BY c.rating DESC
LIMIT 3;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;custid&quot;: &quot;C13&quot;,
&quot;name&quot;: &quot;T. Cody&quot;,
&quot;rating&quot;: 750
},
{
&quot;custid&quot;: &quot;C37&quot;,
&quot;name&quot;: &quot;T. Henry&quot;,
&quot;rating&quot;: 750
},
{
&quot;custid&quot;: &quot;C25&quot;,
&quot;name&quot;: &quot;M. Sinclair&quot;,
&quot;rating&quot;: 690
}
]
</pre></div></div>
<p>The following example illustrates the use of <tt>OFFSET</tt>:</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.38) Find the customer with the third-highest credit rating.</p>
<div>
<div>
<pre class="source">FROM customers AS c
SELECT c.custid, c.name, c.rating
ORDER BY c.rating DESC
LIMIT 1 OFFSET 2;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;custid&quot;: &quot;C25&quot;,
&quot;name&quot;: &quot;M. Sinclair&quot;,
&quot;rating&quot;: 690
}
]
</pre></div></div>
</div></div></div></div>
<div class="section">
<h2><a name="Subqueries" id="Subqueries">Subqueries</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="Subquery"></a>Subquery</h5>
<p><img src="../images/diagrams/Subquery.png" alt="" /></p>
<p>A subquery is simply a query surrounded by parentheses. In SQL++, a subquery can appear anywhere that an expression can appear. Like any query, a subquery always returns a collection, even if the collection contains only a single value or is empty. If the subquery has a SELECT clause, it returns a collection of objects. If the subquery has a SELECT VALUE clause, it returns a collection of scalar values. If a single scalar value is expected, the indexing operator [0] can be used to extract the single scalar value from the collection.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.29) (Subquery in SELECT clause) For every order that includes item no. 120, find the order number, customer id, and customer name.</p>
<p>Here, the subquery is used to find a customer name, given a customer id. Since the outer query expects a scalar result, the subquery uses SELECT VALUE and is followed by the indexing operator [0].</p>
<div>
<div>
<pre class="source">FROM orders AS o, o.items AS i
WHERE i.itemno = 120
SELECT o.orderno, o.custid,
(FROM customers AS c
WHERE c.custid = o.custid
SELECT VALUE c.name)[0] AS name;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;orderno&quot;: 1003,
&quot;custid&quot;: &quot;C31&quot;,
&quot;name&quot;: &quot;B. Pruitt&quot;
},
{
&quot;orderno&quot;: 1006,
&quot;custid&quot;: &quot;C41&quot;,
&quot;name&quot;: &quot;R. Dodge&quot;
}
]
</pre></div></div>
</div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.30) (Subquery in WHERE clause) Find the customer number, name, and rating of all customers whose rating is greater than the average rating.</p>
<p>Here, the subquery is used to find the average rating among all customers. Once again, SELECT VALUE and indexing [0] have been used to get a single scalar value.</p>
<div>
<div>
<pre class="source">FROM customers AS c1
WHERE c1.rating &gt;
(FROM customers AS c2
SELECT VALUE AVG(c2.rating))[0]
SELECT c1.custid, c1.name, c1.rating;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;custid&quot;: &quot;C13&quot;,
&quot;name&quot;: &quot;T. Cody&quot;,
&quot;rating&quot;: 750
},
{
&quot;custid&quot;: &quot;C25&quot;,
&quot;name&quot;: &quot;M. Sinclair&quot;,
&quot;rating&quot;: 690
},
{
&quot;custid&quot;: &quot;C37&quot;,
&quot;name&quot;: &quot;T. Henry&quot;,
&quot;rating&quot;: 750
}
]
</pre></div></div>
</div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q3.31) (Subquery in FROM clause) Compute the total revenue (sum over items of quantity time price) for each order, then find the average, maximum, and minimum total revenue over all orders.</p>
<p>Here, the FROM clause expects to iterate over a collection of objects, so the subquery uses an ordinary SELECT and does not need to be indexed. You might think of a FROM clause as a &#x201c;natural home&#x201d; for a subquery.</p>
<div>
<div>
<pre class="source">FROM
(FROM orders AS o, o.items AS i
GROUP BY o.orderno
SELECT o.orderno, SUM(i.qty * i.price) AS revenue
) AS r
SELECT AVG(r.revenue) AS average,
MIN(r.revenue) AS minimum,
MAX(r.revenue) AS maximum;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">[
{
&quot;average&quot;: 4669.99,
&quot;minimum&quot;: 130.45,
&quot;maximum&quot;: 18847.58
}
]
</pre></div></div>
<p>Note the similarity between Q3.26 and Q3.31. This illustrates how a subquery can often be moved into a <tt>WITH</tt> clause to improve the modularity and readability of a query.</p><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<h1><a name="Over_clauses" id="Over_clauses">4. Window Functions</a></h1><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<p>Window functions are special functions that compute aggregate values over a &#x201c;window&#x201d; of input data. Like an ordinary function, a window function returns a value for every item in the input dataset. But in the case of a window function, the value returned by the function can depend not only on the argument of the function, but also on other items in the same collection. For example, a window function applied to a set of employees might return the rank of each employee in the set, as measured by salary. As another example, a window function applied to a set of items, ordered by purchase date, might return the running total of the cost of the items.</p>
<p>A window function call is identified by an <tt>OVER</tt> clause, which can specify three things: partitioning, ordering, and framing. The partitioning specification is like a <tt>GROUP BY</tt>: it splits the input data into partitions. For example, a set of employees might be partitioned by department. The window function, when applied to a given object, is influenced only by other objects in the same partition. The ordering specification is like an <tt>ORDER BY</tt>: it determines the ordering of the objects in each partition. The framing specification defines a &#x201c;frame&#x201d; that moves through the partition, defining how the result for each object depends on nearby objects. For example, the frame for a current object might consist of the two objects before and after the current one; or it might consist of all the objects before the current one in the same partition. A window function call may also specify some options that control (for example) how nulls are handled by the function.</p>
<p>Here is an example of a window function call:</p>
<div>
<div>
<pre class="source">SELECT deptno, purchase_date, item, cost,
SUM(cost) OVER (
PARTITION BY deptno
ORDER BY purchase_date
ROWS UNBOUNDED PRECEDING) AS running_total_cost
FROM purchases
ORDER BY deptno, purchase_date
</pre></div></div>
<p>This example partitions the <tt>purchases</tt> dataset by department number. Within each department, it orders the <tt>purchases</tt> by date and computes a running total cost for each item, using the frame specification <tt>ROWS UNBOUNDED PRECEDING</tt>. Note that the <tt>ORDER BY</tt> clause in the window function is separate and independent from the <tt>ORDER BY</tt> clause of the query as a whole.</p>
<p>The general syntax of a window function call is specified in this section. SQL++ has a set of builtin window functions, which are listed and explained in the <a href="builtins.html#WindowFunctions">Window Functions</a> section of the builtin functions page. In addition, standard SQL aggregate functions such as <tt>SUM</tt> and <tt>AVG</tt> can be used as window functions if they are used with an <tt>OVER</tt> clause.</p></div></div></div></div>
<div class="section">
<h2><a name="Window_Function_Call"></a><a name="Window_function_call" id="Window_function_call">Window Function Call</a></h2>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="WindowFunctionCall"></a>WindowFunctionCall</h5>
<p><img src="../images/diagrams/WindowFunctionCall.png" alt="" /></p></div>
<div class="section">
<h5><a name="WindowFunctionType"></a>WindowFunctionType</h5>
<p><img src="../images/diagrams/WindowFunctionType.png" alt="" /></p>
<p>Refer to the <a href="builtins.html#AggregateFunctions">Aggregate Functions</a> section for a list of aggregate functions.</p>
<p>Refer to the <a href="builtins.html#WindowFunctions">Window Functions</a> section for a list of window functions.</p></div></div></div>
<div class="section">
<h3><a name="Window_Function_Arguments"></a><a name="Window_function_arguments" id="Window_function_arguments">Window Function Arguments</a></h3>
<div class="section">
<div class="section">
<h5><a name="WindowFunctionArguments"></a>WindowFunctionArguments</h5>
<p><img src="../images/diagrams/WindowFunctionArguments.png" alt="" /></p>
<p>Refer to the <a href="builtins.html#AggregateFunctions">Aggregate Functions</a> section or the <a href="builtins.html#WindowFunctions">Window Functions</a> section for details of the arguments for individual functions.</p></div></div></div>
<div class="section">
<h3><a name="Window_Function_Options"></a><a name="Window_function_options" id="Window_function_options">Window Function Options</a></h3>
<div class="section">
<div class="section">
<h5><a name="WindowFunctionOptions"></a>WindowFunctionOptions</h5>
<p><img src="../images/diagrams/WindowFunctionOptions.png" alt="" /></p>
<p>Window function options cannot be used with <a href="builtins.html#AggregateFunctions">aggregate functions</a>.</p>
<p>Window function options can only be used with some <a href="builtins.html#WindowFunctions">window functions</a>, as described below.</p>
<p>The <i>FROM modifier</i> determines whether the computation begins at the first or last tuple in the window. It is optional and can only be used with the <tt>nth_value()</tt> function. If it is omitted, the default setting is <tt>FROM FIRST</tt>.</p>
<p>The <i>NULLS modifier</i> determines whether NULL values are included in the computation, or ignored. MISSING values are treated the same way as NULL values. It is also optional and can only be used with the <tt>first_value()</tt>, <tt>last_value()</tt>, <tt>nth_value()</tt>, <tt>lag()</tt>, and <tt>lead()</tt> functions. If omitted, the default setting is <tt>RESPECT NULLS</tt>.</p></div></div></div>
<div class="section">
<h3><a name="Window_Frame_Variable"></a><a name="Window_frame_variable" id="Window_frame_variable">Window Frame Variable</a></h3>
<p>The <tt>AS</tt> keyword enables you to specify an alias for the window frame contents. It introduces a variable which will be bound to the contents of the frame. When using a built-in <a href="builtins.html#AggregateFunctions">aggregate function</a> as a window function, the function&#x2019;s argument must be a subquery which refers to this alias, for example:</p>
<div>
<div>
<pre class="source">SELECT ARRAY_COUNT(DISTINCT (FROM alias SELECT VALUE alias.src.field))
OVER alias AS (PARTITION BY &#x2026; ORDER BY &#x2026;)
FROM source AS src
</pre></div></div>
<p>The alias is not necessary when using a <a href="builtins.html#WindowFunctions">window function</a>, or when using a standard SQL aggregate function with the <tt>OVER</tt> clause.</p></div>
<div class="section">
<h3><a name="Window_Definition"></a><a name="Window_definition" id="Window_definition">Window Definition</a></h3>
<div class="section">
<div class="section">
<h5><a name="WindowDefinition"></a>WindowDefinition</h5>
<p><img src="../images/diagrams/WindowDefinition.png" alt="" /></p>
<p>The <i>window definition</i> specifies the partitioning, ordering, and framing for window functions.</p></div></div>
<div class="section">
<h4><a name="Window_Partition_Clause"></a><a name="Window_partition_clause" id="Window_partition_clause">Window Partition Clause</a></h4>
<div class="section">
<h5><a name="WindowPartitionClause"></a>WindowPartitionClause</h5>
<p><img src="../images/diagrams/WindowPartitionClause.png" alt="" /></p>
<p>The <i>window partition clause</i> divides the tuples into logical partitions using one or more expressions.</p>
<p>This clause may be used with any <a href="builtins.html#WindowFunctions">window function</a>, or any <a href="builtins.html#AggregateFunctions">aggregate function</a> used as a window function.</p>
<p>This clause is optional. If omitted, all tuples are united in a single partition.</p></div></div>
<div class="section">
<h4><a name="Window_Order_Clause"></a><a name="Window_order_clause" id="Window_order_clause">Window Order Clause</a></h4>
<div class="section">
<h5><a name="WindowOrderClause"></a>WindowOrderClause</h5>
<p><img src="../images/diagrams/WindowOrderClause.png" alt="" /></p>
<p>The <i>window order clause</i> determines how tuples are ordered within each partition. The window function works on tuples in the order specified by this clause.</p>
<p>This clause may be used with any <a href="builtins.html#WindowFunctions">window function</a>, or any <a href="builtins.html#AggregateFunctions">aggregate function</a> used as a window function.</p>
<p>This clause is optional. If omitted, all tuples are considered peers, i.e. their order is tied. When tuples in the window partition are tied, each window function behaves differently.</p>
<ul>
<li>
<p>The <tt>row_number()</tt> function returns a distinct number for each tuple. If tuples are tied, the results may be unpredictable.</p>
</li>
<li>
<p>The <tt>rank()</tt>, <tt>dense_rank()</tt>, <tt>percent_rank()</tt>, and <tt>cume_dist()</tt> functions return the same result for each tuple.</p>
</li>
<li>
<p>For other functions, if the <a href="#Window_frame_clause">window frame</a> is defined by <tt>ROWS</tt>, the results may be unpredictable. If the window frame is defined by <tt>RANGE</tt> or <tt>GROUPS</tt>, the results are same for each tuple.</p>
</li>
</ul>
<p><b>Note:</b> This clause does not guarantee the overall order of the query results. To guarantee the order of the final results, use the query <tt>ORDER BY</tt> clause.</p></div></div>
<div class="section">
<h4><a name="Window_Frame_Clause"></a><a name="Window_frame_clause" id="Window_frame_clause">Window Frame Clause</a></h4>
<div class="section">
<h5><a name="WindowFrameClause"></a>WindowFrameClause</h5>
<p><img src="../images/diagrams/WindowFrameClause.png" alt="" /></p>
<p>The <i>window frame clause</i> defines the window frame. It can be used with all <a href="builtins.html#AggregateFunctions">aggregate functions</a> and some <a href="builtins.html#WindowFunctions">window functions</a> &#x2014; refer to the descriptions of individual functions for more details. It is optional and allowed only when the <a href="#Window_order_clause">window order clause</a> is present.</p>
<ul>
<li>
<p>If this clause is omitted and there is no <a href="#Window_order_clause">window order clause</a>, the window frame is the entire partition.</p>
</li>
<li>
<p>If this clause is omitted but there is a <a href="#Window_order_clause">window order clause</a>, the window frame becomes all tuples in the partition preceding the current tuple and its peers &#x2014; the same as <tt>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</tt>.</p>
</li>
</ul>
<p>The window frame can be defined in the following ways:</p>
<ul>
<li>
<p><tt>ROWS</tt>: Counts the exact number of tuples within the frame. If window ordering doesn&#x2019;t result in unique ordering, the function may produce unpredictable results. You can add a unique expression or more window ordering expressions to produce unique ordering.</p>
</li>
<li>
<p><tt>RANGE</tt>: Looks for a value offset within the frame. The function produces deterministic results.</p>
</li>
<li>
<p><tt>GROUPS</tt>: Counts all groups of tied rows within the frame. The function produces deterministic results.</p>
</li>
</ul>
<p><b>Note:</b> If this clause uses <tt>RANGE</tt> with either <i>Expr</i> <tt>PRECEDING</tt> or <i>Expr</i> <tt>FOLLOWING</tt>, the <a href="#Window_order_clause">window order clause</a> must have only a single ordering term. The ordering term expression must evaluate to a number. If these conditions are not met, the window frame will be empty, which means the window function will return its default value: in most cases this is <tt>null</tt>, except for <tt>strict_count()</tt> or <tt>array_count()</tt>, whose default value is 0. This restriction does not apply when the window frame uses <tt>ROWS</tt> or <tt>GROUPS</tt>.</p>
<p><b>Tip:</b> The <tt>RANGE</tt> window frame is commonly used to define window frames based on date or time. If you want to use <tt>RANGE</tt> with either <i>Expr</i> <tt>PRECEDING</tt> or <i>Expr</i> <tt>FOLLOWING</tt>, and you want to use an ordering expression based on date or time, the expression in <i>Expr</i> <tt>PRECEDING</tt> or <i>Expr</i> <tt>FOLLOWING</tt> must use a data type that can be added to the ordering expression.</p></div></div>
<div class="section">
<h4><a name="Window_Frame_Extent"></a><a name="Window_frame_extent" id="Window_frame_extent">Window Frame Extent</a></h4>
<div class="section">
<h5><a name="WindowFrameExtent"></a>WindowFrameExtent</h5>
<p><img src="../images/diagrams/WindowFrameExtent.png" alt="" /></p>
<p>The <i>window frame extent clause</i> specifies the start point and end point of the window frame. The expression before <tt>AND</tt> is the start point and the expression after <tt>AND</tt> is the end point. If <tt>BETWEEN</tt> is omitted, you can only specify the start point; the end point becomes <tt>CURRENT ROW</tt>.</p>
<p>The window frame end point can&#x2019;t be before the start point. If this clause violates this restriction explicitly, an error will result. If it violates this restriction implicitly, the window frame will be empty, which means the window function will return its default value: in most cases this is <tt>null</tt>, except for <tt>strict_count()</tt> or <tt>array_count()</tt>, whose default value is 0.</p>
<p>Window frame extents that result in an explicit violation are:</p>
<ul>
<li>
<p><tt>BETWEEN CURRENT ROW AND</tt> <i>Expr</i> <tt>PRECEDING</tt></p>
</li>
<li>
<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND</tt> <i>Expr</i> <tt>PRECEDING</tt></p>
</li>
<li>
<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND CURRENT ROW</tt></p>
</li>
</ul>
<p>Window frame extents that result in an implicit violation are:</p>
<ul>
<li>
<p><tt>BETWEEN UNBOUNDED PRECEDING AND</tt> <i>Expr</i> <tt>PRECEDING</tt> &#x2014; if <i>Expr</i> is too high, some tuples may generate an empty window frame.</p>
</li>
<li>
<p><tt>BETWEEN</tt> <i>Expr</i> <tt>PRECEDING AND</tt> <i>Expr</i> <tt>PRECEDING</tt> &#x2014; if the second <i>Expr</i> is greater than or equal to the first <i>Expr</i>, all result sets will generate an empty window frame.</p>
</li>
<li>
<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND</tt> <i>Expr</i> <tt>FOLLOWING</tt> &#x2014; if the first <i>Expr</i> is greater than or equal to the second <i>Expr</i>, all result sets will generate an empty window frame.</p>
</li>
<li>
<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND UNBOUNDED FOLLOWING</tt> &#x2014; if <i>Expr</i> is too high, some tuples may generate an empty window frame.</p>
</li>
<li>
<p>If the <a href="#Window_frame_exclusion">window frame exclusion clause</a> is present, any window frame specification may result in empty window frame.</p>
</li>
</ul>
<p>The <i>Expr</i> must be a positive constant or an expression that evaluates as a positive number. For <tt>ROWS</tt> or <tt>GROUPS</tt>, the <i>Expr</i> must be an integer.</p></div></div>
<div class="section">
<h4><a name="Window_Frame_Exclusion"></a><a name="Window_frame_exclusion" id="Window_frame_exclusion">Window Frame Exclusion</a></h4>
<div class="section">
<h5><a name="WindowFrameExclusion"></a>WindowFrameExclusion</h5>
<p><img src="../images/diagrams/WindowFrameExclusion.png" alt="" /></p>
<p>The <i>window frame exclusion clause</i> enables you to exclude specified tuples from the window frame.</p>
<p>This clause can be used with all <a href="builtins.html#AggregateFunctions">aggregate functions</a> and some <a href="builtins.html#WindowFunctions">window functions</a> &#x2014; refer to the descriptions of individual functions for more details.</p>
<p>This clause is allowed only when the <a href="#Window_frame_clause">window frame clause</a> is present.</p>
<p>This clause is optional. If this clause is omitted, the default is no exclusion &#x2014; the same as <tt>EXCLUDE NO OTHERS</tt>.</p>
<ul>
<li>
<p><tt>EXCLUDE CURRENT ROW</tt>: If the current tuple is still part of the window frame, it is removed from the window frame.</p>
</li>
<li>
<p><tt>EXCLUDE GROUP</tt>: The current tuple and any peers of the current tuple are removed from the window frame.</p>
</li>
<li>
<p><tt>EXCLUDE TIES</tt>: Any peers of the current tuple, but not the current tuple itself, are removed from the window frame.</p>
</li>
<li>
<p><tt>EXCLUDE NO OTHERS</tt>: No additional tuples are removed from the window frame.</p>
</li>
</ul>
<p>If the current tuple is already removed from the window frame, then it remains removed from the window frame.</p><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<h1><a name="Errors" id="Errors">5. Errors</a></h1><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<p>A query can potentially result in one of the following errors:</p>
<ul>
<li>syntax error,</li>
<li>identifier resolution error,</li>
<li>type error,</li>
<li>resource error.</li>
</ul>
<p>If the query processor runs into any error, it will terminate the ongoing processing of the query and immediately return an error message to the client.</p></div></div></div></div>
<div class="section">
<h2><a name="Syntax_Errors"></a><a name="Syntax_errors" id="Syntax_errors">Syntax Errors</a></h2>
<p>A valid query must satisfy the grammar rules of the query language. Otherwise, a syntax error will be raised.</p>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q4.1)</p>
<div>
<div>
<pre class="source">customers AS c
SELECT *
</pre></div></div>
<p>Since the queryhas no <tt>FROM</tt> keyword before the dataset <tt>customers</tt>, we will get a syntax error as follows:</p>
<div>
<div>
<pre class="source">ERROR: Code: 1 &quot;ASX1001: Syntax error: In line 2 &gt;&gt;customers AS c&lt;&lt; Encountered \&quot;AS\&quot; at column 11. &quot;
</pre></div></div>
</div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q4.2)</p>
<div>
<div>
<pre class="source"> FROM customers AS c
WHERE type=&quot;advertiser&quot;
SELECT *;
</pre></div></div>
<p>Since &#x201c;type&#x201d; is a reserved keyword in the query parser, we will get a syntax error as follows:</p>
<div>
<div>
<pre class="source">ERROR: Code: 1 &quot;ASX1001: Syntax error: In line 3 &gt;&gt; WHERE type=\&quot;advertiser\&quot;&lt;&lt; Encountered \&quot;type\&quot; at column 8. &quot;;
</pre></div></div>
</div></div></div></div>
<div class="section">
<h2><a name="Identifier_Resolution_Errors"></a><a name="Identifier_resolution_errors" id="Identifier_resolution_errors">Identifier Resolution Errors</a></h2>
<p>Referring to an undefined identifier can cause an error if the identifier cannot be successfully resolved as a valid field access.</p>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q4.3)</p>
<div>
<div>
<pre class="source"> FROM customer AS c
SELECT *
</pre></div></div>
<p>If we have a typo as above in &#x201c;customers&#x201d; that misses the dataset name&#x2019;s ending &#x201c;s&#x201d;, we will get an identifier resolution error as follows:</p>
<div>
<div>
<pre class="source">ERROR: Code: 1 &quot;ASX1077: Cannot find dataset customer in dataverse Commerce nor an alias with name customer (in line 2, at column 7)&quot;
</pre></div></div>
</div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q4.4)</p>
<div>
<div>
<pre class="source"> FROM customers AS c JOIN orders AS o ON c.custid = o.custid
SELECT name, orderno;
</pre></div></div>
<p>If the compiler cannot figure out how to resolve an unqualified field name, which will occur if there is more than one variable in scope (e.g., <tt>customers AS c</tt> and <tt>orders AS o</tt> as above), we will get an identifier resolution error as follows:</p>
<div>
<div>
<pre class="source">ERROR: Code: 1 &quot;ASX1074: Cannot resolve ambiguous alias reference for identifier name (in line 3, at column 9)&quot;
</pre></div></div>
<p>The same can happen when failing to properly identify the <tt>GROUP BY</tt> expression.</p>
<p>(Q4.5)</p>
<div>
<div>
<pre class="source">SELECT o.custid, COUNT(o.orderno) AS `order count`
FROM orders AS o
GROUP BY custid;
</pre></div></div>
<p>Result:</p>
<div>
<div>
<pre class="source">ERROR: Code: 1 &quot;ASX1073: Cannot resolve alias reference for undefined identifier o (in line 2, at column 8)&quot;
</pre></div></div>
</div></div></div></div>
<div class="section">
<h2><a name="Type_Errors"></a><a name="Type_errors" id="Type_errors">Type Errors</a></h2>
<p>The query compiler does type checks based on its available type information. In addition, the query runtime also reports type errors if a data model instance it processes does not satisfy the type requirement.</p>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<p>(Q4.6)</p>
<div>
<div>
<pre class="source">get_day(10/11/2020);
</pre></div></div>
<p>Since function <tt>get_day</tt> can only process duration, daytimeduration, date, or datetime input values, we will get a type error as follows:</p>
<div>
<div>
<pre class="source">ERROR: Code: 1 &quot;ASX0002: Type mismatch: function get-day expects its 1st input parameter to be of type duration, daytimeduration, date or datetime, but the actual input type is double (in line 2, at column 1)&quot;
</pre></div></div>
</div></div></div></div>
<div class="section">
<h2><a name="Resource_Errors"></a><a name="Resource_errors" id="Resource_errors">Resource Errors</a></h2>
<p>A query can potentially exhaust system resources, such as the number of open files and disk spaces. For instance, the following two resource errors could be potentially be seen when running the system:</p>
<div>
<div>
<pre class="source">Error: no space left on device
Error: too many open files
</pre></div></div>
<p>The &#x201c;no space left on device&#x201d; issue usually can be fixed by cleaning up disk space and reserving more disk space for the system. The &#x201c;too many open files&#x201d; issue usually can be fixed by a system administrator, following the instructions <a class="externalLink" href="https://easyengine.io/tutorials/linux/increase-open-files-limit/">here</a>.</p><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<h1><a name="Vs_SQL-92" id="Vs_SQL-92">6. Differences from SQL-92</a></h1><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<p>SQL++ offers the following additional features beyond SQL-92:</p>
<ul>
<li>Fully composable and functional: A subquery can iterate over any intermediate collection and can appear anywhere in a query.</li>
<li>Schema-free: The query language does not assume the existence of a static schema for any data that it processes.</li>
<li>Correlated <tt>FROM</tt> terms: A right-side <tt>FROM</tt> term expression can refer to variables defined by <tt>FROM</tt> terms on its left.</li>
<li>Powerful <tt>GROUP BY</tt>: In addition to a set of aggregate functions as in standard SQL, the groups created by the <tt>GROUP BY</tt> clause are directly usable in nested queries and/or to obtain nested results.</li>
<li>Generalized <tt>SELECT</tt> clause: A <tt>SELECT</tt> clause can return any type of collection, while in SQL-92, a <tt>SELECT</tt> clause has to return a (homogeneous) collection of objects.</li>
</ul>
<p>The following matrix is a quick &#x201c;SQL-92 compatibility cheat sheet&#x201d; for SQL++.</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> Feature </th>
<th> SQL++ </th>
<th> SQL-92 </th>
<th> Why different? </th></tr>
</thead><tbody>
<tr class="b">
<td> SELECT * </td>
<td> Returns nested objects </td>
<td> Returns flattened concatenated objects </td>
<td> Nested collections are 1st class citizens </td></tr>
<tr class="a">
<td> SELECT list </td>
<td> order not preserved </td>
<td> order preserved </td>
<td> Fields in a JSON object are not ordered </td></tr>
<tr class="b">
<td> Subquery </td>
<td> Returns a collection </td>
<td> The returned collection is cast into a scalar value if the subquery appears in a SELECT list or on one side of a comparison or as input to a function </td>
<td> Nested collections are 1st class citizens </td></tr>
<tr class="a">
<td> LEFT OUTER JOIN </td>
<td> Fills in <tt>MISSING</tt>(s) for non-matches </td>
<td> Fills in <tt>NULL</tt>(s) for non-matches </td>
<td> &#x201c;Absence&#x201d; is more appropriate than &#x201c;unknown&#x201d; here </td></tr>
<tr class="b">
<td> UNION ALL </td>
<td> Allows heterogeneous inputs and output </td>
<td> Input streams must be UNION-compatible and output field names are drawn from the first input stream </td>
<td> Heterogenity and nested collections are common </td></tr>
<tr class="a">
<td> IN constant_expr </td>
<td> The constant expression has to be an array or multiset, i.e., [..,..,&#x2026;] </td>
<td> The constant collection can be represented as comma-separated items in a paren pair </td>
<td> Nested collections are 1st class citizens </td></tr>
<tr class="b">
<td> String literal </td>
<td> Double quotes or single quotes </td>
<td> Single quotes only </td>
<td> Double quoted strings are pervasive in JSON</td></tr>
<tr class="a">
<td> Delimited identifiers </td>
<td> Backticks </td>
<td> Double quotes </td>
<td> Double quoted strings are pervasive in JSON </td></tr>
</tbody>
</table>
<p>The following SQL-92 features are not implemented yet. However, SQL++ does not conflict with these features:</p>
<ul>
<li>CROSS JOIN, NATURAL JOIN, UNION JOIN</li>
<li>FULL OUTER JOIN</li>
<li>INTERSECT, EXCEPT, UNION with set semantics</li>
<li>CAST expression</li>
<li>ALL and SOME predicates for linking to subqueries</li>
<li>UNIQUE predicate (tests a collection for duplicates)</li>
<li>MATCH predicate (tests for referential integrity)</li>
<li>Row and Table constructors</li>
<li>Preserved order for expressions in a SELECT list</li>
</ul><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<h1><a name="DDL_and_DML_statements" id="DDL_and_DML_statements">7. DDL and DML statements</a></h1>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="Stmnt"></a>Stmnt</h5>
<p><img src="../images/diagrams/Stmnt.png" alt="" /></p></div>
<div class="section">
<h5><a name="SingleStmnt"></a>SingleStmnt</h5>
<p><img src="../images/diagrams/SingleStmnt.png" alt="" /></p>
<p>In addition to queries, an implementation of SQL++ needs to support statements for data definition and manipulation purposes as well as controlling the context to be used in evaluating query expressions. This section details the DDL and DML statements supported in SQL++ as realized today in Apache AsterixDB.</p><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
</div></div></div></div>
<div class="section">
<h2><a name="Lifecycle_Management_Statements"></a><a name="Lifecycle_management_statements" id="Lifecycle_management_statements">Lifecycle Management Statements</a></h2>
<div class="section">
<h3><a name="Use_Statement"></a><a name="Use" id="Use">Use Statement</a></h3>
<div class="section">
<div class="section">
<h5><a name="UseStmnt"></a>UseStmnt</h5>
<p><img src="../images/diagrams/UseStmnt.png" alt="" /></p></div>
<div class="section">
<h5><a name="DataverseName"></a>DataverseName</h5>
<p><img src="../images/diagrams/DataverseName.png" alt="" /></p>
<p>At the uppermost level, the world of data is organized into data namespaces called <b>dataverses</b>. To set the default dataverse for statements, the <tt>USE</tt> statement is provided.</p>
<p>As an example, the following statement sets the default dataverse to be <tt>Commerce</tt>.</p>
<div>
<div>
<pre class="source">USE Commerce;
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="Set_Statement"></a><a name="Sets" id="Sets"> Set Statement</a></h3>
<p>The <tt>SET</tt> statement can be used to override certain configuration parameters. More information about <tt>SET</tt> can be found in <a href="#Performance_tuning">Appendix 2</a>.</p></div>
<div class="section">
<h3><a name="Function_Declaration"></a><a name="Functions" id="Functions"> Function Declaration</a></h3>
<p>When writing a complex query, it can sometimes be helpful to define one or more auxiliary functions that each address a sub-piece of the overall query.</p>
<p>The <tt>DECLARE FUNCTION</tt> statement supports the creation of such helper functions. In general, the function body (expression) can be any legal query expression.</p>
<p>The function named in the <tt>DECLARE FUNCTION</tt> statement is accessible only in the current query. To create a persistent function for use in multiple queries, use the <tt>CREATE FUNCTION</tt> statement.</p>
<div class="section">
<div class="section">
<h5><a name="FunctionDeclaration"></a>FunctionDeclaration</h5>
<p><img src="../images/diagrams/FunctionDeclaration.png" alt="" /></p>
<p>The following is a simple example of a temporary function definition and its use.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">DECLARE FUNCTION nameSearch(customerId){
(SELECT c.custid, c.name
FROM customers AS c
WHERE c.custid = customerId)[0]
};
SELECT VALUE nameSearch(&quot;C25&quot;);
</pre></div></div>
<p>For our sample data set, this returns:</p>
<div>
<div>
<pre class="source">[
{ &quot;custid&quot;: &quot;C25&quot;, &quot;name&quot;: &quot;M. Sinclair&quot; }
]
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="Create_Statement"></a><a name="Create" id="Create"> Create Statement</a></h3>
<div class="section">
<div class="section">
<h5><a name="CreateStmnt"></a>CreateStmnt</h5>
<p><img src="../images/diagrams/CreateStmnt.png" alt="" /></p></div>
<div class="section">
<h5><a name="DataverseName"></a>DataverseName</h5>
<p><img src="../images/diagrams/DataverseName.png" alt="" /></p></div>
<div class="section">
<h5><a name="QualifiedName"></a>QualifiedName</h5>
<p><img src="../images/diagrams/QualifiedName.png" alt="" /></p></div>
<div class="section">
<h5><a name="DoubleQualifiedName"></a>DoubleQualifiedName</h5>
<p><img src="../images/diagrams/DoubleQualifiedName.png" alt="" /></p>
<p>The <tt>CREATE</tt> statement is used for creating dataverses as well as other persistent artifacts in a dataverse. It can be used to create new dataverses, datatypes, datasets, indexes, and user-defined query functions.</p></div></div>
<div class="section">
<h4><a name="Create_Dataverse"></a><a name="Dataverses" id="Dataverses"> Create Dataverse</a></h4>
<div class="section">
<h5><a name="CreateDataverse"></a>CreateDataverse</h5>
<p><img src="../images/diagrams/CreateDataverse.png" alt="" /></p>
<p>The <tt>CREATE DATAVERSE</tt> statement is used to create new dataverses. To ease the authoring of reusable query scripts, an optional <tt>IF NOT EXISTS</tt> clause is included to allow creation to be requested either unconditionally or only if the dataverse does not already exist. If this clause is absent, an error is returned if a dataverse with the indicated name already exists.</p>
<p>The following example creates a new dataverse named <tt>Commerce</tt> if one does not already exist.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE DATAVERSE Commerce IF NOT EXISTS;
</pre></div></div>
</div></div>
<div class="section">
<h4><a name="Create_Type"></a><a name="Types" id="Types"> Create Type </a></h4>
<div class="section">
<h5><a name="CreateType"></a>CreateType</h5>
<p><img src="../images/diagrams/CreateType.png" alt="" /></p></div>
<div class="section">
<h5><a name="ObjectTypeDef"></a>ObjectTypeDef</h5>
<p><img src="../images/diagrams/ObjectTypeDef.png" alt="" /></p></div>
<div class="section">
<h5><a name="ObjectField"></a>ObjectField</h5>
<p><img src="../images/diagrams/ObjectField.png" alt="" /></p></div>
<div class="section">
<h5><a name="TypeExpr"></a>TypeExpr</h5>
<p><img src="../images/diagrams/TypeExpr.png" alt="" /></p></div>
<div class="section">
<h5><a name="ArrayTypeDef"></a>ArrayTypeDef</h5>
<p><img src="../images/diagrams/ArrayTypeDef.png" alt="" /></p></div>
<div class="section">
<h5><a name="MultisetTypeDef"></a>MultisetTypeDef</h5>
<p><img src="../images/diagrams/MultisetTypeDef.png" alt="" /></p></div>
<div class="section">
<h5><a name="TypeReference"></a>TypeReference</h5>
<p><img src="../images/diagrams/TypeReference.png" alt="" /></p>
<p>The <tt>CREATE TYPE</tt> statement is used to create a new named datatype. This type can then be used to create stored collections or utilized when defining one or more other datatypes. Much more information about the data model is available in the <a href="../datamodel.html">data model reference guide</a>. A new type can be a object type, a renaming of another type, an array type, or a multiset type. A object type can be defined as being either open or closed. Instances of a closed object type are not permitted to contain fields other than those specified in the create type statement. Instances of an open object type may carry additional fields, and open is the default for new types if neither option is specified.</p>
<p>The following example creates three new object types called <tt>addressType</tt>, <tt>customerType</tt>, and <tt>itemType</tt>. Their fields are essentially traditional typed name/value pairs (much like SQL fields). Since it is defined as (defaulting to) being an open type, instances will be permitted to contain more than what is specified in the type definition. Indeed many of the customer objects contain a rating as well, however this is not necessary for the customer object to be created. As can be seen in the sample data, customers can exist without ratings or with part (or all) of the address missing.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE TYPE addressType AS {
street: string,
city: string,
zipcode: string?
};
CREATE TYPE customerType AS {
custid: string,
name: string,
address: addressType?
};
CREATE TYPE itemType AS {
itemno: int,
qty: int,
price: int
};
</pre></div></div>
<p>Optionally, you may wish to create a type that has an automatically generated primary key field. The example below shows an alternate form of <tt>itemType</tt> which achieves this by setting its primary key, <tt>itemno</tt>, to UUID. (Refer to the Datasets section later for more details on such fields.)</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE TYPE itemType AS {
itemno: uuid,
qty: int,
price: int
};
</pre></div></div>
<p>Note that the type of the <tt>itemno</tt> in this example is UUID. This field type can be used if you want to have an autogenerated-PK field. (Refer to the Datasets section later for more details on such fields.)</p>
<p>The next example creates a new object type, closed this time, called <tt>orderType</tt>. Instances of this closed type will not be permitted to have extra fields, although the <tt>ship_date</tt> field is marked as optional and may thus be <tt>NULL</tt> or <tt>MISSING</tt> in legal instances of the type. The items field is an array of instances of another object type, <tt>itemType</tt>.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE TYPE orderType AS CLOSED {
orderno: int,
custid: string,
order_date: string,
ship_date: string?,
items: [ itemType ]
};
</pre></div></div>
</div></div>
<div class="section">
<h4><a name="Create_Dataset"></a><a name="Datasets" id="Datasets"> Create Dataset</a></h4>
<div class="section">
<h5><a name="CreateDataset"></a>CreateDataset</h5>
<p><img src="../images/diagrams/CreateDataset.png" alt="" /></p></div>
<div class="section">
<h5><a name="CreateInternalDataset"></a>CreateInternalDataset</h5>
<p><img src="../images/diagrams/CreateInternalDataset.png" alt="" /></p></div>
<div class="section">
<h5><a name="CreateExternalDataset"></a>CreateExternalDataset</h5>
<p><img src="../images/diagrams/CreateExternalDataset.png" alt="" /></p></div>
<div class="section">
<h5><a name="DatasetTypeDef"></a>DatasetTypeDef</h5>
<p><img src="../images/diagrams/DatasetTypeDef.png" alt="" /></p></div>
<div class="section">
<h5><a name="DatasetFieldDef"></a>DatasetFieldDef</h5>
<p><img src="../images/diagrams/DatasetFieldDef.png" alt="" /></p></div>
<div class="section">
<h5><a name="TypeReference"></a>TypeReference</h5>
<p><img src="../images/diagrams/TypeReference.png" alt="" /></p></div>
<div class="section">
<h5><a name="PrimaryKey"></a>PrimaryKey</h5>
<p><img src="../images/diagrams/PrimaryKey.png" alt="" /></p></div>
<div class="section">
<h5><a name="NestedField"></a>NestedField</h5>
<p><img src="../images/diagrams/NestedField.png" alt="" /></p></div>
<div class="section">
<h5><a name="AdapterName"></a>AdapterName</h5>
<p><img src="../images/diagrams/AdapterName.png" alt="" /></p></div>
<div class="section">
<h5><a name="Configuration"></a>Configuration</h5>
<p><img src="../images/diagrams/Configuration.png" alt="" /></p></div>
<div class="section">
<h5><a name="KeyValuePair"></a>KeyValuePair</h5>
<p><img src="../images/diagrams/KeyValuePair.png" alt="" /></p></div>
<div class="section">
<h5><a name="Properties"></a>Properties</h5>
<p><img src="../images/diagrams/Properties.png" alt="" /></p>
<p>The <tt>CREATE DATASET</tt> statement is used to create a new dataset. Datasets are named, multisets of object type instances; they are where data lives persistently and are the usual targets for queries. Datasets are typed, and the system ensures that their contents conform to their type definitions. An Internal dataset (the default kind) is a dataset whose content lives within and is managed by the system. It is required to have a specified unique primary key field which uniquely identifies the contained objects. (The primary key is also used in secondary indexes to identify the indexed primary data objects.)</p>
<p>Internal datasets contain several advanced options that can be specified when appropriate. One such option is that random primary key (UUID) values can be auto-generated by declaring the field to be UUID and putting <tt>AUTOGENERATED</tt> after the <tt>PRIMARY KEY</tt> identifier. In this case, unlike other non-optional fields, a value for the auto-generated PK field should not be provided at insertion time by the user since each object&#x2019;s primary key field value will be auto-generated by the system.</p>
<p>Another advanced option, when creating an Internal dataset, is to specify the merge policy to control which of the underlying LSM storage components to be merged. (The system supports Log-Structured Merge tree based physical storage for Internal datasets.) Currently the system supports four different component merging policies that can be chosen per dataset: no-merge, constant, prefix, and correlated-prefix. The no-merge policy simply never merges disk components. The constant policy merges disk components when the number of components reaches a constant number k that can be configured by the user. The prefix policy relies on both component sizes and the number of components to decide which components to merge. It works by first trying to identify the smallest ordered (oldest to newest) sequence of components such that the sequence does not contain a single component that exceeds some threshold size M and that either the sum of the component&#x2019;s sizes exceeds M or the number of components in the sequence exceeds another threshold C. If such a sequence exists, the components in the sequence are merged together to form a single component. Finally, the correlated-prefix policy is similar to the prefix policy, but it delegates the decision of merging the disk components of all the indexes in a dataset to the primary index. When the correlated-prefix policy decides that the primary index needs to be merged (using the same decision criteria as for the prefix policy), then it will issue successive merge requests on behalf of all other indexes associated with the same dataset. The system&#x2019;s default policy is the prefix policy except when there is a filter on a dataset, where the preferred policy for filters is the correlated-prefix.</p>
<p>Another advanced option shown in the syntax above, related to performance and mentioned above, is that a <b>filter</b> can optionally be created on a field to further optimize range queries with predicates on the filter&#x2019;s field. Filters allow some range queries to avoid searching all LSM components when the query conditions match the filter. (Refer to <a href="../sqlpp/filters.html">Filter-Based LSM Index Acceleration</a> for more information about filters.)</p>
<p>An External dataset, in contrast to an Internal dataset, has data stored outside of the system&#x2019;s control. Files living in HDFS or in the local filesystem(s) of a cluster&#x2019;s nodes are currently supported. External dataset support allows queries to treat foreign data as though it were stored in the system, making it possible to query &#x201c;legacy&#x201d; file data (for example, Hive data) without having to physically import it. When defining an External dataset, an appropriate adapter type must be selected for the desired external data. (See the <a href="../aql/externaldata.html">Guide to External Data</a> for more information on the available adapters.)</p>
<p>The following example creates an Internal dataset for storing <tt>customerType</tt> objects. It specifies that their <tt>custid</tt> field is their primary key.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE INTERNAL DATASET customers(customerType) PRIMARY KEY custid;
</pre></div></div>
<p>The next example creates an Internal dataset (the default kind when no dataset kind is specified) for storing <tt>itemType</tt> objects might look like. It specifies that the <tt>itemno</tt> field should be used as the primary key for the dataset. It also specifies that the <tt>itemno</tt> field is an auto-generated field, meaning that a randomly generated UUID value should be assigned to each incoming object by the system. (A user should therefore not attempt to provide a value for this field.)</p>
<p>Note that the <tt>itemno</tt> field&#x2019;s declared type must be UUID in this case.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE DATASET MyItems(itemType) PRIMARY KEY itemno AUTOGENERATED;
</pre></div></div>
<p>Alternatively the dataset object type can be specified using inline type definition syntax.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE DATASET MyItems(itemno INT NOT UNKNOWN, qty INT NOT UNKNOWN, price INT NOT UNKNOWN) PRIMARY KEY itemno AUTOGENERATED;
</pre></div></div>
<p>The next example creates an External dataset for querying LineItemType objects. The choice of the <tt>hdfs</tt> adapter means that this dataset&#x2019;s data actually resides in HDFS. The example <tt>CREATE</tt> statement also provides parameters used by the hdfs adapter: the URL and path needed to locate the data in HDFS and a description of the data format.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE EXTERNAL DATASET LineItem(LineItemType) USING hdfs (
(&quot;hdfs&quot;=&quot;hdfs://HOST:PORT&quot;),
(&quot;path&quot;=&quot;HDFS_PATH&quot;),
(&quot;input-format&quot;=&quot;text-input-format&quot;),
(&quot;format&quot;=&quot;delimited-text&quot;),
(&quot;delimiter&quot;=&quot;|&quot;));
</pre></div></div>
</div></div>
<div class="section">
<h4><a name="Create_Index"></a><a name="Indices" id="Indices">Create Index</a></h4>
<div class="section">
<h5><a name="CreateIndex"></a>CreateIndex</h5>
<p><img src="../images/diagrams/CreateIndex.png" alt="" /></p></div>
<div class="section">
<h5><a name="CreateSecondaryIndex"></a>CreateSecondaryIndex</h5>
<p><img src="../images/diagrams/CreateSecondaryIndex.png" alt="" /></p></div>
<div class="section">
<h5><a name="CreatePrimaryKeyIndex"></a>CreatePrimaryKeyIndex</h5>
<p><img src="../images/diagrams/CreatePrimaryKeyIndex.png" alt="" /></p></div>
<div class="section">
<h5><a name="IndexedElement"></a>IndexedElement</h5>
<p><b><img src="../images/diagrams/IndexedElement.png" alt="" /></b></p></div>
<div class="section">
<h5><a name="ArrayIndexElement"></a>ArrayIndexElement</h5>
<p><b><img src="../images/diagrams/ArrayIndexElement.png" alt="" /></b></p></div>
<div class="section">
<h5><a name="IndexField"></a>IndexField</h5>
<p><b><img src="../images/diagrams/IndexField.png" alt="" /></b></p></div>
<div class="section">
<h5><a name="NestedField"></a>NestedField</h5>
<p><img src="../images/diagrams/NestedField.png" alt="" /></p></div>
<div class="section">
<h5><a name="IndexType"></a>IndexType</h5>
<p><img src="../images/diagrams/IndexType.png" alt="" /></p>
<p>The <tt>CREATE INDEX</tt> statement creates a secondary index on one or more fields of a specified dataset. Supported index types include <tt>BTREE</tt> for totally ordered datatypes, <tt>RTREE</tt> for spatial data, and <tt>KEYWORD</tt> and <tt>NGRAM</tt> for textual (string) data. An index can be created on a nested field (or fields) by providing a valid path expression as an index field identifier. An array index can be created on an array or multiset datatype by providing a sequence of <tt>UNNEST</tt> and <tt>SELECT</tt>s to identify the field(s) to be indexed.</p>
<p>An indexed field is not required to be part of the datatype associated with a dataset if the dataset&#x2019;s datatype is declared as open <b>and</b> if the field&#x2019;s type is provided along with its name and if the <tt>ENFORCED</tt> keyword is specified at the end of the index definition. <tt>ENFORCING</tt> an open field introduces a check that makes sure that the actual type of the indexed field (if the optional field exists in the object) always matches this specified (open) field type.</p>
<p>The following example creates a btree index called <tt>cCustIdx</tt> on the <tt>custid</tt> field of the orders dataset. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the <tt>custid</tt> field.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE INDEX cCustIdx ON orders(custid) TYPE BTREE;
</pre></div></div>
<p>The following example creates a btree index called <tt>oCNameIdx</tt> on the <tt>cname</tt> field of the orders dataset, but does not insert <tt>NULL</tt> and <tt>MISSING</tt> values into the index. By default, if <tt>INCLUDE/EXCLUDE UNKNOWN KEY</tt> is not specified, unknown values will be inserted into btree indexes.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE INDEX oCNametIdx ON orders(cname) EXCLUDE UNKNOWN KEY;
</pre></div></div>
<p>The following example creates an open btree index called <tt>oCreatedTimeIdx</tt> on the (non-declared) <tt>createdTime</tt> field of the <tt>orders</tt> dataset having <tt>datetime</tt> type. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the <tt>createdTime</tt> field. The index is enforced so that records that do not have the <tt>createdTime</tt> field or have a mismatched type on the field cannot be inserted into the dataset.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE INDEX oCreatedTimeIdx ON orders(createdTime: datetime?) TYPE BTREE ENFORCED;
</pre></div></div>
<p>The following example creates an open btree index called <tt>cAddedTimeIdx</tt> on the (non-declared) <tt>addedTime</tt> field of the <tt>customers</tt> dataset having datetime type. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the <tt>addedTime</tt> field. The index is not enforced so that records that do not have the <tt>addedTime</tt> field or have a mismatched type on the field can still be inserted into the dataset.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE INDEX cAddedTimeIdx ON customers(addedTime: datetime?);
</pre></div></div>
<p>The following example creates a btree index called <tt>oOrderUserNameIdx</tt> on <tt>orderUserName</tt>, a nested field residing within a object-valued user field in the <tt>orders</tt> dataset. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the nested <tt>orderUserName</tt> field.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE INDEX oOrderUserNameIdx ON orders(order.orderUserName) TYPE BTREE;
</pre></div></div>
<p>The following example creates an array index called <tt>oItemsPriceIdx</tt> on the <tt>price</tt> field inside the <tt>items</tt> array of the <tt>orders</tt> dataset. This index can be useful for accelerating membership queries, existential or universal quantification queries, or joins involving the <tt>price</tt> field inside this array. Unknown values cannot currently be stored inside array indexes, so <tt>EXCLUDE UNKNOWN KEY</tt> must be specified.</p></div></div>
<div class="section">
<h4><a name="Example"></a>Example</h4>
<div>
<div>
<pre class="source">CREATE INDEX oItemsPriceIdx ON orders(UNNEST items SELECT price) EXCLUDE UNKNOWN KEY;
</pre></div></div>
<p>The following example creates an open rtree index called <tt>oOrderLocIdx</tt> on the order-location field of the <tt>orders</tt> dataset. This index can be useful for accelerating queries that use the <a href="builtins.html#spatial_intersect"><tt>spatial-intersect</tt> function</a> in a predicate involving the sender-location field.</p>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE INDEX oOrderLocIDx ON orders(`order-location` : point?) TYPE RTREE ENFORCED;
</pre></div></div>
<p>The following example creates a 3-gram index called <tt>cUserIdx</tt> on the name field of the <tt>customers</tt> dataset. This index can be used to accelerate some similarity or substring maching queries on the name field. For details refer to the document on <a href="similarity.html#NGram_Index">similarity queries</a>.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE INDEX cUserIdx ON customers(name) TYPE NGRAM(3);
</pre></div></div>
<p>The following example creates a keyword index called <tt>oCityIdx</tt> on the <tt>city</tt> within the <tt>address</tt> field of the <tt>customers</tt> dataset. This keyword index can be used to optimize queries with token-based similarity predicates on the <tt>address</tt> field. For details refer to the document on <a href="similarity.html#Keyword_Index">similarity queries</a>.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE INDEX oCityIdx ON customers(address.city) TYPE KEYWORD;
</pre></div></div>
<p>The following example creates a special secondary index which holds only the primary keys. This index is useful for speeding up aggregation queries which involve only primary keys. The name of the index is optional. If the name is not specified, the system will generate one. When the user would like to drop this index, the metadata can be queried to find the system-generated name.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE PRIMARY INDEX cus_pk_idx ON customers;
</pre></div></div>
<p>An example query that can be accelerated using the primary-key index:</p>
<div>
<div>
<pre class="source">SELECT COUNT(*) FROM customers;
</pre></div></div>
<p>To look up the the above primary-key index, issue the following query:</p>
<div>
<div>
<pre class="source">SELECT VALUE i
FROM Metadata.`Index` i
WHERE i.DataverseName = &quot;Commerce&quot; AND i.DatasetName = &quot;customers&quot;;
</pre></div></div>
<p>The query returns:</p>
<div>
<div>
<pre class="source">[
{
&quot;DataverseName&quot;: &quot;Commerce&quot;,
&quot;DatasetName&quot;: &quot;customers&quot;,
&quot;IndexName&quot;: &quot;cus_pk_idx&quot;,
&quot;IndexStructure&quot;: &quot;BTREE&quot;,
&quot;SearchKey&quot;: [],
&quot;IsPrimary&quot;: false,
&quot;Timestamp&quot;: &quot;Fri Sep 18 14:15:51 PDT 2020&quot;,
&quot;PendingOp&quot;: 0
},
{
&quot;DataverseName&quot;: &quot;Commerce&quot;,
&quot;DatasetName&quot;: &quot;customers&quot;,
&quot;IndexName&quot;: &quot;customers&quot;,
&quot;IndexStructure&quot;: &quot;BTREE&quot;,
&quot;SearchKey&quot;: [
[
&quot;custid&quot;
]
],
&quot;IsPrimary&quot;: true,
&quot;Timestamp&quot;: &quot;Thu Jul 16 13:07:37 PDT 2020&quot;,
&quot;PendingOp&quot;: 0
}
]
</pre></div></div>
<p>Remember that <tt>CREATE PRIMARY INDEX</tt> creates a secondary index. That is the reason the <tt>IsPrimary</tt> field is false. The primary-key index can be identified by the fact that the <tt>SearchKey</tt> field is empty since it only contains primary key fields.</p></div></div>
<div class="section">
<h4><a name="Create_Synonym"></a><a name="Synonyms" id="Synonyms"> Create Synonym</a></h4>
<div class="section">
<h5><a name="CreateSynonym"></a>CreateSynonym</h5>
<p><img src="../images/diagrams/CreateSynonym.png" alt="" /></p>
<p>The <tt>CREATE SYNONYM</tt> statement creates a synonym for a given dataset or a view. This synonym may be used instead of the dataset name in <tt>SELECT</tt>, <tt>INSERT</tt>, <tt>UPSERT</tt>, <tt>DELETE</tt>, and <tt>LOAD</tt> statements, or instead of the view name in <tt>SELECT</tt> statements. The target dataset or view does not need to exist when the synonym is created. A synonym may be created for another synonym.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE DATASET customers(customersType) PRIMARY KEY custid;
CREATE SYNONYM customersSynonym FOR customers;
SELECT * FROM customersSynonym;
</pre></div></div>
<p>More information on how synonyms are resolved can be found in <a href="#Variable_bindings_and_name_resolution">Appendix 3. Variable Bindings and Name Resolution</a>.</p></div></div>
<div class="section">
<h4><a name="Create_Function"></a><a name="Create_function" id="Create_function">Create Function</a></h4>
<p>The <tt>CREATE FUNCTION</tt> statement creates a <b>named</b> function that can then be used and reused in queries. The body of a function can be any query expression involving the function&#x2019;s parameters.</p>
<div class="section">
<h5><a name="CreateFunction"></a>CreateFunction</h5>
<p><img src="../images/diagrams/CreateFunction.png" alt="" /></p></div>
<div class="section">
<h5><a name="FunctionParameters"></a>FunctionParameters</h5>
<p><img src="../images/diagrams/FunctionParameters.png" alt="" /></p></div>
<div class="section">
<h5><a name="ExternalFunctionDef"></a>ExternalFunctionDef</h5>
<p><img src="../images/diagrams/ExternalFunctionDef.png" alt="" /></p>
<p>The following is an example of a <tt>CREATE FUNCTION</tt> statement which is similar to our earlier <tt>DECLARE FUNCTION</tt> example.</p>
<p>It differs from that example in that it results in a function that is persistently registered by name in the specified dataverse (the current dataverse being used, if not otherwise specified).</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE FUNCTION nameSearch(customerId) {
(SELECT c.custid, c.name
FROM customers AS c
WHERE u.custid = customerId)[0]
};
</pre></div></div>
<p>The following is an example of CREATE FUNCTION statement that replaces an existing function.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE OR REPLACE FUNCTION friendInfo(userId) {
(SELECT u.id, u.name
FROM GleambookUsers u
WHERE u.id = userId)[0]
};
</pre></div></div>
<p>The following is an example of CREATE FUNCTION statement that introduces a function with a variable number of arguments. The arguments are accessible in the function body via <tt>args</tt> array parameter.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE FUNCTION strJoin(...) {
string_join(args, &quot;,&quot;)
};
</pre></div></div>
<p>External functions can also be loaded into Libraries via the <a href="../udf.html">UDF API</a>. Given an already loaded library <tt>pylib</tt>, a function <tt>sentiment</tt> mapping to a Python method <tt>sent_model.sentiment</tt> in <tt>sentiment_mod</tt> would be as follows</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE FUNCTION sentiment(a) AS &quot;sentiment_mod&quot;, &quot;sent_model.sentiment&quot; AT pylib;
</pre></div></div>
</div></div>
<div class="section">
<h4><a name="Create_View"></a><a name="Create_view" id="Create_view">Create View</a></h4>
<p>The <tt>CREATE VIEW</tt> statement creates a <b>named</b> view that can then be used in queries. The body of a view can be any <tt>SELECT</tt> statement.</p>
<div class="section">
<h5><a name="CreateView"></a>CreateView</h5>
<p><img src="../images/diagrams/CreateView.png" alt="" /></p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">CREATE DATASET customers(customersType) PRIMARY KEY custid;
CREATE VIEW customersView AS
SELECT c.custid, c.name
FROM customers AS c
WHERE c.address.city = &quot;Boston, MA&quot;;
SELECT * FROM customersView;
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="Drop_Statement"></a><a name="Removal" id="Removal">Drop Statement</a></h3>
<div class="section">
<div class="section">
<h5><a name="DropStmnt"></a>DropStmnt</h5>
<p><img src="../images/diagrams/DropStmnt.png" alt="" /></p></div>
<div class="section">
<h5><a name="DataverseName"></a>DataverseName</h5>
<p><img src="../images/diagrams/DataverseName.png" alt="" /></p></div>
<div class="section">
<h5><a name="QualifiedName"></a>QualifiedName</h5>
<p><img src="../images/diagrams/QualifiedName.png" alt="" /></p></div>
<div class="section">
<h5><a name="DoubleQualifiedName"></a>DoubleQualifiedName</h5>
<p><img src="../images/diagrams/DoubleQualifiedName.png" alt="" /></p></div>
<div class="section">
<h5><a name="FunctionSignature"></a>FunctionSignature</h5>
<p><img src="../images/diagrams/FunctionSignature.png" alt="" /></p></div>
<div class="section">
<h5><a name="FunctionParameters"></a>FunctionParameters</h5>
<p><img src="../images/diagrams/FunctionParameters.png" alt="" /></p>
<p>The <tt>DROP</tt> statement is the inverse of the <tt>CREATE</tt> statement. It can be used to drop dataverses, datatypes, datasets, indexes, functions, and synonyms.</p>
<p>The following examples illustrate some uses of the <tt>DROP</tt> statement.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">DROP DATASET customers IF EXISTS;
DROP INDEX orders.orderidIndex;
DROP TYPE customers2.customersType;
DROP FUNCTION nameSearch@1;
DROP SYNONYM customersSynonym;
DROP VIEW customersView;
DROP DATAVERSE CommerceData;
</pre></div></div>
<p>When an artifact is dropped, it will be droppped from the current dataverse if none is specified (see the <tt>DROP DATASET</tt> example above) or from the specified dataverse (see the <tt>DROP TYPE</tt> example above) if one is specified by fully qualifying the artifact name in the <tt>DROP</tt> statement. When specifying an index to drop, the index name must be qualified by the dataset that it indexes. When specifying a function to drop, since the query language allows functions to be overloaded by their number of arguments, the identifying name of the function to be dropped must explicitly include that information. (<tt>nameSearch@1</tt> above denotes the 1-argument function named <tt>nameSearch</tt> in the current dataverse.)</p></div></div></div>
<div class="section">
<h3><a name="Load_Statement"></a><a name="Load_statement" id="Load_statement">Load Statement</a></h3>
<div class="section">
<div class="section">
<h5><a name="LoadStmnt"></a>LoadStmnt</h5>
<p><img src="../images/diagrams/LoadStmnt.png" alt="" /></p></div>
<div class="section">
<h5><a name="AdapterName"></a>AdapterName</h5>
<p><img src="../images/diagrams/AdapterName.png" alt="" /></p></div>
<div class="section">
<h5><a name="Configuration"></a>Configuration</h5>
<p><img src="../images/diagrams/Configuration.png" alt="" /></p></div>
<div class="section">
<h5><a name="KeyValuePair"></a>KeyValuePair</h5>
<p><img src="../images/diagrams/KeyValuePair.png" alt="" /></p>
<p>The <tt>LOAD</tt> statement is used to initially populate a dataset via bulk loading of data from an external file. An appropriate adapter must be selected to handle the nature of the desired external data. The <tt>LOAD</tt> statement accepts the same adapters and the same parameters as discussed earlier for External datasets. (See the <a href="../aql/externaldata.html">guide to external data</a> for more information on the available adapters.) If a dataset has an auto-generated primary key field, the file to be imported should not include that field in it.</p>
<p>The target dataset name may be a synonym introduced by <tt>CREATE SYNONYM</tt> statement.</p>
<p>The following example shows how to bulk load the <tt>customers</tt> dataset from an external file containing data that has been prepared in ADM (Asterix Data Model) format.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source"> LOAD DATASET customers USING localfs
((&quot;path&quot;=&quot;127.0.0.1:///Users/bignosqlfan/commercenew/gbu.adm&quot;),(&quot;format&quot;=&quot;adm&quot;));
</pre></div></div>
</div></div></div></div>
<div class="section">
<h2><a name="Modification_statements" id="Modification_statements">Modification statements</a></h2>
<div class="section">
<h3><a name="Insert_Statement"></a><a name="Inserts" id="Inserts">Insert Statement</a></h3>
<div class="section">
<div class="section">
<h5><a name="InsertStmnt"></a>InsertStmnt</h5>
<p><img src="../images/diagrams/InsertStmnt.png" alt="" /></p>
<p>The <tt>INSERT</tt> statement is used to insert new data into a dataset. The data to be inserted comes from a query expression. This expression can be as simple as a constant expression, or in general it can be any legal query. In case the dataset has an auto-generated primary key, when performing an <tt>INSERT</tt> operation, the system allows the user to manually add the auto-generated key field in the <tt>INSERT</tt> statement, or skip that field and the system will automatically generate it and add it. However, it is important to note that if the a record already exists in the dataset with the auto-generated key provided by the user, then that operation is going to fail. As a general rule, insertion will fail if the dataset already has data with the primary key value(s) being inserted.</p>
<p>Inserts are processed transactionally by the system. The transactional scope of each insert transaction is the insertion of a single object plus its affiliated secondary index entries (if any). If the query part of an insert returns a single object, then the <tt>INSERT</tt> statement will be a single, atomic transaction. If the query part returns multiple objects, each object being inserted will be treated as a separate tranaction.</p>
<p>The target dataset name may be a synonym introduced by <tt>CREATE SYNONYM</tt> statement.</p>
<p>The following example illustrates a query-based insertion.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">INSERT INTO custCopy (SELECT VALUE c FROM customers c)
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="Upsert_Statement"></a><a name="Upserts" id="Upserts">Upsert Statement</a></h3>
<div class="section">
<div class="section">
<h5><a name="UpsertStmnt"></a>UpsertStmnt</h5>
<p><img src="../images/diagrams/UpsertStmnt.png" alt="" /></p>
<p>The <tt>UPSERT</tt> statement syntactically mirrors the <tt>INSERT</tt>statement discussed above. The difference lies in its semantics, which for <tt>UPSERT</tt> are &#x201c;add or replace&#x201d; instead of the <tt>INSERT</tt> &#x201c;add if not present, else error&#x201d; semantics. Whereas an <tt>INSERT</tt> can fail if another object already exists with the specified key, the analogous <tt>UPSERT</tt> will replace the previous object&#x2019;s value with that of the new object in such cases. Like the <tt>INSERT</tt> statement, the system allows the user to manually provide the auto-generated key for datasets with an auto-generated key as its primary key. This operation will insert the record if no record with that key already exists, but if a record with the key already exists, then the operation will be converted to a replace/update operation.</p>
<p>The target dataset name may be a synonym introduced by <tt>CREATE SYNONYM</tt> statement.</p>
<p>The following example illustrates a query-based upsert operation.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">UPSERT INTO custCopy (SELECT VALUE c FROM customers c)
</pre></div></div>
</div></div></div>
<div class="section">
<h3><a name="Delete_Statement"></a><a name="Deletes" id="Deletes">Delete Statement</a></h3>
<div class="section">
<div class="section">
<h5><a name="DeleteStmnt"></a>DeleteStmnt</h5>
<p><img src="../images/diagrams/DeleteStmnt.png" alt="" /></p>
<p>The <tt>DELETE</tt> statement is used to delete data from a target dataset. The data to be deleted is identified by a boolean expression involving the variable bound to the target dataset in the <tt>DELETE</tt> statement.</p>
<p>Deletes are processed transactionally by the system. The transactional scope of each delete transaction is the deletion of a single object plus its affiliated secondary index entries (if any). If the boolean expression for a delete identifies a single object, then the <tt>DELETE</tt> statement itself will be a single, atomic transaction. If the expression identifies multiple objects, then each object deleted will be handled as a separate transaction.</p>
<p>The target dataset name may be a synonym introduced by <tt>CREATE SYNONYM</tt> statement.</p>
<p>The following examples illustrate single-object deletions.</p></div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">DELETE FROM customers c WHERE c.custid = &quot;C41&quot;;
</pre></div></div>
</div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">DELETE FROM customers WHERE custid = &quot;C47&quot;;
</pre></div></div>
<!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<h1><a name="Reserved_keywords" id="Reserved_keywords">Appendix 1. Reserved keywords</a></h1><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<p>All reserved keywords are listed in the following table:</p>
<table border="0" class="table table-striped">
<thead>
<tr class="a">
<th> </th>
<th> </th>
<th> </th>
<th> </th>
<th> </th>
<th> </th></tr>
</thead><tbody>
<tr class="b">
<td> ADAPTER </td>
<td> ALL </td>
<td> AND </td>
<td> ANY </td>
<td> APPLY </td>
<td> AS </td></tr>
<tr class="a">
<td> ASC </td>
<td> AT </td>
<td> AUTOGENERATED </td>
<td> BETWEEN </td>
<td> BTREE </td>
<td> BY </td></tr>
<tr class="b">
<td> CASE </td>
<td> CLOSED </td>
<td> COLLECTION </td>
<td> CREATE </td>
<td> COMPACTION </td>
<td> COMPACT </td></tr>
<tr class="a">
<td> CONNECT </td>
<td> CORRELATE </td>
<td> DATASET </td>
<td> DATAVERSE </td>
<td> DECLARE </td>
<td> DEFINITION </td></tr>
<tr class="b">
<td> DELETE </td>
<td> DESC </td>
<td> DISCONNECT </td>
<td> DISTINCT </td>
<td> DIV </td>
<td> DROP </td></tr>
<tr class="a">
<td> ELEMENT </td>
<td> EXPLAIN </td>
<td> ELSE </td>
<td> ENFORCED </td>
<td> END </td>
<td> EVERY </td></tr>
<tr class="b">
<td> EXCEPT </td>
<td> EXIST </td>
<td> EXTERNAL </td>
<td> FEED </td>
<td> FILTER </td>
<td> FLATTEN </td></tr>
<tr class="a">
<td> FOR </td>
<td> FROM </td>
<td> FULL </td>
<td> FULLTEXT </td>
<td> FUNCTION </td>
<td> GROUP </td></tr>
<tr class="b">
<td> HAVING </td>
<td> HINTS </td>
<td> IF </td>
<td> INTO </td>
<td> IN </td>
<td> INDEX </td></tr>
<tr class="a">
<td> INGESTION </td>
<td> INNER </td>
<td> INSERT </td>
<td> INTERNAL </td>
<td> INTERSECT </td>
<td> IS </td></tr>
<tr class="b">
<td> JOIN </td>
<td> KEYWORD </td>
<td> LEFT </td>
<td> LETTING </td>
<td> LET </td>
<td> LIKE </td></tr>
<tr class="a">
<td> LIMIT </td>
<td> LOAD </td>
<td> MISSING </td>
<td> NODEGROUP </td>
<td> NGRAM </td>
<td> NOT </td></tr>
<tr class="b">
<td> NULL </td>
<td> OFFSET </td>
<td> ON </td>
<td> OPEN </td>
<td> OR </td>
<td> ORDER </td></tr>
<tr class="a">
<td> OUTER </td>
<td> OUTPUT </td>
<td> OVER </td>
<td> PATH </td>
<td> POLICY </td>
<td> PRE-SORTED </td></tr>
<tr class="b">
<td> PRIMARY </td>
<td> RAW </td>
<td> REFRESH </td>
<td> RETURN </td>
<td> RETURNING </td>
<td> RIGHT </td></tr>
<tr class="a">
<td> RTREE </td>
<td> RUN </td>
<td> SATISFIES </td>
<td> SECONDARY </td>
<td> SELECT </td>
<td> SET </td></tr>
<tr class="b">
<td> SOME </td>
<td> START </td>
<td> STOP </td>
<td> SYNONYM </td>
<td> TEMPORARY </td>
<td> THEN </td></tr>
<tr class="a">
<td> TO </td>
<td> TRUE </td>
<td> TYPE </td>
<td> UNION </td>
<td> UNKNOWN </td>
<td> UNNEST </td></tr>
<tr class="b">
<td> UPDATE </td>
<td> UPSERT </td>
<td> USE </td>
<td> USING </td>
<td> VALUE </td>
<td> VALUED </td></tr>
<tr class="a">
<td> WHEN </td>
<td> WHERE </td>
<td> WITH </td>
<td> WRITE </td>
<td> </td>
<td> </td></tr>
</tbody>
</table><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
</div></div></div></div>
<div class="section">
<h2><a name="Appendix_2._Performance_Tuning"></a><a name="Performance_tuning" id="Performance_tuning">Appendix 2. Performance Tuning</a></h2><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<p>The <tt>SET</tt> statement can be used to override some cluster-wide configuration parameters for a specific request:</p>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="SetStmnt"></a>SetStmnt</h5>
<p><img src="../images/diagrams/SetStmnt.png" alt="" /></p>
<p>As parameter identifiers are qualified names (containing a &#x2018;.&#x2019;) they have to be escaped using backticks (``). Note that changing query parameters will not affect query correctness but only impact performance characteristics, such as response time and throughput.</p></div></div></div></div>
<div class="section">
<h2><a name="Parallelism_Parameter"></a><a name="Parallelism_parameter" id="Parallelism_parameter">Parallelism Parameter</a></h2>
<p>The system can execute each request using multiple cores on multiple machines (a.k.a., partitioned parallelism) in a cluster. A user can manually specify the maximum execution parallelism for a request to scale it up and down using the following parameter:</p>
<ul>
<li><b>compiler.parallelism</b>: the maximum number of CPU cores can be used to process a query. There are three cases of the value <i>p</i> for compiler.parallelism:
<ul>
<li>
<p><i>p</i> &lt; 0 or <i>p</i> &gt; the total number of cores in a cluster: the system will use all available cores in the cluster;</p>
</li>
<li>
<p><i>p</i> = 0 (the default): the system will use the storage parallelism (the number of partitions of stored datasets) as the maximum parallelism for query processing;</p>
</li>
<li>
<p>all other cases: the system will use the user-specified number as the maximum number of CPU cores to use for executing the query.</p>
</li>
</ul>
</li>
</ul>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">SET `compiler.parallelism` &quot;16&quot;;
SELECT c.name AS cname, o.orderno AS orderno
FROM customers c JOIN orders o ON c.custid = o.custid;
</pre></div></div>
</div></div></div></div>
<div class="section">
<h2><a name="Memory_Parameters"></a><a name="Memory_parameters" id="Memory_parameters">Memory Parameters</a></h2>
<p>In the system, each blocking runtime operator such as join, group-by and order-by works within a fixed memory budget, and can gracefully spill to disks if the memory budget is smaller than the amount of data they have to hold. A user can manually configure the memory budget of those operators within a query. The supported configurable memory parameters are:</p>
<ul>
<li>
<p><b>compiler.groupmemory</b>: the memory budget that each parallel group-by operator instance can use; 32MB is the default budget.</p>
</li>
<li>
<p><b>compiler.sortmemory</b>: the memory budget that each parallel sort operator instance can use; 32MB is the default budget.</p>
</li>
<li>
<p><b>compiler.joinmemory</b>: the memory budget that each parallel hash join operator instance can use; 32MB is the default budget.</p>
</li>
<li>
<p><b>compiler.windowmemory</b>: the memory budget that each parallel window aggregate operator instance can use; 32MB is the default budget.</p>
</li>
</ul>
<p>For each memory budget value, you can use a 64-bit integer value with a 1024-based binary unit suffix (for example, B, KB, MB, GB). If there is no user-provided suffix, &#x201c;B&#x201d; is the default suffix. See the following examples.</p>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">SET `compiler.groupmemory` &quot;64MB&quot;;
SELECT c.custid, COUNT(*)
FROM customers c
GROUP BY c.custid;
</pre></div></div>
</div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">SET `compiler.sortmemory` &quot;67108864&quot;;
SELECT VALUE o
FROM orders AS o
ORDER BY ARRAY_LENGTH(o.items) DESC;
</pre></div></div>
</div>
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">SET `compiler.joinmemory` &quot;132000KB&quot;;
SELECT c.name AS cname, o.ordeno AS orderno
FROM customers c JOIN orders o ON c.custid = o.custid;
</pre></div></div>
<!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
</div></div></div></div>
<div class="section">
<h2><a name="Parallel_Sort_Parameter"></a><a name="Parallel_sort_parameter" id="Parallel_sort_parameter">Parallel Sort Parameter</a></h2>
<p>The following parameter enables you to activate or deactivate full parallel sort for order-by operations.</p>
<p>When full parallel sort is inactive (<tt>false</tt>), each existing data partition is sorted (in parallel), and then all data partitions are merged into a single node.</p>
<p>When full parallel sort is active (<tt>true</tt>), the data is first sampled, and then repartitioned so that each partition contains data that is greater than the previous partition. The data in each partition is then sorted (in parallel), but the sorted partitions are not merged into a single node.</p>
<ul>
<li><b>compiler.sort.parallel</b>: A boolean specifying whether full parallel sort is active (<tt>true</tt>) or inactive (<tt>false</tt>). The default value is <tt>true</tt>.</li>
</ul>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">SET `compiler.sort.parallel` &quot;true&quot;;
SELECT VALUE o
FROM orders AS o
ORDER BY ARRAY_LENGTH(o.items) DESC;
</pre></div></div>
<!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
</div></div></div></div>
<div class="section">
<h2><a name="Controlling_Index-Only-Plan_Parameter"></a><a name="Index_Only" id="Index_Only">Controlling Index-Only-Plan Parameter</a></h2>
<p>By default, the system tries to build an index-only plan whenever utilizing a secondary index is possible. For example, if a <tt>SELECT</tt> or <tt>JOIN</tt> query can utilize an enforced B+Tree or R-Tree index on a field, the optimizer checks whether a secondary-index search alone can generate the result that the query asks for. It mainly checks two conditions: (1) predicates used in <tt>WHERE</tt> only uses the primary key field and/or secondary key field and (2) the result does not return any other fields. If these two conditions hold, it builds an index-only plan. Since an index-only plan only searches a secondary-index to answer a query, it is faster than a non-index-only plan that needs to search the primary index. However, this index-only plan can be turned off per query by setting the following parameter.</p>
<ul>
<li><b>compiler.indexonly</b>: if this is set to false, the index-only-plan will not be applied; the default value is true.</li>
</ul>
<div class="section">
<div class="section">
<div class="section">
<h5><a name="Example"></a>Example</h5>
<div>
<div>
<pre class="source">set `compiler.indexonly` &quot;false&quot;;
SELECT o.order_date AS orderdate
FROM orders o where o.order_date = &quot;2020-05-01&quot;;
</pre></div></div>
<!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
</div></div></div></div>
<div class="section">
<h2><a name="Controlling_Array-Index_Access_Method_Plan_Parameter"></a><a name="ArrayIndexFlag" id="ArrayIndexFlag">Controlling Array-Index Access Method Plan Parameter</a></h2>
<p>By default, the system attempts to utilize array indexes as an access method if an array index is present and is applicable. If you believe that your query will not benefit from an array index, toggle the parameter below.</p>
<ul>
<li><b>compiler.arrayindex</b>: if this is set to true, array indexes will be considered as an access method for applicable queries; the default value is true.</li>
</ul>
<div class="section">
<div class="section">
<h4><a name="Example"></a>Example</h4>
<div>
<div>
<pre class="source">set `compiler.arrayindex` &quot;false&quot;;
SELECT o.orderno
FROM orders o
WHERE SOME i IN o.items
SATISFIES i.price = 19.91;
</pre></div></div>
<!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
</div></div></div>
<div class="section">
<h2><a name="Query_Hints"></a><a name="Query_hints" id="Query_hints">Query Hints</a></h2>
<div class="section">
<div class="section">
<h4><a name="a.E2.80.9Chash.E2.80.9D_GROUP_BY_hint"></a><a name="hash_groupby" id="hash_groupby">&#x201c;hash&#x201d; GROUP BY hint</a></h4>
<p>The system supports two algorithms for GROUP BY clause evaluation: pre-sorted and hash-based. By default it uses the pre-sorted approach: The input data is first sorted on the grouping fields and then aggregation is performed on that sorted data. The alternative is a hash-based strategy which can be enabled via a <tt>/*+ hash */</tt> GROUP BY hint: The data is aggregated using an in-memory hash-table (that can spill to disk if necessary). This approach is recommended for low-cardinality grouping fields.</p>
<div class="section">
<h5><a name="Example:"></a>Example:</h5>
<div>
<div>
<pre class="source">SELECT c.address.state, count(*)
FROM Customers AS c
/*+ hash */ GROUP BY c.address.state
</pre></div></div>
</div></div>
<div class="section">
<h4><a name="a.E2.80.9Chash-bcast.E2.80.9D_JOIN_hint"></a><a name="hash_bcast_join" id="hash_bcast_join">&#x201c;hash-bcast&#x201d; JOIN hint</a></h4>
<p>By default the system uses a partitioned-parallel hash join strategy to parallelize the execution of an equi-join. In this approach both sides of the join are repartitioned (if necessary) on a hash of the join key; potentially matching data items thus arrive at the same partition to be joined locally. This strategy is robust, but not always the fastest when one of the join sides is low cardinality and the other is high cardinality (since it scans and potentially moves the data from both sides). This special case can be better handled by broadcasting (replicating) the smaller side to all data partitions of the larger side and not moving the data from the other (larger) side. The system provides a join hint to enable this strategy: <tt>/*+ hash-bcast */</tt>. This hint forces the right side of the join to be replicated while the left side retains its original partitioning.</p>
<div class="section">
<h5><a name="Example:"></a>Example:</h5>
<div>
<div>
<pre class="source">SELECT *
FROM Orders AS o JOIN Customers AS c
ON o.customer_id /*+ hash-bcast */ = c.customer_id
</pre></div></div>
<!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
</div></div></div></div>
<div class="section">
<h2><a name="Appendix_3._Variable_Bindings_and_Name_Resolution"></a><a name="Variable_bindings_and_name_resolution" id="Variable_bindings_and_name_resolution">Appendix 3. Variable Bindings and Name Resolution</a></h2><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<p>In this Appendix, we&#x2019;ll look at how variables are bound and how names are resolved. Names can appear in every clause of a query. Sometimes a name consists of just a single identifier, e.g., <tt>region</tt> or <tt>revenue</tt>. More often a name will consist of two identifiers separated by a dot, e.g., <tt>customer.address</tt>. Occasionally a name may have more than two identifiers, e.g., <tt>policy.owner.address.zipcode</tt>. <i>Resolving</i> a name means determining exactly what the (possibly multi-part) name refers to. It is necessary to have well-defined rules for how to resolve a name in cases of ambiguity. (In the absence of schemas, such cases arise more commonly, and also differently, than they do in SQL.)</p>
<p>The basic job of each clause in a query block is to bind variables. Each clause sees the variables bound by previous clauses and may bind additional variables. Names are always resolved with respect to the variables that are bound (&#x201c;in scope&#x201d;) at the place where the name use in question occurs. It is possible that the name resolution process will fail, which may lead to an empty result or an error message.</p>
<p>One important bit of background: When the system is reading a query and resolving its names, it has a list of all the available dataverses and datasets. As a result, it knows whether <tt>a.b</tt> is a valid name for dataset <tt>b</tt> in dataverse <tt>a</tt>. However, the system does not in general have knowledge of the schemas of the data inside the datasets; remember that this is a much more open world. As a result, in general the system cannot know whether any object in a particular dataset will have a field named <tt>c</tt>. These assumptions affect how errors are handled. If you try to access dataset <tt>a.b</tt> and no dataset by that name exists, you will get an error and your query will not run. However, if you try to access a field <tt>c</tt> in a collection of objects, your query will run and return <tt>missing</tt> for each object that doesn&#x2019;t have a field named <tt>c</tt> - this is because it&#x2019;s possible that some object (someday) could have such a field.</p></div>
<div class="section">
<h2><a name="Binding_Variables"></a><a name="Binding_variables" id="Binding_variables">Binding Variables</a></h2>
<p>Variables can be bound in the following ways:</p>
<ol style="list-style-type: decimal">
<li>
<p><tt>WITH</tt> and <tt>LET</tt> clauses bind a variable to the result of an expression in a straightforward way</p>
<p>Examples:</p>
<p><tt>WITH cheap_parts AS (SELECT partno FROM parts WHERE price &lt; 100)</tt> binds the variable <tt>cheap_parts</tt> to the result of the subquery.</p>
<p><tt>LET pay = salary + bonus</tt> binds the variable <tt>pay</tt> to the result of evaluating the expression <tt>salary + bonus</tt>.</p>
</li>
<li>
<p><tt>FROM</tt>, <tt>GROUP BY</tt>, and <tt>SELECT</tt> clauses have optional <tt>AS</tt> subclauses that contain an expression and a name (called an <i>iteration variable</i> in a <tt>FROM</tt> clause, or an alias in <tt>GROUP BY</tt> or <tt>SELECT</tt>).</p>
<p>Examples:</p>
<p><tt>FROM customer AS c, order AS o</tt></p>
<p><tt>GROUP BY salary + bonus AS total_pay</tt></p>
<p><tt>SELECT MAX(price) AS highest_price</tt></p>
<p>An <tt>AS</tt> subclause always binds the name (as a variable) to the result of the expression (or, in the case of a <tt>FROM</tt> clause, to the <i>individual members</i> of the collection identified by the expression).</p>
<p>It&#x2019;s always a good practice to use the keyword <tt>AS</tt> when defining an alias or iteration variable. However, as in SQL, the syntax allows the keyword <tt>AS</tt> to be omitted. For example, the <tt>FROM</tt> clause above could have been written like this:</p>
<p><tt>FROM customer c, order o</tt></p>
<p>Omitting the keyword <tt>AS</tt> does not affect the binding of variables. The FROM clause in this example binds variables c and o whether the keyword AS is used or not.</p>
<p>In certain cases, a variable is automatically bound even if no alias or variable-name is specified. Whenever an expression could have been followed by an AS subclause, if the expression consists of a simple name or a path expression, that expression binds a variable whose name is the same as the simple name or the last step in the path expression. Here are some examples:</p>
<p><tt>FROM customer, order</tt> binds iteration variables named <tt>customer</tt> and <tt>order</tt></p>
<p><tt>GROUP BY address.zipcode</tt> binds a variable named <tt>zipcode</tt></p>
<p><tt>SELECT item[0].price</tt> binds a variable named <tt>price</tt></p>
<p>Note that a <tt>FROM</tt> clause iterates over a collection (usually a dataset), binding a variable to each member of the collection in turn. The name of the collection remains in scope, but it is not a variable. For example, consider this <tt>FROM</tt> clause used in a self-join:</p>
<p><tt>FROM customer AS c1, customer AS c2</tt></p>
<p>This <tt>FROM</tt> clause joins the customer dataset to itself, binding the iteration variables <tt>c1</tt> and <tt>c2</tt> to objects in the left-hand-side and right-hand-side of the join, respectively. After the <tt>FROM</tt> clause, <tt>c1</tt> and <tt>c2</tt> are in scope as variables, and customer remains accessible as a dataset name but not as a variable.</p>
</li>
<li>
<p>Special rules for <tt>GROUP BY</tt>:</p>
<ul>
<li>
<p>(3A): If a <tt>GROUP BY</tt> clause specifies an expression that has no explicit alias, it binds a pseudo-variable that is lexicographically identical to the expression itself. For example:</p>
<p><tt>GROUP BY salary + bonus</tt> binds a pseudo-variable named <tt>salary + bonus</tt>.</p>
<p>This rule allows subsequent clauses to refer to the grouping expression (salary + bonus) even though its constituent variables (salary and bonus) are no longer in scope. For example, the following query is valid:</p>
<div>
<div>
<pre class="source">FROM employee
GROUP BY salary + bonus
HAVING salary + bonus &gt; 1000
SELECT salary + bonus, COUNT(*) AS how_many
</pre></div></div>
<p>While it might have been more elegant to explicitly require an alias in cases like this, the pseudo-variable rule is retained for SQL compatibility. Note that the expression <tt>salary + bonus</tt> is not <i>actually</i> evaluated in the <tt>HAVING</tt> and <tt>SELECT</tt> clauses (and could not be since <tt>salary</tt> and <tt>bonus</tt> are no longer individually in scope). Instead, the expression <tt>salary + bonus</tt> is treated as a reference to the pseudo-variable defined in the <tt>GROUP BY</tt> clause.</p>
</li>
<li>
<p>(3B): The <tt>GROUP BY</tt> clause may be followed by a <tt>GROUP AS</tt> clause that binds a variable to the group. The purpose of this variable is to make the individual objects inside the group visible to subqueries that may need to iterate over them.</p>
<p>The <tt>GROUP AS</tt> variable is bound to a multiset of objects. Each object represents one of the members of the group. Since the group may have been formed from a join, each of the member-objects contains a nested object for each variable bound by the nearest <tt>FROM</tt> clause (and its <tt>LET</tt> subclause, if any). These nested objects, in turn, contain the actual fields of the group-member. To understand this process, consider the following query fragment:</p>
<div>
<div>
<pre class="source">FROM parts AS p, suppliers AS s
WHERE p.suppno = s.suppno
GROUP BY p.color GROUP AS g
</pre></div></div>
<p>Suppose that the objects in <tt>parts</tt> have fields <tt>partno</tt>, <tt>color</tt>, and <tt>suppno</tt>. Suppose that the objects in suppliers have fields <tt>suppno</tt> and <tt>location</tt>.</p>
<p>Then, for each group formed by the <tt>GROUP BY</tt>, the variable g will be bound to a multiset with the following structure:</p>
<div>
<div>
<pre class="source">[ { &quot;p&quot;: { &quot;partno&quot;: &quot;p1&quot;, &quot;color&quot;: &quot;red&quot;, &quot;suppno&quot;: &quot;s1&quot; },
&quot;s&quot;: { &quot;suppno&quot;: &quot;s1&quot;, &quot;location&quot;: &quot;Denver&quot; } },
{ &quot;p&quot;: { &quot;partno&quot;: &quot;p2&quot;, &quot;color&quot;: &quot;red&quot;, &quot;suppno&quot;: &quot;s2&quot; },
&quot;s&quot;: { &quot;suppno&quot;: &quot;s2&quot;, &quot;location&quot;: &quot;Atlanta&quot; } },
...
]
</pre></div></div>
</li>
</ul>
</li>
</ol></div>
<div class="section">
<h2><a name="Scoping" id="Scoping">Scoping</a></h2>
<p>In general, the variables that are in scope at a particular position are those variables that were bound earlier in the current query block, in outer (enclosing) query blocks, or in a <tt>WITH</tt> clause at the beginning of the query. More specific rules follow.</p>
<p>The clauses in a query block are conceptually processed in the following order:</p>
<ul>
<li><tt>FROM</tt> (followed by LET subclause, if any)</li>
<li><tt>WHERE</tt></li>
<li><tt>GROUP BY</tt> (followed by LET subclause, if any)</li>
<li><tt>HAVING</tt></li>
<li><tt>SELECT</tt> or <tt>SELECT VALUE</tt></li>
<li><tt>ORDER BY</tt></li>
<li><tt>OFFSET</tt></li>
<li><tt>LIMIT</tt></li>
</ul>
<p>During processing of each clause, the variables that are in scope are those variables that are bound in the following places:</p>
<ol style="list-style-type: decimal">
<li>
<p>In earlier clauses of the same query block (as defined by the ordering given above).</p>
<p>Example: <tt>FROM orders AS o SELECT o.date</tt> The variable <tt>o</tt> in the <tt>SELECT</tt> clause is bound, in turn, to each object in the dataset <tt>orders</tt>.</p>
</li>
<li>
<p>In outer query blocks in which the current query block is nested. In case of duplication, the innermost binding wins.</p>
</li>
<li>
<p>In the <tt>WITH</tt> clause (if any) at the beginning of the query.</p>
</li>
</ol>
<p>However, in a query block where a <tt>GROUP BY</tt> clause is present:</p>
<ol style="list-style-type: decimal">
<li>
<p>In clauses processed before <tt>GROUP BY</tt>, scoping rules are the same as though no GROUP BY were present.</p>
</li>
<li>
<p>In clauses processed after <tt>GROUP BY</tt>, the variables bound in the nearest <tt>FROM</tt>-clause (and its <tt>LET</tt> subclause, if any) are removed from scope and replaced by the variables bound in the <tt>GROUP BY</tt> clause (and its <tt>LET</tt> subclause, if any). However, this replacement does not apply inside the arguments of the five SQL special aggregating functions (<tt>MIN</tt>, <tt>MAX</tt>, <tt>AVG</tt>, <tt>SUM</tt>, and <tt>COUNT</tt>). These functions still need to see the individual data items over which they are computing an aggregation. For example, after <tt>FROM employee AS e GROUP BY deptno</tt>, it would not be valid to reference <tt>e.salary</tt>, but <tt>AVG(e.salary)</tt> would be valid.</p>
</li>
</ol>
<p>Special case: In an expression inside a <tt>FROM</tt> clause, a variable is in scope if it was bound in an earlier expression in the same <tt>FROM</tt> clause. Example:</p>
<div>
<div>
<pre class="source">FROM orders AS o, o.items AS i
</pre></div></div>
<p>The reason for this special case is to support iteration over nested collections.</p>
<p>Note that, since the <tt>SELECT</tt> clause comes <i>after</i> the <tt>WHERE</tt> and <tt>GROUP BY</tt> clauses in conceptual processing order, any variables defined in <tt>SELECT</tt> are not visible in <tt>WHERE</tt> or <tt>GROUP BY</tt>. Therefore the following query will not return what might be the expected result (since in the WHERE clause, <tt>pay</tt> will be interpreted as a field in the <tt>emp</tt> object rather than as the computed value <tt>salary + bonus</tt>):</p>
<div>
<div>
<pre class="source">SELECT name, salary + bonus AS pay
FROM emp
WHERE pay &gt; 1000
ORDER BY pay
</pre></div></div>
<p>The likely intent of the query above can be accomplished as follows:</p>
<div>
<div>
<pre class="source">FROM emp AS e
LET pay = e.salary + e.bonus
WHERE pay &gt; 1000
SELECT e.name, pay
ORDER BY pay
</pre></div></div>
<p>Note that in the phrase <i>expr1</i> <tt>JOIN</tt> <i>expr2</i> <tt>ON</tt> <i>expr3</i>, variables defined in <i>expr1</i> are visible in <i>expr3</i> but not in <i>expr2</i>. Here&#x2019;s an example that will not work:</p>
<div>
<div>
<pre class="source">FROM orders AS o JOIN o.items AS i ON 1 = 1
</pre></div></div>
<p>The variable <tt>o</tt>, defined in the phrase before <tt>JOIN</tt>, cannot be used in the phrase immediately following <tt>JOIN</tt>. The probable intent of this example could be accomplished in either of the following ways:</p>
<div>
<div>
<pre class="source">FROM orders AS o UNNEST o.items AS i
FROM orders AS o, o.items AS i
</pre></div></div>
<p>To summarize this rule: You may not use left-correlation in an explicit <tt>JOIN</tt> clause.</p></div>
<div class="section">
<h2><a name="Resolving_Names"></a><a name="Resolving_names" id="Resolving_names">Resolving Names</a></h2>
<p>The process of name resolution begins with the leftmost identifier in the name. The rules for resolving the leftmost identifier are:</p>
<ol style="list-style-type: decimal">
<li>
<p><i>In a <tt>FROM</tt> clause</i>: Names in a <tt>FROM</tt> clause identify the collections over which the query block will iterate. These collections may be stored datasets, views, synonyms, or may be the results of nested query blocks. A stored dataset may be in a named dataverse or in the default dataverse. Thus, if the two-part name <tt>a.b</tt> is in a <tt>FROM</tt> clause, a might represent a dataverse and <tt>b</tt> might represent a dataset in that dataverse. Another example of a two-part name in a <tt>FROM</tt> clause is <tt>FROM orders AS o, o.items AS i</tt>. In <tt>o.items</tt>, <tt>o</tt> represents an order object bound earlier in the <tt>FROM</tt> clause, and items represents the items object inside that order.</p>
<p>The rules for resolving the leftmost identifier in a <tt>FROM</tt> clause (including a <tt>JOIN</tt> subclause), or in the expression following <tt>IN</tt> in a quantified predicate, are as follows:</p>
<ul>
<li>
<p>(1A): If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. (Note that in the case of a subquery, an in-scope variable might have been bound in an outer query block; this is called a correlated subquery).</p>
</li>
<li>
<p>(1B): Otherwise, if the identifier is the first part of a two-part name like <tt>a.b</tt>, the name is treated as <tt>dataverse.dataset</tt>. If the identifier stands alone as a one-part name, it is treated as the name of a dataset in the default dataverse. If the designated dataset exists then the identifier is resolved to that dataset, othwerise if a view with given name exists then the identifier is resolved to that view, otherwise if a synonym with given name exists then the identifier is resolved to the target dataset or the target view of that synonym (potentially recursively if this synonym points to another synonym). An error will result if the designated dataset, view, or a synonym with this name does not exist.</p>
<p>Datasets and views take precedence over synonyms, so if both a dataset (or a view) and a synonym have the same name then the resolution is to the dataset. Note that there cannot be a dataset and a view with the same name.</p>
</li>
</ul>
</li>
<li>
<p><i>Elsewhere in a query block</i>: In clauses other than <tt>FROM</tt>, a name typically identifies a field of some object. For example, if the expression <tt>a.b</tt> is in a <tt>SELECT</tt> or <tt>WHERE</tt> clause, it&#x2019;s likely that <tt>a</tt> represents an object and <tt>b</tt> represents a field in that object.</p>
<p>The rules for resolving the leftmost identifier in clauses other than the ones listed in Rule 1 are:</p>
<ul>
<li>
<p>(2A): If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. (In the case of a correlated subquery, the in-scope variable might have been bound in an outer query block).</p>
</li>
<li>
<p>(2B): (The &#x201c;Single Variable Rule&#x201d;): Otherwise, if the <tt>FROM</tt> clause in the current query block binds exactly one variable, the identifier is treated as a field access on the object bound to that variable. For example, in the query <tt>FROM customer SELECT address</tt>, the identifier address is treated as a field in the object bound to the variable <tt>customer</tt>. At runtime, if the object bound to <tt>customer</tt> has no <tt>address</tt> field, the <tt>address</tt> expression will return <tt>missing</tt>. If the <tt>FROM</tt> clause in the current query block binds multiple variables, name resolution fails with an &#x201c;ambiguous name&#x201d; error. If there&#x2019;s no <tt>FROM</tt> clause in the current query block, name resolution fails with an &#x201c;undefined identifier&#x201d; error. Note that the Single Variable Rule searches for bound variables only in the current query block, not in outer (containing) blocks. The purpose of this rule is to permit the compiler to resolve field-references unambiguously without relying on any schema information. Also note that variables defined by <tt>LET</tt> clauses do not participate in the resolution process performed by this rule.</p>
<p>Exception: In a query that has a <tt>GROUP BY</tt> clause, the Single Variable Rule does not apply in any clauses that occur after the <tt>GROUP BY</tt> because, in these clauses, the variables bound by the <tt>FROM</tt> clause are no longer in scope. In clauses after <tt>GROUP BY</tt>, only Rule (2A) applies.</p>
</li>
</ul>
</li>
<li>
<p>In an <tt>ORDER BY</tt> clause following a <tt>UNION ALL</tt> expression:</p>
<p>The leftmost identifier is treated as a field-access on the objects that are generated by the <tt>UNION ALL</tt>. For example:</p>
<div>
<div>
<pre class="source">query-block-1
UNION ALL
query-block-2
ORDER BY salary
</pre></div></div>
<p>In the result of this query, objects that have a foo field will be ordered by the value of this field; objects that have no foo field will appear at at the beginning of the query result (in ascending order) or at the end (in descending order.)</p>
</li>
<li>
<p><i>In a standalone expression</i>: If a query consists of a standalone expression then identifiers inside that expression are resolved according to Rule 1. For example, if the whole query is <tt>ARRAY_COUNT(a.b)</tt> then <tt>a.b</tt> will be treated as dataset <tt>b</tt> contained in dataverse <tt>a</tt>. Note that this rule only applies to identifiers which are located directly inside a standalone expression. Identifiers inside <tt>SELECT</tt> statements in a standalone expression are still resolved according to Rules 1-3. For example, if the whole query is <tt>ARRAY_SUM( (FROM employee AS e SELECT VALUE salary) )</tt> then <tt>salary</tt> is resolved as <tt>e.salary</tt> following the &#x201c;Single Variable Rule&#x201d; (Rule (2B)).</p>
</li>
<li>
<p>Once the leftmost identifier has been resolved, the following dots and identifiers in the name (if any) are treated as a path expression that navigates to a field nested inside that object. The name resolves to the field at the end of the path. If this field does not exist, the value <tt>missing</tt> is returned.</p>
</li>
</ol><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
</div>
<div class="section">
<h2><a name="Appendix_4._Example_Data"></a><a name="Manual_data" id="Manual_data">Appendix 4. Example Data</a></h2><!--
! Licensed to the Apache Software Foundation (ASF) under one
! or more contributor license agreements. See the NOTICE file
! distributed with this work for additional information
! regarding copyright ownership. The ASF licenses this file
! to you under the Apache License, Version 2.0 (the
! "License"); you may not use this file except in compliance
! with the License. You may obtain a copy of the License at
!
! http://www.apache.org/licenses/LICENSE-2.0
!
! Unless required by applicable law or agreed to in writing,
! software distributed under the License is distributed on an
! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
! KIND, either express or implied. See the License for the
! specific language governing permissions and limitations
! under the License.
!-->
<p>This appendix lists the data definitions and the datasets used for the examples provided throughout this manual.</p>
<div class="section">
<h3><a name="Data_Definitions"></a><a name="definition_statements" id="definition_statements">Data Definitions</a></h3>
<div>
<div>
<pre class="source">CREATE DATAVERSE Commerce IF NOT EXISTS;
USE Commerce;
CREATE TYPE addressType AS {
street: string,
city: string,
zipcode: string?
};
CREATE TYPE customerType AS {
custid: string,
name: string,
address: addressType?
};
CREATE DATASET customers(customerType)
PRIMARY KEY custid;
CREATE TYPE itemType AS {
itemno: int,
qty: int,
price: int
};
CREATE TYPE orderType AS {
orderno: int,
custid: string,
order_date: string,
ship_date: string?,
items: [ itemType ]
};
CREATE DATASET orders(orderType)
PRIMARY KEY orderno;
</pre></div></div>
</div>
<div class="section">
<h3><a name="Customers_Data"></a><a name="customers_data" id="customers_data">Customers Data</a></h3>
<div>
<div>
<pre class="source">[
{
&quot;custid&quot;: &quot;C13&quot;,
&quot;name&quot;: &quot;T. Cody&quot;,
&quot;address&quot;: {
&quot;street&quot;: &quot;201 Main St.&quot;,
&quot;city&quot;: &quot;St. Louis, MO&quot;,
&quot;zipcode&quot;: &quot;63101&quot;
},
&quot;rating&quot;: 750
},
{
&quot;custid&quot;: &quot;C25&quot;,
&quot;name&quot;: &quot;M. Sinclair&quot;,
&quot;address&quot;: {
&quot;street&quot;: &quot;690 River St.&quot;,
&quot;city&quot;: &quot;Hanover, MA&quot;,
&quot;zipcode&quot;: &quot;02340&quot;
},
&quot;rating&quot;: 690
},
{
&quot;custid&quot;: &quot;C31&quot;,
&quot;name&quot;: &quot;B. Pruitt&quot;,
&quot;address&quot;: {
&quot;street&quot;: &quot;360 Mountain Ave.&quot;,
&quot;city&quot;: &quot;St. Louis, MO&quot;,
&quot;zipcode&quot;: &quot;63101&quot;
}
},
{
&quot;custid&quot;: &quot;C35&quot;,
&quot;name&quot;: &quot;J. Roberts&quot;,
&quot;address&quot;: {
&quot;street&quot;: &quot;420 Green St.&quot;,
&quot;city&quot;: &quot;Boston, MA&quot;,
&quot;zipcode&quot;: &quot;02115&quot;
},
&quot;rating&quot;: 565
},
{
&quot;custid&quot;: &quot;C37&quot;,
&quot;name&quot;: &quot;T. Henry&quot;,
&quot;address&quot;: {
&quot;street&quot;: &quot;120 Harbor Blvd.&quot;,
&quot;city&quot;: &quot;Boston, MA&quot;,
&quot;zipcode&quot;: &quot;02115&quot;
},
&quot;rating&quot;: 750
},
{
&quot;custid&quot;: &quot;C41&quot;,
&quot;name&quot;: &quot;R. Dodge&quot;,
&quot;address&quot;: {
&quot;street&quot;: &quot;150 Market St.&quot;,
&quot;city&quot;: &quot;St. Louis, MO&quot;,
&quot;zipcode&quot;: &quot;63101&quot;
},
&quot;rating&quot;: 640
},
{
&quot;custid&quot;: &quot;C47&quot;,
&quot;name&quot;: &quot;S. Logan&quot;,
&quot;address&quot;: {
&quot;street&quot;: &quot;Via del Corso&quot;,
&quot;city&quot;: &quot;Rome, Italy&quot;
},
&quot;rating&quot;: 625
}
]
</pre></div></div>
</div>
<div class="section">
<h3><a name="Orders_Data"></a><a name="orders_data" id="orders_data">Orders Data</a></h3>
<div>
<div>
<pre class="source">[
{
&quot;orderno&quot;: 1001,
&quot;custid&quot;: &quot;C41&quot;,
&quot;order_date&quot;: &quot;2020-04-29&quot;,
&quot;ship_date&quot;: &quot;2020-05-03&quot;,
&quot;items&quot;: [
{
&quot;itemno&quot;: 347,
&quot;qty&quot;: 5,
&quot;price&quot;: 19.99
},
{
&quot;itemno&quot;: 193,
&quot;qty&quot;: 2,
&quot;price&quot;: 28.89
}
]
},
{
&quot;orderno&quot;: 1002,
&quot;custid&quot;: &quot;C13&quot;,
&quot;order_date&quot;: &quot;2020-05-01&quot;,
&quot;ship_date&quot;: &quot;2020-05-03&quot;,
&quot;items&quot;: [
{
&quot;itemno&quot;: 460,
&quot;qty&quot;: 95,
&quot;price&quot;: 100.99
},
{
&quot;itemno&quot;: 680,
&quot;qty&quot;: 150,
&quot;price&quot;: 8.75
}
]
},
{
&quot;orderno&quot;: 1003,
&quot;custid&quot;: &quot;C31&quot;,
&quot;order_date&quot;: &quot;2020-06-15&quot;,
&quot;ship_date&quot;: &quot;2020-06-16&quot;,
&quot;items&quot;: [
{
&quot;itemno&quot;: 120,
&quot;qty&quot;: 2,
&quot;price&quot;: 88.99
},
{
&quot;itemno&quot;: 460,
&quot;qty&quot;: 3,
&quot;price&quot;: 99.99
}
]
},
{
&quot;orderno&quot;: 1004,
&quot;custid&quot;: &quot;C35&quot;,
&quot;order_date&quot;: &quot;2020-07-10&quot;,
&quot;ship_date&quot;: &quot;2020-07-15&quot;,
&quot;items&quot;: [
{
&quot;itemno&quot;: 680,
&quot;qty&quot;: 6,
&quot;price&quot;: 9.99
},
{
&quot;itemno&quot;: 195,
&quot;qty&quot;: 4,
&quot;price&quot;: 35
}
]
},
{
&quot;orderno&quot;: 1005,
&quot;custid&quot;: &quot;C37&quot;,
&quot;order_date&quot;: &quot;2020-08-30&quot;,
&quot;items&quot;: [
{
&quot;itemno&quot;: 460,
&quot;qty&quot;: 2,
&quot;price&quot;: 99.98
},
{
&quot;itemno&quot;: 347,
&quot;qty&quot;: 120,
&quot;price&quot;: 22
},
{
&quot;itemno&quot;: 780,
&quot;qty&quot;: 1,
&quot;price&quot;: 1500
},
{
&quot;itemno&quot;: 375,
&quot;qty&quot;: 2,
&quot;price&quot;: 149.98
}
]
},
{
&quot;orderno&quot;: 1006,
&quot;custid&quot;: &quot;C41&quot;,
&quot;order_date&quot;: &quot;2020-09-02&quot;,
&quot;ship_date&quot;: &quot;2020-09-04&quot;,
&quot;items&quot;: [
{
&quot;itemno&quot;: 680,
&quot;qty&quot;: 51,
&quot;price&quot;: 25.98
},
{
&quot;itemno&quot;: 120,
&quot;qty&quot;: 65,
&quot;price&quot;: 85
},
{
&quot;itemno&quot;: 460,
&quot;qty&quot;: 120,
&quot;price&quot;: 99.98
}
]
},
{
&quot;orderno&quot;: 1007,
&quot;custid&quot;: &quot;C13&quot;,
&quot;order_date&quot;: &quot;2020-09-13&quot;,
&quot;ship_date&quot;: &quot;2020-09-20&quot;,
&quot;items&quot;: [
{
&quot;itemno&quot;: 185,
&quot;qty&quot;: 5,
&quot;price&quot;: 21.99
},
{
&quot;itemno&quot;: 680,
&quot;qty&quot;: 1,
&quot;price&quot;: 20.5
}
]
},
{
&quot;orderno&quot;: 1008,
&quot;custid&quot;: &quot;C13&quot;,
&quot;order_date&quot;: &quot;2020-10-13&quot;,
&quot;items&quot;: [
{
&quot;itemno&quot;: 460,
&quot;qty&quot;: 20,
&quot;price&quot;: 99.99
}
]
},
{
&quot;orderno&quot;: 1009,
&quot;custid&quot;: &quot;C13&quot;,
&quot;order_date&quot;: &quot;2020-10-13&quot;,
&quot;items&quot;: []
}
]
</pre></div></div></div></div>
</div>
</div>
</div>
<hr/>
<footer>
<div class="container-fluid">
<div class="row-fluid">
<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
feather logo, and the Apache AsterixDB project logo are either
registered trademarks or trademarks of The Apache Software
Foundation in the United States and other countries.
All other marks mentioned may be trademarks or registered
trademarks of their respective owners.
</div>
</div>
</div>
</footer>
</body>
</html>