blob: 1831c23fc2409ace9817f625e64b3ef21ce2757a [file] [log] [blame]
Ian Maxon858061a2022-05-12 19:11:28 -07001<!DOCTYPE html>
2<!--
3 | Generated by Apache Maven Doxia Site Renderer 1.8.1 from target/generated-site/markdown/sqlpp/manual.md at 2022-05-12
4 | Rendered using Apache Maven Fluido Skin 1.7
5-->
6<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7 <head>
8 <meta charset="UTF-8" />
9 <meta name="viewport" content="width=device-width, initial-scale=1.0" />
10 <meta name="Date-Revision-yyyymmdd" content="20220512" />
11 <meta http-equiv="Content-Language" content="en" />
12 <title>AsterixDB &#x2013; The SQL++ Query Language</title>
13 <link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
14 <link rel="stylesheet" href="../css/site.css" />
15 <link rel="stylesheet" href="../css/print.css" media="print" />
16 <script type="text/javascript" src="../js/apache-maven-fluido-1.7.min.js"></script>
17
18 </head>
19 <body class="topBarDisabled">
20 <div class="container-fluid">
21 <div id="banner">
22 <div class="pull-left"><a href=".././" id="bannerLeft"><img src="../images/asterixlogo.png" alt="AsterixDB"/></a></div>
23 <div class="pull-right"></div>
24 <div class="clear"><hr/></div>
25 </div>
26
27 <div id="breadcrumbs">
28 <ul class="breadcrumb">
29 <li id="publishDate">Last Published: 2022-05-12</li>
30 <li id="projectVersion" class="pull-right">Version: 0.9.8</li>
31 <li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
32 </ul>
33 </div>
34 <div class="row-fluid">
35 <div id="leftColumn" class="span2">
36 <div class="well sidebar-nav">
37 <ul class="nav nav-list">
38 <li class="nav-header">Get Started - Installation</li>
39 <li><a href="../ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
40 <li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
41 <li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
42 <li class="nav-header">AsterixDB Primer</li>
43 <li><a href="../sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
44 <li class="nav-header">Data Model</li>
45 <li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
46 <li class="nav-header">Queries</li>
47 <li class="active"><a href="#"><span class="none"></span>The SQL++ Query Language</a></li>
48 <li><a href="../SQLPP.html" title="Raw SQL++ Grammar"><span class="none"></span>Raw SQL++ Grammar</a></li>
49 <li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
50 <li class="nav-header">API/SDK</li>
51 <li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
52 <li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
53 <li class="nav-header">Advanced Features</li>
54 <li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
55 <li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
56 <li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
57 <li><a href="../sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
58 <li><a href="../sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
59 <li><a href="../sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
60 <li><a href="../geo/quickstart.html" title="GIS Support Overview"><span class="none"></span>GIS Support Overview</a></li>
61 <li><a href="../geo/functions.html" title="GIS Functions"><span class="none"></span>GIS Functions</a></li>
62 <li><a href="../interval_join.html" title="Support of Interval Joins"><span class="none"></span>Support of Interval Joins</a></li>
63 <li><a href="../spatial_join.html" title="Support of Spatial Joins"><span class="none"></span>Support of Spatial Joins</a></li>
64 <li><a href="../sqlpp/arrayindex.html" title="Support of Array Indexes"><span class="none"></span>Support of Array Indexes</a></li>
65 <li class="nav-header">Deprecated</li>
66 <li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
67 <li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
68 <li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
69</ul>
70 <hr />
71 <div id="poweredBy">
72 <div class="clear"></div>
73 <div class="clear"></div>
74 <div class="clear"></div>
75 <div class="clear"></div>
76<a href=".././" title="AsterixDB" class="builtBy"><img class="builtBy" alt="AsterixDB" src="../images/asterixlogo.png" /></a>
77 </div>
78 </div>
79 </div>
80 <div id="bodyColumn" class="span10" >
81<!--
82 ! Licensed to the Apache Software Foundation (ASF) under one
83 ! or more contributor license agreements. See the NOTICE file
84 ! distributed with this work for additional information
85 ! regarding copyright ownership. The ASF licenses this file
86 ! to you under the Apache License, Version 2.0 (the
87 ! "License"); you may not use this file except in compliance
88 ! with the License. You may obtain a copy of the License at
89 !
90 ! http://www.apache.org/licenses/LICENSE-2.0
91 !
92 ! Unless required by applicable law or agreed to in writing,
93 ! software distributed under the License is distributed on an
94 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
95 ! KIND, either express or implied. See the License for the
96 ! specific language governing permissions and limitations
97 ! under the License.
98 !-->
99<h1>The SQL++ Query Language</h1>
100<ul>
101
102<li><a href="#Introduction">1. Introduction</a></li>
103<li><a href="#Expressions">2. Expressions</a>
104<ul>
105
106<li><a href="#Operator_expressions">Operator Expressions</a>
107<ul>
108
109<li><a href="#Arithmetic_operators">Arithmetic Operators</a></li>
110<li><a href="#Collection_operators">Collection Operators</a></li>
111<li><a href="#Comparison_operators">Comparison Operators</a></li>
112<li><a href="#Logical_operators">Logical Operators</a></li>
113</ul>
114</li>
115<li><a href="#Quantified_expressions">Quantified Expressions</a></li>
116<li><a href="#Path_expressions">Path Expressions</a></li>
117<li><a href="#Primary_expressions">Primary Expressions</a>
118<ul>
119
120<li><a href="#Literals">Literals</a></li>
121<li><a href="#Variable_references">Identifiers and Variable References</a></li>
122<li><a href="#Parameter_references">Parameter References</a></li>
123<li><a href="#Parenthesized_expressions">Parenthesized Expressions</a></li>
124<li><a href="#Function_call_expressions">Function Calls</a></li>
125<li><a href="#Case_expressions">Case Expressions</a></li>
126<li><a href="#Constructors">Constructors</a></li>
127</ul>
128</li>
129</ul>
130</li>
131<li><a href="#Queries">3. Queries</a>
132<ul>
133
134<li><a href="#Select_clauses">SELECT Clauses</a>
135<ul>
136
137<li><a href="#Select_element">Select Value</a></li>
138<li><a href="#SQL_select">SQL-style Select</a></li>
139<li><a href="#Select_star">Select *</a></li>
140<li><a href="#Select_distinct">Select Distinct</a></li>
141<li><a href="#Unnamed_projections">Unnamed Projections</a></li>
142<li><a href="#Abbreviated_field_access_expressions">Abbreviated Field Access Expressions</a></li>
143</ul>
144</li>
145<li><a href="#From_clauses">FROM clauses</a>
146<ul>
147
148<li><a href="#Joins">Joins</a></li>
149</ul>
150</li>
151<li><a href="#Let_clauses">LET Clauses</a></li>
152<li><a href="#WHERE_Clause">WHERE Clause</a></li>
153<li><a href="#Grouping">Grouping</a>
154<ul>
155
156<li><a href="#GROUP_BY_Clause">GROUP BY Clause</a></li>
157<li><a href="#HAVING_Clause">HAVING Clause</a></li>
158<li><a href="#Aggregation_PseudoFunctions">Aggregation Pseudo-functions</a></li>
159<li><a href="#GROUP_AS_Clause">GROUP AS Clause</a></li>
160</ul>
161</li>
162<li><a href="#Union_all">Selection and UNION ALL</a></li>
163<li><a href="#With_clauses">WITH Clauses</a></li>
164<li><a href="#Order_By_clauses">ORDER BY, LIMIT, and OFFSET Clauses</a></li>
165<li><a href="#Subqueries">Subqueries</a></li>
166</ul>
167</li>
168<li><a href="#Over_clauses">4. Window Functions</a>
169<ul>
170
171<li><a href="#Window_function_call">Window Function Call</a>
172<ul>
173
174<li><a href="#Window_function_arguments">Window Function Arguments</a></li>
175<li><a href="#Window_function_options">Window Function Options</a></li>
176<li><a href="#Window_frame_variable">Window Frame Variable</a></li>
177<li><a href="#Window_definition">Window Definition</a></li>
178</ul>
179</li>
180</ul>
181</li>
182<li><a href="#Errors">5. Errors</a>
183<ul>
184
185<li><a href="#Syntax_errors">Syntax Errors</a></li>
186<li><a href="#Identifier_resolution_errors">Identifier Resolution Errors</a></li>
187<li><a href="#Type_errors">Type Errors</a></li>
188<li><a href="#Resource_errors">Resource Errors</a></li>
189</ul>
190</li>
191<li><a href="#Vs_SQL-92">6. Differences from SQL-92</a></li>
192<li><a href="#DDL_and_DML_statements">7. DDL and DML Statements</a>
193<ul>
194
195<li><a href="#Lifecycle_management_statements">Lifecycle Management Statements</a>
196<ul>
197
198<li><a href="#Use">Use Statement</a></li>
199<li><a href="#Sets">Set Statement</a></li>
200<li><a href="#Functions">Function Declaration</a></li>
201<li><a href="#Create">Create Statement</a>
202<ul>
203
204<li><a href="#Dataverses">Create Dataverse</a></li>
205<li><a href="#Types">Create Type</a></li>
206<li><a href="#Datasets">Create Dataset</a></li>
207<li><a href="#Indices">Create Index</a></li>
208<li><a href="#Synonyms">Create Synonym</a></li>
209<li><a href="#Create_function">Create Function</a></li>
210<li><a href="#Create_view">Create View</a></li>
211</ul>
212</li>
213<li><a href="#Removal">Drop Statement</a></li>
214<li><a href="#Load_statement">Load Statement</a></li>
215</ul>
216</li>
217<li><a href="#Modification_statements">Modification Statements</a>
218<ul>
219
220<li><a href="#Inserts">Insert Statement</a></li>
221<li><a href="#Upserts">Upsert Statement</a></li>
222<li><a href="#Deletes">Delete Statement</a></li>
223</ul>
224</li>
225</ul>
226</li>
227<li><a href="#Reserved_keywords">Appendix 1. Reserved Keywords</a></li>
228<li><a href="#Performance_tuning">Appendix 2. Performance Tuning</a>
229<ul>
230
231<li><a href="#Parallelism_parameter">Parallelism Parameter</a></li>
232<li><a href="#Memory_parameters">Memory Parameters</a></li>
233<li><a href="#Query_hints">Query Hints</a></li>
234</ul>
235</li>
236<li><a href="#Variable_bindings_and_name_resolution">Appendix 3. Variable Bindings and Name Resolution</a></li>
237<li><a href="#Manual_data">Appendix 4. Example Data</a>
238<ul>
239
240<li><a href="#definition_statements">Data Definitions</a></li>
241<li><a href="#customers_data">Customers Dataset</a></li>
242<li><a href="#orders_data">Orders Dataset</a></li>
243</ul>
244</li>
245</ul><!--
246 ! Licensed to the Apache Software Foundation (ASF) under one
247 ! or more contributor license agreements. See the NOTICE file
248 ! distributed with this work for additional information
249 ! regarding copyright ownership. The ASF licenses this file
250 ! to you under the Apache License, Version 2.0 (the
251 ! "License"); you may not use this file except in compliance
252 ! with the License. You may obtain a copy of the License at
253 !
254 ! http://www.apache.org/licenses/LICENSE-2.0
255 !
256 ! Unless required by applicable law or agreed to in writing,
257 ! software distributed under the License is distributed on an
258 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
259 ! KIND, either express or implied. See the License for the
260 ! specific language governing permissions and limitations
261 ! under the License.
262 !-->
263
264<h1><a name="Introduction" id="Introduction">1. Introduction</a></h1>
265<p>This document is intended as a reference guide to the full syntax and semantics of AsterixDB&#x2019;s query language, a SQL-based language for working with semistructured data. The language is a derivative of SQL++, a declarative query language for JSON data which is largely backwards compatible with SQL. SQL++ originated from research in the FORWARD project at UC San Diego, and it has much in common with SQL; some differences exist due to the different data models that the two languages were designed to serve. SQL was designed for interacting with the flat, schema-ified world of relational databases, while SQL++ generalizes SQL to also handle nested data formats (like JSON) and the schema-optional (or even schema-less) data models of modern NoSQL and BigData systems.</p>
266<p>In the context of Apache AsterixDB, SQL++ is intended for working with the Asterix Data Model (<a href="../datamodel.html">ADM</a>), a data model based on a superset of JSON with an enriched and flexible type system. New AsterixDB users are encouraged to read and work through the (much friendlier) guide &#x201c;<a href="primer-sqlpp.html">AsterixDB 101: An ADM and SQL++ Primer</a>&#x201d; before attempting to make use of this document. In addition, readers are advised to read through the <a href="../datamodel.html">Asterix Data Model (ADM) reference guide</a> first as well, as an understanding of the data model is a prerequisite to understanding SQL++.</p>
267<p>In what follows, we detail the features of the SQL++ language in a grammar-guided manner. We list and briefly explain each of the productions in the query grammar, offering examples (and results) for clarity. In this manual, we will explain how to use the various features of SQL++ using two datasets named <tt>customers</tt> and <tt>orders</tt>. Each dataset is a collection of objects. The contents of the example datasets can be found at the end of this manual in <a href="#Manual_data">Appendix 4</a>.</p>
268<p>For additional reading on SQL++ and more examples, refer to <a class="externalLink" href="https://asterixdb.apache.org/files/SQL_Book.pdf">SQL++ for SQL Users: A Tutorial</a>.</p><!--
269 ! Licensed to the Apache Software Foundation (ASF) under one
270 ! or more contributor license agreements. See the NOTICE file
271 ! distributed with this work for additional information
272 ! regarding copyright ownership. The ASF licenses this file
273 ! to you under the Apache License, Version 2.0 (the
274 ! "License"); you may not use this file except in compliance
275 ! with the License. You may obtain a copy of the License at
276 !
277 ! http://www.apache.org/licenses/LICENSE-2.0
278 !
279 ! Unless required by applicable law or agreed to in writing,
280 ! software distributed under the License is distributed on an
281 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
282 ! KIND, either express or implied. See the License for the
283 ! specific language governing permissions and limitations
284 ! under the License.
285 !-->
286
287<h1><a name="Expressions" id="Expressions">2. Expressions</a></h1><!--
288 ! Licensed to the Apache Software Foundation (ASF) under one
289 ! or more contributor license agreements. See the NOTICE file
290 ! distributed with this work for additional information
291 ! regarding copyright ownership. The ASF licenses this file
292 ! to you under the Apache License, Version 2.0 (the
293 ! "License"); you may not use this file except in compliance
294 ! with the License. You may obtain a copy of the License at
295 !
296 ! http://www.apache.org/licenses/LICENSE-2.0
297 !
298 ! Unless required by applicable law or agreed to in writing,
299 ! software distributed under the License is distributed on an
300 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
301 ! KIND, either express or implied. See the License for the
302 ! specific language governing permissions and limitations
303 ! under the License.
304 !-->
305
306<p>An expression is a language fragment that can be evaluated to return a value. For example, the expression 2 + 3 returns the value 5. Expressions are the building blocks from which queries are constructed. SQL++ supports nearly all of the kinds of expressions in SQL, and adds some new kinds as well.</p>
307<p>SQL++ is an orthogonal language, which means that expressions can serve as operands of higher level expressions. By nesting expressions inside other expressions, complex queries can be built up. Any expression can be enclosed in parentheses to establish operator precedence.</p>
308<p>In this section, we&#x2019;ll discuss the various kinds of SQL++ expressions.</p>
309<div class="section">
310<div class="section">
311<div class="section">
312<div class="section">
313<h5><a name="Expr"></a>Expr</h5>
314<p><img src="../images/diagrams/Expr.png" alt="" /></p></div></div></div></div>
315<div class="section">
316<h2><a name="Operator_Expressions"></a><a name="Operator_expressions" id="Operator_expressions">Operator Expressions</a></h2>
317<p>Operators perform a specific operation on the input values or expressions. The syntax of an operator expression is as follows:</p>
318<div class="section">
319<div class="section">
320<div class="section">
321<h5><a name="OperatorExpr"></a>OperatorExpr</h5>
322<p><img src="../images/diagrams/OperatorExpr.png" alt="" /></p>
323<p>The language provides a full set of operators that you can use within its statements. Here are the categories of operators:</p>
324<ul>
325
326<li><a href="#Arithmetic_operators">Arithmetic Operators</a>, to perform basic mathematical operations;</li>
327<li><a href="#Collection_operators">Collection Operators</a>, to evaluate expressions on collections or objects;</li>
328<li><a href="#Comparison_operators">Comparison Operators</a>, to compare two expressions;</li>
329<li><a href="#Logical_operators">Logical Operators</a>, to combine operators using Boolean logic.</li>
330</ul>
331<p>The following table summarizes the precedence order (from higher to lower) of the major unary and binary operators:</p>
332<table border="0" class="table table-striped">
333<thead>
334
335<tr class="a">
336<th> Operator </th>
337<th> Operation </th></tr>
338</thead><tbody>
339
340<tr class="b">
341<td> EXISTS, NOT EXISTS </td>
342<td> Collection emptiness testing </td></tr>
343<tr class="a">
344<td> ^ </td>
345<td> Exponentiation </td></tr>
346<tr class="b">
347<td> *, /, DIV, MOD (%) </td>
348<td> Multiplication, division, modulo </td></tr>
349<tr class="a">
350<td> +, - </td>
351<td> Addition, subtraction </td></tr>
352<tr class="b">
353<td> || </td>
354<td> String concatenation </td></tr>
355<tr class="a">
356<td> IS NULL, IS NOT NULL, IS MISSING, IS NOT MISSING, <br />IS UNKNOWN, IS NOT UNKNOWN, IS VALUED, IS NOT VALUED </td>
357<td> Unknown value comparison </td></tr>
358<tr class="b">
359<td> BETWEEN, NOT BETWEEN </td>
360<td> Range comparison (inclusive on both sides) </td></tr>
361<tr class="a">
362<td> =, !=, &lt;&gt;, &lt;, &gt;, &lt;=, &gt;=, LIKE, NOT LIKE, IN, NOT IN, IS DISTINCT FROM, IS NOT DISTINCT FROM </td>
363<td> Comparison </td></tr>
364<tr class="b">
365<td> NOT </td>
366<td> Logical negation </td></tr>
367<tr class="a">
368<td> AND </td>
369<td> Conjunction </td></tr>
370<tr class="b">
371<td> OR </td>
372<td> Disjunction </td></tr>
373</tbody>
374</table>
375<p>In general, if any operand evaluates to a <tt>MISSING</tt> value, the enclosing operator will return <tt>MISSING</tt>; if none of the operands evaluates to a <tt>MISSING</tt> value but there is an operand which evaluates to a <tt>NULL</tt> value, the enclosing operator will return <tt>NULL</tt>. However, there are a few exceptions listed in <a href="#Comparison_operators">comparison operators</a> and <a href="#Logical_operators">logical operators</a>.</p></div></div></div>
376<div class="section">
377<h3><a name="Arithmetic_Operators"></a><a name="Arithmetic_operators" id="Arithmetic_operators">Arithmetic Operators</a></h3>
378<p>Arithmetic operators are used to exponentiate, add, subtract, multiply, and divide numeric values, or concatenate string values.</p>
379<table border="0" class="table table-striped">
380<thead>
381
382<tr class="a">
383<th> Operator </th>
384<th> Purpose </th>
385<th> Example </th></tr>
386</thead><tbody>
387
388<tr class="b">
389<td> +, - </td>
390<td> As unary operators, they denote a <br />positive or negative expression </td>
391<td> SELECT VALUE -1; </td></tr>
392<tr class="a">
393<td> +, - </td>
394<td> As binary operators, they add or subtract </td>
395<td> SELECT VALUE 1 + 2; </td></tr>
396<tr class="b">
397<td> * </td>
398<td> Multiply </td>
399<td> SELECT VALUE 4 * 2; </td></tr>
400<tr class="a">
401<td> / </td>
402<td> Divide (returns a value of type <tt>double</tt> if both operands are integers)</td>
403<td> SELECT VALUE 5 / 2; </td></tr>
404<tr class="b">
405<td> DIV </td>
406<td> Divide (returns an integer value if both operands are integers) </td>
407<td> SELECT VALUE 5 DIV 2; </td></tr>
408<tr class="a">
409<td> MOD (%) </td>
410<td> Modulo </td>
411<td> SELECT VALUE 5 % 2; </td></tr>
412<tr class="b">
413<td> ^ </td>
414<td> Exponentiation </td>
415<td> SELECT VALUE 2^3; </td></tr>
416<tr class="a">
417<td> || </td>
418<td> String concatenation </td>
419<td> SELECT VALUE &quot;ab&quot;||&quot;c&quot;||&quot;d&quot;; </td></tr>
420</tbody>
421</table></div>
422<div class="section">
423<h3><a name="Collection_Operators"></a><a name="Collection_operators" id="Collection_operators">Collection Operators</a></h3>
424<p>Collection operators are used for membership tests (IN, NOT IN) or empty collection tests (EXISTS, NOT EXISTS).</p>
425<table border="0" class="table table-striped">
426<thead>
427
428<tr class="a">
429<th> Operator </th>
430<th> Purpose </th>
431<th> Example </th></tr>
432</thead><tbody>
433
434<tr class="b">
435<td> IN </td>
436<td> Membership test </td>
437<td> FROM customers AS c <br />WHERE c.address.zipcode IN [&quot;02340&quot;, &quot;02115&quot;] <br /> SELECT *; </td></tr>
438<tr class="a">
439<td> NOT IN </td>
440<td> Non-membership test </td>
441<td> FROM customers AS c <br />WHERE c.address.zipcode NOT IN [&quot;02340&quot;, &quot;02115&quot;] <br /> SELECT *;</td></tr>
442<tr class="b">
443<td> EXISTS </td>
444<td> Check whether a collection is not empty </td>
445<td> FROM orders AS o <br />WHERE EXISTS o.items <br /> SELECT *;</td></tr>
446<tr class="a">
447<td> NOT EXISTS </td>
448<td> Check whether a collection is empty </td>
449<td> FROM orders AS o <br />WHERE NOT EXISTS o.items <br /> SELECT *; </td></tr>
450</tbody>
451</table></div>
452<div class="section">
453<h3><a name="Comparison_Operators"></a><a name="Comparison_operators" id="Comparison_operators">Comparison Operators</a></h3>
454<p>Comparison operators are used to compare values.</p>
455<p>The comparison operators fall into one of two sub-categories: missing value comparisons and regular value comparisons. SQL++ (and JSON) has two ways of representing missing information in an object &#x2014; the presence of the field with a NULL for its value (as in SQL), and the absence of the field (which JSON permits). For example, the first of the following objects represents Jack, whose friend is Jill. In the other examples, Jake is friendless &#xe0; la SQL, with a friend field that is NULL, while Joe is friendless in a more natural (for JSON) way, i.e., by not having a friend field.</p>
456<div class="section">
457<div class="section">
458<h5><a name="Examples"></a>Examples</h5>
459
460<div>
461<div>
462<pre class="source">{&quot;name&quot;: &quot;Jack&quot;, &quot;friend&quot;: &quot;Jill&quot;}
463
464{&quot;name&quot;: &quot;Jake&quot;, &quot;friend&quot;: NULL}
465
466{&quot;name&quot;: &quot;Joe&quot;}
467</pre></div></div>
468
469<p>The following table enumerates all of the comparison operators available in SQL++.</p>
470<table border="0" class="table table-striped">
471<thead>
472
473<tr class="a">
474<th> Operator </th>
475<th> Purpose </th>
476<th> Example </th></tr>
477</thead><tbody>
478
479<tr class="b">
480<td> IS NULL </td>
481<td> Test if a value is NULL </td>
482<td>FROM customers AS c <br />WHERE c.name IS NULL <br /> SELECT *; </td></tr>
483<tr class="a">
484<td> IS NOT NULL </td>
485<td> Test if a value is not NULL </td>
486<td> FROM customers AS c <br />WHERE c.name IS NOT NULL <br /> SELECT *; </td></tr>
487<tr class="b">
488<td> IS MISSING </td>
489<td> Test if a value is MISSING </td>
490<td> FROM customers AS c <br />WHERE c.name IS MISSING <br /> SELECT *; </td></tr>
491<tr class="a">
492<td> IS NOT MISSING </td>
493<td> Test if a value is not MISSING </td>
494<td> FROM customers AS c <br />WHERE c.name IS NOT MISSING <br /> SELECT *; </td></tr>
495<tr class="b">
496<td> IS UNKNOWN </td>
497<td> Test if a value is NULL or MISSING </td>
498<td> FROM customers AS c <br />WHERE c.name IS UNKNOWN <br /> SELECT *; </td></tr>
499<tr class="a">
500<td> IS NOT UNKNOWN </td>
501<td> Test if a value is neither NULL nor MISSING </td>
502<td> FROM customers AS c <br />WHERE c.name IS NOT UNKNOWN <br /> SELECT *; </td></tr>
503<tr class="b">
504<td> IS KNOWN (IS VALUED) </td>
505<td> Test if a value is neither NULL nor MISSING </td>
506<td> FROM customers AS c <br />WHERE c.name IS KNOWN <br /> SELECT *; </td></tr>
507<tr class="a">
508<td> IS NOT KNOWN (IS NOT VALUED) </td>
509<td> Test if a value is NULL or MISSING </td>
510<td> FROM customers AS c <br />WHERE c.name IS NOT KNOWN <br /> SELECT *; </td></tr>
511<tr class="b">
512<td> BETWEEN </td>
513<td> Test if a value is between a start value and an end value. The comparison is inclusive of both the start and end values. </td>
514<td> FROM customers AS c WHERE c.rating BETWEEN 600 AND 700 SELECT *;</td></tr>
515<tr class="a">
516<td> = </td>
517<td> Equality test </td>
518<td> FROM customers AS c <br /> WHERE c.rating = 640 <br /> SELECT *; </td></tr>
519<tr class="b">
520<td> != </td>
521<td> Inequality test </td>
522<td> FROM customers AS c <br /> WHERE c.rating != 640 <br /> SELECT *;</td></tr>
523<tr class="a">
524<td> &lt;&gt; </td>
525<td> Inequality test </td>
526<td> FROM customers AS c <br /> WHERE c.rating &lt;&gt; 640 <br /> SELECT *;</td></tr>
527<tr class="b">
528<td> &lt; </td>
529<td> Less than </td>
530<td> FROM customers AS c <br /> WHERE c.rating &lt; 640 <br /> SELECT *; </td></tr>
531<tr class="a">
532<td> &gt; </td>
533<td> Greater than </td>
534<td> FROM customers AS c <br /> WHERE c.rating &gt; 640 <br /> SELECT *; </td></tr>
535<tr class="b">
536<td> &lt;= </td>
537<td> Less than or equal to </td>
538<td> FROM customers AS c <br /> WHERE c.rating &lt;= 640 <br /> SELECT *; </td></tr>
539<tr class="a">
540<td> &gt;= </td>
541<td> Greater than or equal to </td>
542<td> FROM customers AS c <br /> WHERE c.rating &gt;= 640 <br /> SELECT *; </td></tr>
543<tr class="b">
544<td> LIKE </td>
545<td> Test if the left side matches a pattern defined on the right side; in the pattern, &#x201c;%&#x201d; matches any string while &#x201c;_&#x201d; matches any character. </td>
546<td> FROM customers AS c WHERE c.name LIKE &quot;%Dodge%&quot; SELECT *;</td></tr>
547<tr class="a">
548<td> NOT LIKE </td>
549<td> Test if the left side does not match a pattern defined on the right side; in the pattern, &#x201c;%&#x201d; matches any string while &#x201c;_&#x201d; matches any character. </td>
550<td> FROM customers AS c WHERE c.name NOT LIKE &quot;%Dodge%&quot; SELECT *;</td></tr>
551<tr class="b">
552<td> IS DISTINCT FROM </td>
553<td> Inequality test that treats NULL values as equal to each other and MISSING values as equal to each other </td>
554<td> FROM orders AS o <br /> WHERE o.order_date IS DISTINCT FROM o.ship_date <br /> SELECT *; </td>
555</tr>
556<tr class="a">
557<td> IS NOT DISTINCT FROM </td>
558<td> Equality test that treats NULL values as equal to each other and MISSING values as equal to each other </td>
559<td> FROM orders AS o <br /> WHERE o.order_date IS NOT DISTINCT FROM o.ship_date <br /> SELECT *; </td></tr>
560</tbody>
561</table>
562<p>The following table summarizes how the missing value comparison operators work.</p>
563<table border="0" class="table table-striped">
564<thead>
565
566<tr class="a">
567<th> Operator </th>
568<th> Non-NULL/Non-MISSING value </th>
569<th> NULL value</th>
570<th> MISSING value</th></tr>
571</thead><tbody>
572
573<tr class="b">
574<td> IS NULL </td>
575<td> FALSE </td>
576<td> TRUE </td>
577<td> MISSING </td></tr>
578<tr class="a">
579<td> IS NOT NULL </td>
580<td> TRUE </td>
581<td> FALSE </td>
582<td> MISSING </td></tr>
583<tr class="b">
584<td> IS MISSING </td>
585<td> FALSE </td>
586<td> FALSE </td>
587<td> TRUE </td></tr>
588<tr class="a">
589<td> IS NOT MISSING </td>
590<td> TRUE </td>
591<td> TRUE </td>
592<td> FALSE </td></tr>
593<tr class="b">
594<td> IS UNKNOWN </td>
595<td> FALSE </td>
596<td> TRUE </td>
597<td> TRUE </td></tr>
598<tr class="a">
599<td> IS NOT UNKNOWN </td>
600<td> TRUE </td>
601<td> FALSE </td>
602<td> FALSE</td></tr>
603<tr class="b">
604<td> IS KNOWN (IS VALUED) </td>
605<td> TRUE </td>
606<td> FALSE </td>
607<td> FALSE </td></tr>
608<tr class="a">
609<td> IS NOT KNOWN (IS NOT VALUED) </td>
610<td> FALSE </td>
611<td> TRUE </td>
612<td> TRUE </td></tr>
613</tbody>
614</table></div></div></div>
615<div class="section">
616<h3><a name="Logical_Operators"></a><a name="Logical_operators" id="Logical_operators">Logical Operators</a></h3>
617<p>Logical operators perform logical <tt>NOT</tt>, <tt>AND</tt>, and <tt>OR</tt> operations over Boolean values (<tt>TRUE</tt> and <tt>FALSE</tt>) plus <tt>NULL</tt> and <tt>MISSING</tt>.</p>
618<table border="0" class="table table-striped">
619<thead>
620
621<tr class="a">
622<th> Operator </th>
623<th> Purpose </th>
624<th> Example </th></tr>
625</thead><tbody>
626
627<tr class="b">
628<td> NOT </td>
629<td> Returns true if the following condition is false, otherwise returns false </td>
630<td> SELECT VALUE NOT 1 = 1; <br /> Returns FALSE </td></tr>
631<tr class="a">
632<td> AND </td>
633<td> Returns true if both branches are true, otherwise returns false </td>
634<td> SELECT VALUE 1 = 2 AND 1 = 1; <br /> Returns FALSE</td></tr>
635<tr class="b">
636<td> OR </td>
637<td> Returns true if one branch is true, otherwise returns false </td>
638<td> SELECT VALUE 1 = 2 OR 1 = 1; <br /> Returns TRUE </td></tr>
639</tbody>
640</table>
641<p>The following table is the truth table for <tt>AND</tt> and <tt>OR</tt>.</p>
642<table border="0" class="table table-striped">
643<thead>
644
645<tr class="a">
646<th> A </th>
647<th> B </th>
648<th> A AND B </th>
649<th> A OR B </th></tr>
650</thead><tbody>
651
652<tr class="b">
653<td> TRUE </td>
654<td> TRUE </td>
655<td> TRUE </td>
656<td> TRUE </td></tr>
657<tr class="a">
658<td> TRUE </td>
659<td> FALSE </td>
660<td> FALSE </td>
661<td> TRUE </td></tr>
662<tr class="b">
663<td> TRUE </td>
664<td> NULL </td>
665<td> NULL </td>
666<td> TRUE </td></tr>
667<tr class="a">
668<td> TRUE </td>
669<td> MISSING </td>
670<td> MISSING </td>
671<td> TRUE </td></tr>
672<tr class="b">
673<td> FALSE </td>
674<td> FALSE </td>
675<td> FALSE </td>
676<td> FALSE </td></tr>
677<tr class="a">
678<td> FALSE </td>
679<td> NULL </td>
680<td> FALSE </td>
681<td> NULL </td></tr>
682<tr class="b">
683<td> FALSE </td>
684<td> MISSING </td>
685<td> FALSE </td>
686<td> MISSING </td></tr>
687<tr class="a">
688<td> NULL </td>
689<td> NULL </td>
690<td> NULL </td>
691<td> NULL </td></tr>
692<tr class="b">
693<td> NULL </td>
694<td> MISSING </td>
695<td> MISSING </td>
696<td> NULL </td></tr>
697<tr class="a">
698<td> MISSING </td>
699<td> MISSING </td>
700<td> MISSING </td>
701<td> MISSING </td></tr>
702</tbody>
703</table>
704<p>The following table demonstrates the results of <tt>NOT</tt> on all possible inputs.</p>
705<table border="0" class="table table-striped">
706<thead>
707
708<tr class="a">
709<th> A </th>
710<th> NOT A </th></tr>
711</thead><tbody>
712
713<tr class="b">
714<td> TRUE </td>
715<td> FALSE </td></tr>
716<tr class="a">
717<td> FALSE </td>
718<td> TRUE </td></tr>
719<tr class="b">
720<td> NULL </td>
721<td> NULL </td></tr>
722<tr class="a">
723<td> MISSING </td>
724<td> MISSING </td></tr>
725</tbody>
726</table></div></div>
727<div class="section">
728<h2><a name="Quantified_Expressions"></a><a name="Quantified_expressions" id="Quantified_expressions">Quantified Expressions</a></h2>
729<div class="section">
730<div class="section">
731<div class="section">
732<h5><a name="QuantifiedExpr"></a>QuantifiedExpr</h5>
733<p><img src="../images/diagrams/QuantifiedExpr.png" alt="" /></p>
734<p>Synonym for <tt>SOME</tt>: <tt>ANY</tt></p>
735<p>Quantified expressions are used for expressing existential or universal predicates involving the elements of a collection.</p>
736<p>The following pair of examples illustrates the use of a quantified expression to test that every (or some) element in the set [1, 2, 3] of integers is less than three. The first example yields <tt>FALSE</tt> and the second example yields <tt>TRUE</tt>.</p>
737<p>It is useful to note that if the set were instead the empty set, the first expression would yield <tt>TRUE</tt> (&#x201c;every&#x201d; value in an empty set satisfies the condition) while the second expression would yield <tt>FALSE</tt> (since there isn&#x2019;t &#x201c;some&#x201d; value, as there are no values in the set, that satisfies the condition). To express a universal predicate that yields <tt>FALSE</tt> with the empty set, we would use the quantifier <tt>SOME AND EVERY</tt> in lieu of <tt>EVERY</tt>.</p>
738<p>A quantified expression will return a <tt>NULL</tt> (or <tt>MISSING</tt>) if the first expression in it evaluates to <tt>NULL</tt> (or <tt>MISSING</tt>). Otherwise, a type error will be raised if the first expression in a quantified expression does not return a collection.</p></div>
739<div class="section">
740<h5><a name="Examples"></a>Examples</h5>
741
742<div>
743<div>
744<pre class="source">EVERY x IN [ 1, 2, 3 ] SATISFIES x &lt; 3 -- &#x278a;
745SOME x IN [ 1, 2, 3 ] SATISFIES x &lt; 3 -- &#x278b;
746</pre></div></div>
747
748<p>&#x2780; Returns <tt>FALSE</tt><br />
749&#x2781; Returns <tt>TRUE</tt></p></div></div></div></div>
750<div class="section">
751<h2><a name="Path_Expressions"></a><a name="Path_expressions" id="Path_expressions">Path Expressions</a></h2>
752<div class="section">
753<div class="section">
754<div class="section">
755<h5><a name="PathExpr"></a>PathExpr</h5>
756<p><img src="../images/diagrams/PathExpr.png" alt="" /></p>
757<p>Components of complex types in the data model are accessed via path expressions. Path access can be applied to the result of a query expression that yields an instance of a complex type, for example, an object or an array instance.</p>
758<p>For objects, path access is based on field names, and it accesses the field whose name was specified.</p>
759<p>For arrays, path access is based on (zero-based) array-style indexing. Array indices can be used to retrieve either a single element from an array, or a whole subset of an array. Accessing a single element is achieved by providing a single index argument (zero-based element position), while obtaining a subset of an array is achieved by providing the <tt>start</tt> and <tt>end</tt> (zero-based) index positions; the returned subset is from position <tt>start</tt> to position <tt>end - 1</tt>; the <tt>end</tt> position argument is optional. If a position argument is negative then the element position is counted from the end of the array (<tt>-1</tt> addresses the last element, <tt>-2</tt> next to last, and so on).</p>
760<p>Multisets have similar behavior to arrays, except for retrieving arbitrary items as the order of items is not fixed in multisets.</p>
761<p>Attempts to access non-existent fields or out-of-bound array elements produce the special value <tt>MISSING</tt>. Type errors will be raised for inappropriate use of a path expression, such as applying a field accessor to a numeric value.</p>
762<p>The following examples illustrate field access for an object, index-based element access or subset retrieval of an array, and also a composition thereof.</p></div>
763<div class="section">
764<h5><a name="Examples"></a>Examples</h5>
765
766<div>
767<div>
768<pre class="source">({&quot;name&quot;: &quot;MyABCs&quot;, &quot;array&quot;: [ &quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}).array -- &#x278a;
769([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[2] -- &#x278b;
770([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[-1] -- &#x278c;
771({&quot;name&quot;: &quot;MyABCs&quot;, &quot;array&quot;: [ &quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}).array[2] -- &#x278d;
772([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[0:2] -- &#x278e;
773([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[0:] -- &#x278f;
774([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[-2:-1] -- &#x2790;
775</pre></div></div>
776
777<p>&#x2780; Returns <tt>[[&quot;a&quot;, &quot;b&quot;, &quot;c&quot;]]</tt><br />
778&#x2781; Returns <tt>[&quot;c&quot;]</tt><br />
779&#x2782; Returns <tt>[&quot;c&quot;]</tt><br />
780&#x2783; Returns <tt>[&quot;c&quot;]</tt><br />
781&#x2784; Returns <tt>[[&quot;a&quot;, &quot;b&quot;]]</tt><br />
782&#x2785; Returns <tt>[[&quot;a&quot;, &quot;b&quot;, &quot;c&quot;]]</tt><br />
783&#x2786; Returns <tt>[[&quot;b&quot;]]</tt></p></div></div></div></div>
784<div class="section">
785<h2><a name="Primary_Expressions"></a><a name="Primary_expressions" id="Primary_expressions">Primary Expressions</a></h2>
786<div class="section">
787<div class="section">
788<div class="section">
789<h5><a name="PrimaryExpr"></a>PrimaryExpr</h5>
790<p><img src="../images/diagrams/PrimaryExpr.png" alt="" /></p>
791<p>The most basic building block for any expression in SQL++ is the Primary Expression. This can be a simple literal (constant) value, a reference to a query variable that is in scope, a parenthesized expression, a function call, or a newly constructed instance of the data model (such as a newly constructed object, array, or multiset of data model instances).</p></div></div></div>
792<div class="section">
793<h3><a name="Literals" id="Literals">Literals</a></h3>
794<div class="section">
795<div class="section">
796<h5><a name="Literal"></a>Literal</h5>
797<p><img src="../images/diagrams/Literal.png" alt="" /></p>
798<p>The simplest kind of expression is a literal that directly represents a value in JSON format. Here are some examples:</p>
799
800<div>
801<div>
802<pre class="source">-42
803&quot;Hello&quot;
804true
805false
806null
807</pre></div></div>
808
809<p>Numeric literals may include a sign and an optional decimal point. They may also be written in exponential notation, like this:</p>
810
811<div>
812<div>
813<pre class="source">5e2
814-4.73E-2
815</pre></div></div>
816
817<p>String literals may be enclosed in either single quotes or double quotes. Inside a string literal, the delimiter character for that string must be &#x201c;escaped&#x201d; by a backward slash, as in these examples:</p>
818
819<div>
820<div>
821<pre class="source">&quot;I read \&quot;War and Peace\&quot; today.&quot;
822'I don\'t believe everything I read.'
823</pre></div></div>
824
825<p>The table below shows how to escape characters in SQL++.</p>
826<table border="0" class="table table-striped">
827<thead>
828
829<tr class="a">
830<th>Character Name </th>
831<th>Escape Method</th></tr>
832</thead><tbody>
833
834<tr class="b">
835<td>Single Quote</td>
836<td> <tt>\'</tt></td></tr>
837<tr class="a">
838<td>Double Quote</td>
839<td><tt>\&quot;</tt></td></tr>
840<tr class="b">
841<td>Backslash</td>
842<td><tt>\\</tt></td></tr>
843<tr class="a">
844<td>Slash</td>
845<td><tt>\/</tt></td></tr>
846<tr class="b">
847<td>Backspace</td>
848<td><tt>\b</tt></td></tr>
849<tr class="a">
850<td>Formfeed</td>
851<td><tt>\f</tt></td></tr>
852<tr class="b">
853<td>Newline</td>
854<td><tt>\n</tt></td></tr>
855<tr class="a">
856<td>CarriageReturn</td>
857<td><tt>\r</tt></td></tr>
858<tr class="b">
859<td>Tab</td>
860<td><tt>\t</tt></td></tr>
861</tbody>
862</table></div></div></div>
863<div class="section">
864<h3><a name="Identifiers_and_Variable_References"></a><a name="Variable_references" id="Variable_references">Identifiers and Variable References</a></h3>
865<p>Like SQL, SQL++ makes use of a language construct called an <i>identifier</i>. An identifier starts with an alphabetic character or the underscore character _ , and contains only case-sensitive alphabetic characters, numeric digits, or the special characters _ and $. It is also possible for an identifier to include other special characters, or to be the same as a reserved word, by enclosing the identifier in back-ticks (it&#x2019;s then called a <i>delimited identifier</i>). Identifiers are used in variable names and in certain other places in SQL++ syntax, such as in path expressions, which we&#x2019;ll discuss soon. Here are some examples of identifiers:</p>
866
867<div>
868<div>
869<pre class="source">X
870customer_name
871`SELECT`
872`spaces in here`
873`@&amp;#`
874</pre></div></div>
875
876<p>A very simple kind of SQL++ expression is a variable, which is simply an identifier. As in SQL, a variable can be bound to a value, which may be an input dataset, some intermediate result during processing of a query, or the final result of a query. We&#x2019;ll learn more about variables when we discuss queries.</p>
877<p>Note that the SQL++ rules for delimiting strings and identifiers are different from the SQL rules. In SQL, strings are always enclosed in single quotes, and double quotes are used for delimited identifiers.</p></div>
878<div class="section">
879<h3><a name="Parameter_References"></a><a name="Parameter_references" id="Parameter_references">Parameter References</a></h3>
880<p>A parameter reference is an external variable. Its value is provided using the <a href="../api.html#queryservice">statement execution API</a>.</p>
881<p>Parameter references come in two forms, <i>Named Parameter References</i> and <i>Positional Parameter References</i>.</p>
882<p>Named parameter references consist of the &#x201c;$&#x201d; symbol followed by an identifier or delimited identifier.</p>
883<p>Positional parameter references can be either a &#x201c;$&#x201d; symbol followed by one or more digits or a &#x201c;?&#x201d; symbol. If numbered, positional parameters start at 1. &#x201c;?&#x201d; parameters are interpreted as $1 to $N based on the order in which they appear in the statement.</p>
884<p>Parameter references may appear as shown in the below examples:</p>
885<div class="section">
886<div class="section">
887<h5><a name="Examples"></a>Examples</h5>
888
889<div>
890<div>
891<pre class="source">$id
892$1
893?
894</pre></div></div>
895
896<p>An error will be raised if the parameter is not bound at query execution time.</p></div></div></div>
897<div class="section">
898<h3><a name="Parenthesized_Expressions"></a><a name="Parenthesized_expressions" id="Parenthesized_expressions">Parenthesized Expressions</a></h3>
899<div class="section">
900<div class="section">
901<h5><a name="ParenthesizedExpr"></a>ParenthesizedExpr</h5>
902<p><img src="../images/diagrams/ParenthesizedExpr.png" alt="" /></p></div>
903<div class="section">
904<h5><a name="Subquery"></a>Subquery</h5>
905<p><img src="../images/diagrams/Subquery.png" alt="" /></p>
906<p>An expression can be parenthesized to control the precedence order or otherwise clarify a query. A <a href="#Subqueries">subquery</a> (nested <a href="#Union_all">selection</a>) may also be enclosed in parentheses. For more on these topics please see their respective sections.</p>
907<p>The following expression evaluates to the value 2.</p></div>
908<div class="section">
909<h5><a name="Example"></a>Example</h5>
910
911<div>
912<div>
913<pre class="source">( 1 + 1 )
914</pre></div></div>
915</div></div></div>
916<div class="section">
917<h3><a name="Function_Calls"></a><a name="Function_call_expressions" id="Function_call_expressions">Function Calls</a></h3>
918<div class="section">
919<div class="section">
920<h5><a name="FunctionCall"></a>FunctionCall</h5>
921<p><img src="../images/diagrams/FunctionCall.png" alt="" /></p></div>
922<div class="section">
923<h5><a name="OrdinaryFunctionCall"></a>OrdinaryFunctionCall</h5>
924<p><img src="../images/diagrams/OrdinaryFunctionCall.png" alt="" /></p></div>
925<div class="section">
926<h5><a name="AggregateFunctionCall"></a>AggregateFunctionCall</h5>
927<p><img src="../images/diagrams/AggregateFunctionCall.png" alt="" /></p></div>
928<div class="section">
929<h5><a name="DataverseName"></a>DataverseName</h5>
930<p><img src="../images/diagrams/DataverseName.png" alt="" /></p>
931<p>Functions are included in SQL++, like most languages, as a way to package useful functionality or to componentize complicated or reusable computations. A function call is a legal query expression that represents the value resulting from the evaluation of its body expression with the given parameter bindings; the parameter value bindings can themselves be any expressions in SQL++.</p>
932<p>Note that Window functions, and aggregate functions used as window functions, have a more complex syntax. Window function calls are described in the section on <a href="#Over_clauses">Window Queries</a>.</p>
933<p>Also note that FILTER expressions can only be specified when calling <a href="#Aggregation_PseudoFunctions">Aggregation Pseudo-Functions</a>.</p>
934<p>The following example is a function call expression whose value is 8.</p></div>
935<div class="section">
936<h5><a name="Example"></a>Example</h5>
937
938<div>
939<div>
940<pre class="source">length('a string')
941</pre></div></div>
942</div></div></div>
943<div class="section">
944<h3><a name="Case_Expressions"></a><a name="Case_expressions" id="Case_expressions">Case Expressions</a></h3>
945<div class="section">
946<div class="section">
947<h5><a name="CaseExpr"></a>CaseExpr</h5>
948<p><img src="../images/diagrams/CaseExpr.png" alt="" /></p></div>
949<div class="section">
950<h5><a name="SimpleCaseExpr"></a>SimpleCaseExpr</h5>
951<p><img src="../images/diagrams/SimpleCaseExpr.png" alt="" /></p></div>
952<div class="section">
953<h5><a name="SearchedCaseExpr"></a>SearchedCaseExpr</h5>
954<p><img src="../images/diagrams/SearchedCaseExpr.png" alt="" /></p>
955<p>In a simple <tt>CASE</tt> expression, the query evaluator searches for the first <tt>WHEN</tt> &#x2026; <tt>THEN</tt> pair in which the <tt>WHEN</tt> expression is equal to the expression following <tt>CASE</tt> and returns the expression following <tt>THEN</tt>. If none of the <tt>WHEN</tt> &#x2026; <tt>THEN</tt> pairs meet this condition, and an <tt>ELSE</tt> branch exists, it returns the <tt>ELSE</tt> expression. Otherwise, <tt>NULL</tt> is returned.</p>
956<p>In a searched CASE expression, the query evaluator searches from left to right until it finds a <tt>WHEN</tt> expression that is evaluated to <tt>TRUE</tt>, and then returns its corresponding <tt>THEN</tt> expression. If no condition is found to be <tt>TRUE</tt>, and an <tt>ELSE</tt> branch exists, it returns the <tt>ELSE</tt> expression. Otherwise, it returns <tt>NULL</tt>.</p>
957<p>The following example illustrates the form of a case expression.</p></div>
958<div class="section">
959<h5><a name="Example"></a>Example</h5>
960
961<div>
962<div>
963<pre class="source">CASE (2 &lt; 3) WHEN true THEN &quot;yes&quot; ELSE &quot;no&quot; END
964</pre></div></div>
965</div></div></div>
966<div class="section">
967<h3><a name="Constructors" id="Constructors">Constructors</a></h3>
968<div class="section">
969<div class="section">
970<h5><a name="Constructor"></a>Constructor</h5>
971<p><img src="../images/diagrams/Constructor.png" alt="" /></p></div>
972<div class="section">
973<h5><a name="ObjectConstructor"></a>ObjectConstructor</h5>
974<p><img src="../images/diagrams/ObjectConstructor.png" alt="" /></p></div>
975<div class="section">
976<h5><a name="ArrayConstructor"></a>ArrayConstructor</h5>
977<p><img src="../images/diagrams/ArrayConstructor.png" alt="" /></p></div>
978<div class="section">
979<h5><a name="ParenthesizedArrayConstructor"></a>ParenthesizedArrayConstructor</h5>
980<p><img src="../images/diagrams/ParenthesizedArrayConstructor.png" alt="" /></p></div>
981<div class="section">
982<h5><a name="MultisetConstructor"></a>MultisetConstructor</h5>
983<p><img src="../images/diagrams/MultisetConstructor.png" alt="" /></p>
984<p>Structured JSON values can be represented by constructors, as in these examples:</p>
985
986<div>
987<div>
988<pre class="source">{ &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 42 } -- &#x278a;
989[ 1, 2, &quot;Hello&quot;, null ] -- &#x278b;
990</pre></div></div>
991
992<p>&#x2780; An object<br />
993&#x2781; An array</p>
994<p>In a constructed object, the names of the fields must be strings (either literal strings or computed strings), and an object may not contain any duplicate names. Of course, structured literals can be nested, as in this example:</p>
995
996<div>
997<div>
998<pre class="source">[ {&quot;name&quot;: &quot;Bill&quot;,
999 &quot;address&quot;:
1000 {&quot;street&quot;: &quot;25 Main St.&quot;,
1001 &quot;city&quot;: &quot;Cincinnati, OH&quot;
1002 }
1003 },
1004 {&quot;name&quot;: &quot;Mary&quot;,
1005 &quot;address&quot;:
1006 {&quot;street&quot;: &quot;107 Market St.&quot;,
1007 &quot;city&quot;: &quot;St. Louis, MO&quot;
1008 }
1009 }
1010]
1011</pre></div></div>
1012
1013<p>The array items in an array constructor, and the field-names and field-values in an object constructor, may be represented by expressions. For example, suppose that the variables firstname, lastname, salary, and bonus are bound to appropriate values. Then structured values might be constructed by the following expressions:</p>
1014<p>An object:</p>
1015
1016<div>
1017<div>
1018<pre class="source">{
1019 &quot;name&quot;: firstname || &quot; &quot; || lastname,
1020 &quot;income&quot;: salary + bonus
1021}
1022</pre></div></div>
1023
1024<p>An array:</p>
1025
1026<div>
1027<div>
1028<pre class="source">[&quot;1984&quot;, lastname, salary + bonus, null]
1029</pre></div></div>
1030
1031<p>If only one expression is specified instead of the field-name/field-value pair in an object constructor then this expression is supposed to provide the field value. The field name is then automatically generated based on the kind of the value expression as in Q2.1:</p>
1032<ul>
1033
1034<li>If it is a variable reference expression then the generated field name is the name of that variable.</li>
1035<li>If it is a field access expression then the generated field name is the last identifier in that expression.</li>
1036<li>For all other cases, a compilation error will be raised.</li>
1037</ul></div>
1038<div class="section">
1039<h5><a name="Example"></a>Example</h5>
1040<p>(Q2.1)</p>
1041
1042<div>
1043<div>
1044<pre class="source">FROM customers AS c
1045WHERE c.custid = &quot;C47&quot;
1046SELECT VALUE {c.name, c.rating}
1047</pre></div></div>
1048
1049<p>This query outputs:</p>
1050
1051<div>
1052<div>
1053<pre class="source">[
1054 {
1055 &quot;name&quot;: &quot;S. Logan&quot;,
1056 &quot;rating&quot;: 625
1057 }
1058]
1059</pre></div></div>
1060<!--
1061 ! Licensed to the Apache Software Foundation (ASF) under one
1062 ! or more contributor license agreements. See the NOTICE file
1063 ! distributed with this work for additional information
1064 ! regarding copyright ownership. The ASF licenses this file
1065 ! to you under the Apache License, Version 2.0 (the
1066 ! "License"); you may not use this file except in compliance
1067 ! with the License. You may obtain a copy of the License at
1068 !
1069 ! http://www.apache.org/licenses/LICENSE-2.0
1070 !
1071 ! Unless required by applicable law or agreed to in writing,
1072 ! software distributed under the License is distributed on an
1073 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1074 ! KIND, either express or implied. See the License for the
1075 ! specific language governing permissions and limitations
1076 ! under the License.
1077 !-->
1078
1079<h1><a name="Queries" id="Queries">3. Queries</a></h1><!--
1080 ! Licensed to the Apache Software Foundation (ASF) under one
1081 ! or more contributor license agreements. See the NOTICE file
1082 ! distributed with this work for additional information
1083 ! regarding copyright ownership. The ASF licenses this file
1084 ! to you under the Apache License, Version 2.0 (the
1085 ! "License"); you may not use this file except in compliance
1086 ! with the License. You may obtain a copy of the License at
1087 !
1088 ! http://www.apache.org/licenses/LICENSE-2.0
1089 !
1090 ! Unless required by applicable law or agreed to in writing,
1091 ! software distributed under the License is distributed on an
1092 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1093 ! KIND, either express or implied. See the License for the
1094 ! specific language governing permissions and limitations
1095 ! under the License.
1096 !-->
1097
1098<p>A <i>query</i> can be an expression, or it can be constructed from blocks of code called <i>query blocks</i>. A query block may contain several clauses, including <tt>SELECT</tt>, <tt>FROM</tt>, <tt>LET</tt>, <tt>WHERE</tt>, <tt>GROUP BY</tt>, and <tt>HAVING</tt>.</p></div>
1099<div class="section">
1100<h5><a name="Query"></a>Query</h5>
1101<p><img src="../images/diagrams/Query.png" alt="" /></p></div>
1102<div class="section">
1103<h5><a name="Selection"></a>Selection</h5>
1104<p><img src="../images/diagrams/Selection.png" alt="" /></p></div>
1105<div class="section">
1106<h5><a name="QueryBlock"></a>QueryBlock</h5>
1107<p><img src="../images/diagrams/QueryBlock.png" alt="" /></p></div>
1108<div class="section">
1109<h5><a name="StreamGenerator"></a>StreamGenerator</h5>
1110<p><img src="../images/diagrams/StreamGenerator.png" alt="" /></p>
1111<p>Note that, unlike SQL, SQL++ allows the <tt>SELECT</tt> clause to appear either at the beginning or at the end of a query block. For some queries, placing the <tt>SELECT</tt> clause at the end may make a query block easier to understand, because the <tt>SELECT</tt> clause refers to variables defined in the other clauses.</p></div></div></div></div>
1112<div class="section">
1113<h2><a name="SELECT_Clause"></a><a name="Select_clauses" id="Select_clauses">SELECT Clause</a></h2>
1114<div class="section">
1115<div class="section">
1116<div class="section">
1117<h5><a name="SelectClause"></a>SelectClause</h5>
1118<p><img src="../images/diagrams/SelectClause.png" alt="" /></p></div>
1119<div class="section">
1120<h5><a name="Projection"></a>Projection</h5>
1121<p><img src="../images/diagrams/Projection.png" alt="" /></p>
1122<p>Synonyms for <tt>VALUE</tt>: <tt>ELEMENT</tt>, <tt>RAW</tt></p>
1123<p>In a query block, the <tt>FROM</tt>, <tt>WHERE</tt>, <tt>GROUP BY</tt>, and <tt>HAVING</tt> clauses (if present) are collectively called the Stream Generator. All these clauses, taken together, generate a stream of tuples of bound variables. The <tt>SELECT</tt> clause then uses these bound variables to generate the output of the query block.</p>
1124<p>For example, the clause <tt>FROM customers AS c</tt> scans over the <tt>customers</tt> collection, binding the variable <tt>c</tt> to each <tt>customer</tt> object in turn, producing a stream of bindings.</p>
1125<p>Here&#x2019;s a slightly more complex example of a stream generator:</p></div>
1126<div class="section">
1127<h5><a name="Example"></a>Example</h5>
1128
1129<div>
1130<div>
1131<pre class="source">FROM customers AS c, orders AS o
1132WHERE c.custid = o.custid
1133</pre></div></div>
1134
1135<p>In this example, the <tt>FROM</tt> clause scans over the customers and orders collections, producing a stream of variable pairs (<tt>c</tt>, <tt>o</tt>) in which <tt>c</tt> is bound to a <tt>customer</tt> object and <tt>o</tt> is bound to an <tt>order</tt> object. The <tt>WHERE</tt> clause then retains only those pairs in which the custid values of the two objects match.</p>
1136<p>The output of the query block is a collection containing one output item for each tuple produced by the stream generator. If the stream generator produces no tuples, the output of the query block is an empty collection. Depending on the <tt>SELECT</tt> clause, each output item may be an object or some other kind of value.</p>
1137<p>In addition to using the variables bound by previous clauses, the <tt>SELECT</tt> clause may create and bind some additional variables. For example, the clause <tt>SELECT salary + bonus AS pay</tt> creates the variable <tt>pay</tt> and binds it to the value of <tt>salary + bonus</tt>. This variable may then be used in a later <tt>ORDER BY</tt> clause.</p>
1138<p>In SQL++, the <tt>SELECT</tt> clause may appear either at the beginning or at the end of a query block. Since the <tt>SELECT</tt> clause depends on variables that are bound in the other clauses, the examples in this section place <tt>SELECT</tt> at the end of the query blocks.</p></div></div></div>
1139<div class="section">
1140<h3><a name="SELECT_VALUE"></a><a name="Select_element" id="Select_element">SELECT VALUE</a></h3>
1141<p>The <tt>SELECT VALUE</tt> clause returns an array or multiset that contains the results of evaluating the <tt>VALUE</tt> expression, with one evaluation being performed per &#x201c;binding tuple&#x201d; (i.e., per <tt>FROM</tt> clause item) satisfying the statement&#x2019;s selection criteria. If there is no <tt>FROM</tt> clause, the expression after <tt>VALUE</tt> is evaluated once with no binding tuples (except those inherited from an outer environment).</p>
1142<div class="section">
1143<div class="section">
1144<h5><a name="Example"></a>Example</h5>
1145<p>(Q3.1)</p>
1146
1147<div>
1148<div>
1149<pre class="source">SELECT VALUE 1;
1150</pre></div></div>
1151
1152<p>Result:</p>
1153
1154<div>
1155<div>
1156<pre class="source">[
1157 1
1158]
1159</pre></div></div>
1160</div>
1161<div class="section">
1162<h5><a name="Example"></a>Example</h5>
1163<p>(Q3.2) The following query returns the names of all customers whose rating is above 650.</p>
1164
1165<div>
1166<div>
1167<pre class="source">FROM customers AS c
1168WHERE c.rating &gt; 650
1169SELECT VALUE name;
1170</pre></div></div>
1171
1172<p>Result:</p>
1173
1174<div>
1175<div>
1176<pre class="source">[
1177 &quot;T. Cody&quot;,
1178 &quot;M. Sinclair&quot;,
1179 &quot;T. Henry&quot;
1180]
1181</pre></div></div>
1182</div></div></div>
1183<div class="section">
1184<h3><a name="SQL-style_SELECT"></a><a name="SQL_select" id="SQL_select">SQL-style SELECT</a></h3>
1185<p>Traditional SQL-style <tt>SELECT</tt> syntax is also supported in SQL++; however, the result of a query is not guaranteed to preserve the order of expressions in the <tt>SELECT</tt> clause.</p>
1186<div class="section">
1187<div class="section">
1188<h5><a name="Example"></a>Example</h5>
1189<p>(Q3.3) The following query returns the names and customer IDs of any customers whose rating is 750.</p>
1190
1191<div>
1192<div>
1193<pre class="source">FROM customers AS c
1194WHERE c.rating = 750
1195SELECT c.name AS customer_name, c.custid AS customer_id;
1196</pre></div></div>
1197
1198<p>Result:</p>
1199
1200<div>
1201<div>
1202<pre class="source">[
1203 {
1204 &quot;customer_id&quot;: &quot;C13&quot;,
1205 &quot;customer_name&quot;: &quot;T. Cody&quot;
1206 },
1207 {
1208 &quot;customer_id&quot;: &quot;C37&quot;,
1209 &quot;customer_name&quot;: &quot;T. Henry&quot;
1210 }
1211]
1212</pre></div></div>
1213</div></div></div>
1214<div class="section">
1215<h3><a name="SELECT_.2A"></a><a name="Select_star" id="Select_star">SELECT *</a></h3>
1216<p>As in SQL, the phrase <tt>SELECT *</tt> suggests, &#x201c;select everything.&#x201d;</p>
1217<p>For each binding tuple in the stream, <tt>SELECT *</tt> produces an output object. For each variable in the binding tuple, the output object contains a field: the name of the field is the name of the variable, and the value of the field is the value of the variable. Essentially, <tt>SELECT *</tt> means, &#x201c;return all the bound variables, with their names and values.&#x201d;</p>
1218<p>The effect of <tt>SELECT *</tt> can be illustrated by an example based on two small collections named <tt>ages</tt> and <tt>eyes</tt>. The contents of the two collections are as follows:</p>
1219<p><tt>ages</tt>:</p>
1220
1221<div>
1222<div>
1223<pre class="source">[
1224 { &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 },
1225 { &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32 }
1226]
1227</pre></div></div>
1228
1229<p><tt>eyes</tt>:</p>
1230
1231<div>
1232<div>
1233<pre class="source">[
1234 { &quot;name&quot;: &quot;Bill&quot;, &quot;eyecolor&quot;: &quot;brown&quot; },
1235 { &quot;name&quot;: &quot;Sue&quot;, &quot;eyecolor&quot;: &quot;blue&quot; }
1236]
1237</pre></div></div>
1238
1239<p>The following example applies <tt>SELECT *</tt> to a single collection.</p>
1240<div class="section">
1241<div class="section">
1242<h5><a name="Example"></a>Example</h5>
1243<p>(Q3.4a) Return all the information in the <tt>ages</tt> collection.</p>
1244
1245<div>
1246<div>
1247<pre class="source">FROM ages AS a
1248SELECT * ;
1249</pre></div></div>
1250
1251<p>Result:</p>
1252
1253<div>
1254<div>
1255<pre class="source">[
1256 { &quot;a&quot;: { &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 }
1257 },
1258 { &quot;a&quot;: { &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32 }
1259 }
1260]
1261</pre></div></div>
1262
1263<p>Note that the variable-name <tt>a</tt> appears in the query result. If the <tt>FROM</tt> clause had been simply <tt>FROM ages</tt> (omitting <tt>AS a</tt>), the variable-name in the query result would have been <tt>ages</tt>.</p>
1264<p>The next example applies <tt>SELECT *</tt> to a join of two collections.</p></div>
1265<div class="section">
1266<h5><a name="Example"></a>Example</h5>
1267<p>(Q3.4b) Return all the information in a join of <tt>ages</tt> and <tt>eyes</tt> on matching name fields.</p>
1268
1269<div>
1270<div>
1271<pre class="source">FROM ages AS a, eyes AS e
1272WHERE a.name = e.name
1273SELECT * ;
1274</pre></div></div>
1275
1276<p>Result:</p>
1277
1278<div>
1279<div>
1280<pre class="source">[
1281 { &quot;a&quot;: { &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 },
1282 &quot;e&quot;: { &quot;name&quot;: &quot;Bill&quot;, &quot;eyecolor&quot;: &quot;brown&quot; }
1283 },
1284 { &quot;a&quot;: { &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32 },
1285 &quot;e&quot;: { &quot;name&quot;: &quot;Sue&quot;, &quot;eyecolor&quot;: &quot;blue&quot; }
1286 }
1287]
1288</pre></div></div>
1289
1290<p>Note that the result of <tt>SELECT *</tt> in SQL++ is more complex than the result of <tt>SELECT *</tt> in SQL.</p></div></div></div>
1291<div class="section">
1292<h3><a name="SELECT_variable..2A"></a><a name="Select_variable_star" id="Select_variable_star">SELECT <i>variable</i>.*</a></h3>
1293<p>SQL++ has an alternative version of <tt>SELECT *</tt> in which the star is preceded by a variable.</p>
1294<p>Whereas the version without a named variable means, &#x201c;return all the bound variables, with their names and values,&#x201d; <tt>SELECT</tt> <i>variable</i> <tt>.*</tt> means &#x201c;return only the named variable, and return only its value, not its name.&#x201d;</p>
1295<p>The following example can be compared with (Q3.4a) to see the difference between the two versions of <tt>SELECT *</tt>:</p>
1296<div class="section">
1297<div class="section">
1298<h5><a name="Example"></a>Example</h5>
1299<p>(Q3.4c) Return all information in the <tt>ages</tt> collection.</p>
1300
1301<div>
1302<div>
1303<pre class="source">FROM ages AS a
1304SELECT a.*
1305</pre></div></div>
1306
1307<p>Result:</p>
1308
1309<div>
1310<div>
1311<pre class="source">[
1312 { &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 },
1313 { &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32 }
1314]
1315</pre></div></div>
1316
1317<p>Note that, for queries over a single collection, <tt>SELECT</tt> <i>variable</i> <tt>.*</tt> returns a simpler result and therefore may be preferable to <tt>SELECT *</tt>.</p>
1318<p>In fact, <tt>SELECT</tt> <i>variable</i> <tt>.*</tt>, like <tt>SELECT *</tt> in SQL, is equivalent to a <tt>SELECT</tt> clause that enumerates all the fields of the collection, as in (Q3.4d):</p></div>
1319<div class="section">
1320<h5><a name="Example"></a>Example</h5>
1321<p>(Q3.4d) Return all the information in the <tt>ages</tt> collection.</p>
1322
1323<div>
1324<div>
1325<pre class="source">FROM ages AS a
1326SELECT a.name, a.age
1327</pre></div></div>
1328
1329<p>(same result as (Q3.4c))</p>
1330<p><tt>SELECT</tt> <i>variable</i> <tt>.*</tt> has an additional application. It can be used to return all the fields of a nested object. To illustrate this use, we will use the <tt>customers</tt> dataset in the example database &#x2014; see <a href="#Manual_data">Appendix 4</a>.</p></div>
1331<div class="section">
1332<h5><a name="Example"></a>Example</h5>
1333<p>(Q3.4e) In the <tt>customers</tt> dataset, return all the fields of the <tt>address</tt> objects that have zipcode &#x201c;02340&#x201d;.</p>
1334
1335<div>
1336<div>
1337<pre class="source">FROM customers AS c
1338WHERE c.address.zipcode = &quot;02340&quot;
1339SELECT address.* ;
1340</pre></div></div>
1341
1342<p>Result:</p>
1343
1344<div>
1345<div>
1346<pre class="source">[
1347 {
1348 &quot;street&quot;: &quot;690 River St.&quot;,
1349 &quot;city&quot;: &quot;Hanover, MA&quot;,
1350 &quot;zipcode&quot;: &quot;02340&quot;
1351 }
1352]
1353</pre></div></div>
1354</div></div></div>
1355<div class="section">
1356<h3><a name="SELECT_DISTINCT"></a><a name="Select_distinct" id="Select_distinct">SELECT DISTINCT</a></h3>
1357<p>The <tt>DISTINCT</tt> keyword is used to eliminate duplicate items from the results of a query block.</p>
1358<div class="section">
1359<div class="section">
1360<h5><a name="Example"></a>Example</h5>
1361<p>(Q3.5) Returns all of the different cities in the <tt>customers</tt> dataset.</p>
1362
1363<div>
1364<div>
1365<pre class="source">FROM customers AS c
1366SELECT DISTINCT c.address.city;
1367</pre></div></div>
1368
1369<p>Result:</p>
1370
1371<div>
1372<div>
1373<pre class="source">[
1374 {
1375 &quot;city&quot;: &quot;Boston, MA&quot;
1376 },
1377 {
1378 &quot;city&quot;: &quot;Hanover, MA&quot;
1379 },
1380 {
1381 &quot;city&quot;: &quot;St. Louis, MO&quot;
1382 },
1383 {
1384 &quot;city&quot;: &quot;Rome, Italy&quot;
1385 }
1386]
1387</pre></div></div>
1388</div></div></div>
1389<div class="section">
1390<h3><a name="Unnamed_Projections"></a><a name="Unnamed_projections" id="Unnamed_projections">Unnamed Projections</a></h3>
1391<p>Similar to standard SQL, the query language supports unnamed projections (a.k.a. unnamed <tt>SELECT</tt> clause items), for which names are generated rather than user-provided. Name generation has three cases:</p>
1392<ul>
1393
1394<li>If a projection expression is a variable reference expression, its generated name is the name of the variable.</li>
1395<li>If a projection expression is a field access expression, its generated name is the last identifier in the expression.</li>
1396<li>For all other cases, the query processor will generate a unique name.</li>
1397</ul>
1398<div class="section">
1399<div class="section">
1400<h5><a name="Example"></a>Example</h5>
1401<p>(Q3.6) Returns the last digits of the order number (that is, the order number modulo 1000) and the order date of all orders for the customer whose ID is &#x201c;C41&#x201d;.</p>
1402
1403<div>
1404<div>
1405<pre class="source">FROM orders AS o
1406WHERE o.custid = &quot;C41&quot;
1407SELECT o.orderno % 1000, o.order_date;
1408</pre></div></div>
1409
1410<p>Result:</p>
1411
1412<div>
1413<div>
1414<pre class="source">[
1415 {
1416 &quot;$1&quot;: 1,
1417 &quot;order_date&quot;: &quot;2020-04-29&quot;
1418 },
1419 {
1420 &quot;$1&quot;: 6,
1421 &quot;order_date&quot;: &quot;2020-09-02&quot;
1422 }
1423]
1424</pre></div></div>
1425
1426<p>In the result, <tt>$1</tt> is the generated name for <tt>o.orderno % 1000</tt>, while <tt>order_date</tt> is the generated name for <tt>o.order_date</tt>. It is good practice, however, not to rely on the automatically generated names, which can be confusing and uninformative. Instead, practice good naming conventions by providing a meaningful and concise name which properly describes the selected item.</p></div></div></div>
1427<div class="section">
1428<h3><a name="Abbreviated_Field_Access_Expressions"></a><a name="Abbreviated_field_access_expressions" id="Abbreviated_field_access_expressions">Abbreviated Field Access Expressions</a></h3>
1429<p>As in standard SQL, field access expressions can be abbreviated when there is no ambiguity. In the next example, the variable <tt>o</tt> is the only possible variable reference for fields <tt>orderno</tt> and <tt>order_date</tt> and thus could be omitted in the query. This practice is not recommended, however, as queries may have fields (such as <tt>custid</tt>) which can be present in multiple datasets. More information on abbreviated field access can be found in the appendix section on Variable Resolution.</p>
1430<div class="section">
1431<div class="section">
1432<h5><a name="Example"></a>Example</h5>
1433<p>(Q3.7) Same as Q3.6, omitting the variable reference for the order number and date and providing custom names for <tt>SELECT</tt> clause items.</p>
1434
1435<div>
1436<div>
1437<pre class="source">FROM orders AS o
1438WHERE o.custid = &quot;C41&quot;
1439SELECT orderno % 1000 AS last_digit, order_date;
1440</pre></div></div>
1441
1442<p>Result:</p>
1443
1444<div>
1445<div>
1446<pre class="source">[
1447 {
1448 &quot;last_digit&quot;: 1,
1449 &quot;order_date&quot;: &quot;2020-04-29&quot;
1450 },
1451 {
1452 &quot;last_digit&quot;: 6,
1453 &quot;order_date&quot;: &quot;2020-09-02&quot;
1454 }
1455]
1456</pre></div></div>
1457</div></div></div></div>
1458<div class="section">
1459<h2><a name="FROM_Clause"></a><a name="From_clauses" id="From_clauses">FROM Clause</a></h2>
1460<div class="section">
1461<div class="section">
1462<div class="section">
1463<h5><a name="FromClause"></a>FromClause</h5>
1464<p><img src="../images/diagrams/FromClause.png" alt="" /></p></div>
1465<div class="section">
1466<h5><a name="FromTerm"></a>FromTerm</h5>
1467<p><img src="../images/diagrams/FromTerm.png" alt="" /></p></div>
1468<div class="section">
1469<h5><a name="NamedExpr"></a>NamedExpr</h5>
1470<p><img src="../images/diagrams/NamedExpr.png" alt="" /></p></div>
1471<div class="section">
1472<h5><a name="JoinStep"></a>JoinStep</h5>
1473<p><img src="../images/diagrams/JoinStep.png" alt="" /></p></div>
1474<div class="section">
1475<h5><a name="UnnestStep"></a>UnnestStep</h5>
1476<p><img src="../images/diagrams/UnnestStep.png" alt="" /></p>
1477<p>Synonyms for <tt>UNNEST</tt>: <tt>CORRELATE</tt>, <tt>FLATTEN</tt></p>
1478<p>The purpose of a <tt>FROM</tt> clause is to iterate over a collection, binding a variable to each item in turn. Here&#x2019;s a query that iterates over the <tt>customers</tt> dataset, choosing certain customers and returning some of their attributes.</p></div>
1479<div class="section">
1480<h5><a name="Example"></a>Example</h5>
1481<p>(Q3.8) List the customer IDs and names of the customers in zipcode 63101, in order by their customer IDs.</p>
1482
1483<div>
1484<div>
1485<pre class="source">FROM customers
1486WHERE address.zipcode = &quot;63101&quot;
1487SELECT custid AS customer_id, name
1488ORDER BY customer_id;
1489</pre></div></div>
1490
1491<p>Result:</p>
1492
1493<div>
1494<div>
1495<pre class="source">[
1496 {
1497 &quot;customer_id&quot;: &quot;C13&quot;,
1498 &quot;name&quot;: &quot;T. Cody&quot;
1499 },
1500 {
1501 &quot;customer_id&quot;: &quot;C31&quot;,
1502 &quot;name&quot;: &quot;B. Pruitt&quot;
1503 },
1504 {
1505 &quot;customer_id&quot;: &quot;C41&quot;,
1506 &quot;name&quot;: &quot;R. Dodge&quot;
1507 }
1508]
1509</pre></div></div>
1510
1511<p>Let&#x2019;s take a closer look at what this <tt>FROM</tt> clause is doing. A <tt>FROM</tt> clause always produces a stream of bindings, in which an iteration variable is bound in turn to each item in a collection. In Q3.8, since no explicit iteration variable is provided, the <tt>FROM</tt> clause defines an implicit variable named <tt>customers</tt>, the same name as the dataset that is being iterated over. The implicit iteration variable serves as the object-name for all field-names in the query block that do not have explicit object-names. Thus, <tt>address.zipcode</tt> really means <tt>customers.address.zipcode</tt>, <tt>custid</tt> really means <tt>customers.custid</tt>, and <tt>name</tt> really means <tt>customers.name</tt>.</p>
1512<p>You may also provide an explicit iteration variable, as in this version of the same query:</p></div>
1513<div class="section">
1514<h5><a name="Example"></a>Example</h5>
1515<p>(Q3.9) Alternative version of Q3.8 (same result).</p>
1516
1517<div>
1518<div>
1519<pre class="source">FROM customers AS c
1520WHERE c.address.zipcode = &quot;63101&quot;
1521SELECT c.custid AS customer_id, c.name
1522ORDER BY customer_id;
1523</pre></div></div>
1524
1525<p>In Q3.9, the variable <tt>c</tt> is bound to each <tt>customer</tt> object in turn as the query iterates over the <tt>customers</tt> dataset. An explicit iteration variable can be used to identify the fields of the referenced object, as in <tt>c.name</tt> in the <tt>SELECT</tt> clause of Q3.9. When referencing a field of an object, the iteration variable can be omitted when there is no ambiguity. For example, <tt>c.name</tt> could be replaced by <tt>name</tt> in the <tt>SELECT</tt> clause of Q3.9. That&#x2019;s why field-names like <tt>name</tt> and <tt>custid</tt> could stand by themselves in the Q3.8 version of this query.</p>
1526<p>In the examples above, the <tt>FROM</tt> clause iterates over the objects in a dataset. But in general, a <tt>FROM</tt> clause can iterate over any collection. For example, the objects in the <tt>orders</tt> dataset each contain a field called <tt>items</tt>, which is an array of nested objects. In some cases, you will write a <tt>FROM</tt> clause that iterates over a nested array like <tt>items</tt>.</p>
1527<p>The stream of objects (more accurately, variable bindings) that is produced by the <tt>FROM</tt> clause does not have any particular order. The system will choose the most efficient order for the iteration. If you want your query result to have a specific order, you must use an <tt>ORDER BY</tt> clause.</p>
1528<p>It&#x2019;s good practice to specify an explicit iteration variable for each collection in the <tt>FROM</tt> clause, and to use these variables to qualify the field-names in other clauses. Here are some reasons for this convention:</p>
1529<ul>
1530
1531<li>
1532
1533<p>It&#x2019;s nice to have different names for the collection as a whole and an object in the collection. For example, in the clause <tt>FROM customers AS c</tt>, the name <tt>customers</tt> represents the dataset and the name <tt>c</tt> represents one object in the dataset.</p>
1534</li>
1535<li>
1536
1537<p>In some cases, iteration variables are required. For example, when joining a dataset to itself, distinct iteration variables are required to distinguish the left side of the join from the right side.</p>
1538</li>
1539<li>
1540
1541<p>In a subquery it&#x2019;s sometimes necessary to refer to an object in an outer query block (this is called a <i>correlated subquery</i>). To avoid confusion in correlated subqueries, it&#x2019;s best to use explicit variables.</p>
1542</li>
1543</ul></div></div></div>
1544<div class="section">
1545<h3><a name="Joins"></a><a name="Join_clauses" id="Join_clauses">Joins</a></h3>
1546<p>A <tt>FROM</tt> clause gets more interesting when there is more than one collection involved. The following query iterates over two collections: <tt>customers</tt> and <tt>orders</tt>. The <tt>FROM</tt> clause produces a stream of binding tuples, each containing two variables, <tt>c</tt> and <tt>o</tt>. In each binding tuple, <tt>c</tt> is bound to an object from <tt>customers</tt>, and <tt>o</tt> is bound to an object from <tt>orders</tt>. Conceptually, at this point, the binding tuple stream contains all possible pairs of a customer and an order (this is called the <i>Cartesian product</i> of <tt>customers</tt> and <tt>orders</tt>). Of course, we are interested only in pairs where the <tt>custid</tt> fields match, and that condition is expressed in the <tt>WHERE</tt> clause, along with the restriction that the order number must be 1001.</p>
1547<div class="section">
1548<div class="section">
1549<h5><a name="Example"></a>Example</h5>
1550<p>(Q3.10) Create a packing list for order number 1001, showing the customer name and address and all the items in the order.</p>
1551
1552<div>
1553<div>
1554<pre class="source">FROM customers AS c, orders AS o
1555WHERE c.custid = o.custid
1556AND o.orderno = 1001
1557SELECT o.orderno,
1558 c.name AS customer_name,
1559 c.address,
1560 o.items AS items_ordered;
1561</pre></div></div>
1562
1563<p>Result:</p>
1564
1565<div>
1566<div>
1567<pre class="source">[
1568 {
1569 &quot;orderno&quot;: 1001,
1570 &quot;customer_name&quot;: &quot;R. Dodge&quot;,
1571 &quot;address&quot;: {
1572 &quot;street&quot;: &quot;150 Market St.&quot;,
1573 &quot;city&quot;: &quot;St. Louis, MO&quot;,
1574 &quot;zipcode&quot;: &quot;63101&quot;
1575 },
1576 &quot;items_ordered&quot;: [
1577 {
1578 &quot;itemno&quot;: 347,
1579 &quot;qty&quot;: 5,
1580 &quot;price&quot;: 19.99
1581 },
1582 {
1583 &quot;itemno&quot;: 193,
1584 &quot;qty&quot;: 2,
1585 &quot;price&quot;: 28.89
1586 }
1587 ]
1588 }
1589]
1590</pre></div></div>
1591
1592<p>Q3.10 is called a <i>join query</i> because it joins the <tt>customers</tt> collection and the <tt>orders</tt> collection, using the join condition <tt>c.custid = o.custid</tt>. In SQL++, as in SQL, you can express this query more explicitly by a <tt>JOIN</tt> clause that includes the join condition, as follows:</p></div>
1593<div class="section">
1594<h5><a name="Example"></a>Example</h5>
1595<p>(Q3.11) Alternative statement of Q3.10 (same result).</p>
1596
1597<div>
1598<div>
1599<pre class="source">FROM customers AS c JOIN orders AS o
1600 ON c.custid = o.custid
1601WHERE o.orderno = 1001
1602SELECT o.orderno,
1603 c.name AS customer_name,
1604 c.address,
1605 o.items AS items_ordered;
1606</pre></div></div>
1607
1608<p>Whether you express the join condition in a <tt>JOIN</tt> clause or in a <tt>WHERE</tt> clause is a matter of taste; the result is the same. This manual will generally use a comma-separated list of collection-names in the <tt>FROM</tt> clause, leaving the join condition to be expressed elsewhere. As we&#x2019;ll soon see, in some query blocks the join condition can be omitted entirely.</p>
1609<p>There is, however, one case in which an explicit <tt>JOIN</tt> clause is necessary. That is when you need to join collection A to collection B, and you want to make sure that every item in collection A is present in the query result, even if it doesn&#x2019;t match any item in collection B. This kind of query is called a <i>left outer join</i>, and it is illustrated by the following example.</p></div>
1610<div class="section">
1611<h5><a name="Example"></a>Example</h5>
1612<p>(Q3.12) For customers T. Cody and M. Sinclair, list the customer ID and name, together with the order numbers and dates of their orders (if any).</p>
1613
1614<div>
1615<div>
1616<pre class="source">FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
1617WHERE c.name = &quot;T. Cody&quot;
1618 OR c.name = &quot;M. Sinclair&quot;
1619SELECT c.custid, c.name, o.orderno, o.order_date
1620ORDER BY c.custid, o.order_date;
1621</pre></div></div>
1622
1623<p>Result:</p>
1624
1625<div>
1626<div>
1627<pre class="source">[
1628 {
1629 &quot;custid&quot;: &quot;C13&quot;,
1630 &quot;orderno&quot;: 1002,
1631 &quot;name&quot;: &quot;T. Cody&quot;,
1632 &quot;order_date&quot;: &quot;2020-05-01&quot;
1633 },
1634 {
1635 &quot;custid&quot;: &quot;C13&quot;,
1636 &quot;orderno&quot;: 1007,
1637 &quot;name&quot;: &quot;T. Cody&quot;,
1638 &quot;order_date&quot;: &quot;2020-09-13&quot;
1639 },
1640 {
1641 &quot;custid&quot;: &quot;C13&quot;,
1642 &quot;orderno&quot;: 1008,
1643 &quot;name&quot;: &quot;T. Cody&quot;,
1644 &quot;order_date&quot;: &quot;2020-10-13&quot;
1645 },
1646 {
1647 &quot;custid&quot;: &quot;C13&quot;,
1648 &quot;orderno&quot;: 1009,
1649 &quot;name&quot;: &quot;T. Cody&quot;,
1650 &quot;order_date&quot;: &quot;2020-10-13&quot;
1651 },
1652 {
1653 &quot;custid&quot;: &quot;C25&quot;,
1654 &quot;name&quot;: &quot;M. Sinclair&quot;
1655 }
1656]
1657</pre></div></div>
1658
1659<p>As you can see from the result of this left outer join, our data includes four orders from customer T. Cody, but no orders from customer M. Sinclair. The behavior of left outer join in SQL++ is different from that of SQL. SQL would have provided M. Sinclair with an order in which all the fields were <tt>null</tt>. SQL++, on the other hand, deals with schemaless data, which permits it to simply omit the order fields from the outer join.</p>
1660<p>Now we&#x2019;re ready to look at a new kind of join that was not provided (or needed) in original SQL. Consider this query:</p></div>
1661<div class="section">
1662<h5><a name="Example"></a>Example</h5>
1663<p>(Q3.13) For every case in which an item is ordered in a quantity greater than 100, show the order number, date, item number, and quantity.</p>
1664
1665<div>
1666<div>
1667<pre class="source">FROM orders AS o, o.items AS i
1668WHERE i.qty &gt; 100
1669SELECT o.orderno, o.order_date, i.itemno AS item_number,
1670 i.qty AS quantity
1671ORDER BY o.orderno, item_number;
1672</pre></div></div>
1673
1674<p>Result:</p>
1675
1676<div>
1677<div>
1678<pre class="source">[
1679 {
1680 &quot;orderno&quot;: 1002,
1681 &quot;order_date&quot;: &quot;2020-05-01&quot;,
1682 &quot;item_number&quot;: 680,
1683 &quot;quantity&quot;: 150
1684 },
1685 {
1686 &quot;orderno&quot;: 1005,
1687 &quot;order_date&quot;: &quot;2020-08-30&quot;,
1688 &quot;item_number&quot;: 347,
1689 &quot;quantity&quot;: 120
1690 },
1691 {
1692 &quot;orderno&quot;: 1006,
1693 &quot;order_date&quot;: &quot;2020-09-02&quot;,
1694 &quot;item_number&quot;: 460,
1695 &quot;quantity&quot;: 120
1696 }
1697]
1698</pre></div></div>
1699
1700<p>Q3.13 illustrates a feature called <i>left-correlation</i> in the <tt>FROM</tt> clause. Notice that we are joining <tt>orders</tt>, which is a dataset, to <tt>items</tt>, which is an array nested inside each order. In effect, for each order, we are unnesting the <tt>items</tt> array and joining it to the <tt>order</tt> as though it were a separate collection. For this reason, this kind of query is sometimes called an <i>unnesting query</i>. The keyword <tt>UNNEST</tt> may be used whenever left-correlation is used in a <tt>FROM</tt> clause, as shown in this example:</p></div>
1701<div class="section">
1702<h5><a name="Example"></a>Example</h5>
1703<p>(Q3.14) Alternative statement of Q3.13 (same result).</p>
1704
1705<div>
1706<div>
1707<pre class="source">FROM orders AS o UNNEST o.items AS i
1708WHERE i.qty &gt; 100
1709SELECT o.orderno, o.order_date, i.itemno AS item_number,
1710 i.qty AS quantity
1711ORDER BY o.orderno, item_number;
1712</pre></div></div>
1713
1714<p>The results of Q3.13 and Q3.14 are exactly the same. <tt>UNNEST</tt> serves as a reminder that left-correlation is being used to join an object with its nested items. The join condition in Q3.14 is expressed by the left-correlation: each order <tt>o</tt> is joined to its own items, referenced as <tt>o.items</tt>. The result of the <tt>FROM</tt> clause is a stream of binding tuples, each containing two variables, <tt>o</tt> and <tt>i</tt>. The variable <tt>o</tt> is bound to an order and the variable <tt>i</tt> is bound to one item inside that order.</p>
1715<p>Like <tt>JOIN</tt>, <tt>UNNEST</tt> has a <tt>LEFT OUTER</tt> option. Q3.14 could have specified:</p>
1716
1717<div>
1718<div>
1719<pre class="source">FROM orders AS o LEFT OUTER UNNEST o.items AS i
1720</pre></div></div>
1721
1722<p>In this case, orders that have no nested items would appear in the query result.</p></div></div></div></div>
1723<div class="section">
1724<h2><a name="LET_Clause"></a><a name="Let_clauses" id="Let_clauses">LET Clause</a></h2>
1725<div class="section">
1726<div class="section">
1727<div class="section">
1728<h5><a name="LetClause"></a>LetClause</h5>
1729<p><img src="../images/diagrams/LetClause.png" alt="" /></p>
1730<p>Synonyms for <tt>LET</tt>: <tt>LETTING</tt></p>
1731<p><tt>LET</tt> clauses can be useful when a (complex) expression is used several times within a query, allowing it to be written once to make the query more concise. The word <tt>LETTING</tt> can also be used, although this is not as common. The next query shows an example.</p></div>
1732<div class="section">
1733<h5><a name="Example"></a>Example</h5>
1734<p>(Q3.15) For each item in an order, the revenue is defined as the quantity times the price of that item. Find individual items for which the revenue is greater than 5000. For each of these, list the order number, item number, and revenue, in descending order by revenue.</p>
1735
1736<div>
1737<div>
1738<pre class="source">FROM orders AS o, o.items AS i
1739LET revenue = i.qty * i.price
1740WHERE revenue &gt; 5000
1741SELECT o.orderno, i.itemno, revenue
1742ORDER by revenue desc;
1743</pre></div></div>
1744
1745<p>Result:</p>
1746
1747<div>
1748<div>
1749<pre class="source">[
1750 {
1751 &quot;orderno&quot;: 1006,
1752 &quot;itemno&quot;: 460,
1753 &quot;revenue&quot;: 11997.6
1754 },
1755 {
1756 &quot;orderno&quot;: 1002,
1757 &quot;itemno&quot;: 460,
1758 &quot;revenue&quot;: 9594.05
1759 },
1760 {
1761 &quot;orderno&quot;: 1006,
1762 &quot;itemno&quot;: 120,
1763 &quot;revenue&quot;: 5525
1764 }
1765]
1766</pre></div></div>
1767
1768<p>The expression for computing revenue is defined once in the <tt>LET</tt> clause and then used three times in the remainder of the query. Avoiding repetition of the revenue expression makes the query shorter and less prone to errors.</p></div></div></div></div>
1769<div class="section">
1770<h2><a name="WHERE_Clause"></a><a name="Where_having_clauses" id="Where_having_clauses">WHERE Clause</a></h2>
1771<div class="section">
1772<div class="section">
1773<div class="section">
1774<h5><a name="WhereClause"></a>WhereClause</h5>
1775<p><img src="../images/diagrams/WhereClause.png" alt="" /></p>
1776<p>The purpose of a <tt>WHERE</tt> clause is to operate on the stream of binding tuples generated by the <tt>FROM</tt> clause, filtering out the tuples that do not satisfy a certain condition. The condition is specified by an expression based on the variable names in the binding tuples. If the expression evaluates to true, the tuple remains in the stream; if it evaluates to anything else, including <tt>null</tt> or <tt>missing</tt>, it is filtered out. The surviving tuples are then passed along to the next clause to be processed (usually either <tt>GROUP BY</tt> or <tt>SELECT</tt>).</p>
1777<p>Often, the expression in a <tt>WHERE</tt> clause is some kind of comparison like <tt>quantity &gt; 100</tt>. However, any kind of expression is allowed in a <tt>WHERE</tt> clause. The only thing that matters is whether the expression returns <tt>true</tt> or not.</p></div></div></div></div>
1778<div class="section">
1779<h2><a name="Grouping" id="Grouping">Grouping</a></h2>
1780<p>Grouping is especially important when manipulating hierarchies like the ones that are often found in JSON data. Often you will want to generate output data that includes both summary data and line items within the summaries. For this purpose, SQL++ supports several important extensions to the traditional grouping features of SQL. The familiar <tt>GROUP BY</tt> and <tt>HAVING</tt> clauses are still there, and they are joined by a new clause called <tt>GROUP AS</tt>. We&#x2019;ll illustrate these clauses by a series of examples.</p>
1781<div class="section">
1782<h3><a name="GROUP_BY_Clause"></a><a name="Group_By_clauses" id="Group_By_clauses">GROUP BY Clause</a></h3>
1783<div class="section">
1784<div class="section">
1785<h5><a name="GroupByClause"></a>GroupByClause</h5>
1786<p><img src="../images/diagrams/GroupByClause.png" alt="" /></p></div>
1787<div class="section">
1788<h5><a name="GroupingElement"></a>GroupingElement</h5>
1789<p><img src="../images/diagrams/GroupingElement.png" alt="" /></p></div>
1790<div class="section">
1791<h5><a name="OrdinaryGroupingSet"></a>OrdinaryGroupingSet</h5>
1792<p><img src="../images/diagrams/OrdinaryGroupingSet.png" alt="" /></p></div>
1793<div class="section">
1794<h5><a name="NamedExpr"></a>NamedExpr</h5>
1795<p><img src="../images/diagrams/NamedExpr.png" alt="" /></p>
1796<p>We&#x2019;ll begin our discussion of grouping with an example from ordinary SQL.</p></div>
1797<div class="section">
1798<h5><a name="Example"></a>Example</h5>
1799<p>(Q3.16) List the number of orders placed by each customer who has placed an order.</p>
1800
1801<div>
1802<div>
1803<pre class="source">SELECT o.custid, COUNT(o.orderno) AS `order count`
1804FROM orders AS o
1805GROUP BY o.custid
1806ORDER BY o.custid;
1807</pre></div></div>
1808
1809<p>Result:</p>
1810
1811<div>
1812<div>
1813<pre class="source">[
1814 {
1815 &quot;order count&quot;: 4,
1816 &quot;custid&quot;: &quot;C13&quot;
1817 },
1818 {
1819 &quot;order count&quot;: 1,
1820 &quot;custid&quot;: &quot;C31&quot;
1821 },
1822 {
1823 &quot;order count&quot;: 1,
1824 &quot;custid&quot;: &quot;C35&quot;
1825 },
1826 {
1827 &quot;order count&quot;: 1,
1828 &quot;custid&quot;: &quot;C37&quot;
1829 },
1830 {
1831 &quot;order count&quot;: 2,
1832 &quot;custid&quot;: &quot;C41&quot;
1833 }
1834]
1835</pre></div></div>
1836
1837<p>The input to a <tt>GROUP BY</tt> clause is the stream of binding tuples generated by the <tt>FROM</tt> and <tt>WHERE</tt> clauses. In this query, before grouping, the variable <tt>o</tt> is bound to each object in the <tt>orders</tt> collection in turn.</p>
1838<p>SQL++ evaluates the expression in the <tt>GROUP BY</tt> clause, called the grouping expression, once for each of the binding tuples. It then organizes the results into groups in which the grouping expression has a common value (as defined by the <tt>=</tt> operator). In this example, the grouping expression is <tt>o.custid</tt>, and each of the resulting groups is a set of <tt>orders</tt> that have the same <tt>custid</tt>. If necessary, a group is formed for <tt>orders</tt> in which <tt>custid</tt> is <tt>null</tt>, and another group is formed for <tt>orders</tt> that have no <tt>custid</tt>. This query uses the aggregating function <tt>COUNT(o.orderno)</tt>, which counts how many order numbers are in each group. If we are sure that each order object has a distinct <tt>orderno</tt>, we could also simply count the order objects in each group by using <tt>COUNT(*)</tt> in place of <tt>COUNT(o.orderno)</tt>.</p>
1839<p>In the <tt>GROUP BY</tt> clause, you may optionally define an alias for the grouping expression. For example, in Q3.16, you could have written <tt>GROUP BY o.custid AS cid</tt>. The alias <tt>cid</tt> could then be used in place of the grouping expression in later clauses. In cases where the grouping expression contains an operator, it is especially helpful to define an alias (for example, <tt>GROUP BY salary + bonus AS pay</tt>).</p>
1840<p>Q3.16 had a single grouping expression, <tt>o.custid</tt>. If a query has multiple grouping expressions, the combination of grouping expressions is evaluated for every binding tuple, and the stream of binding tuples is partitioned into groups that have values in common for all of the grouping expressions. We&#x2019;ll see an example of such a query in Q3.18.</p>
1841<p>After grouping, the number of binding tuples is reduced: instead of a binding tuple for each of the input objects, there is a binding tuple for each group. The grouping expressions (identified by their aliases, if any) are bound to the results of their evaluations. However, all the non-grouping fields (that is, fields that were not named in the grouping expressions) are accessible only in a special way: as an argument of one of the special aggregation pseudo-functions such as <tt>SUM</tt>, <tt>AVG</tt>, <tt>MAX</tt>, <tt>MIN</tt>, <tt>STDEV</tt> and <tt>COUNT</tt>. The clauses that come after grouping can access only properties of groups, including the grouping expressions and aggregate properties of the groups such as <tt>COUNT(o.orderno)</tt> or <tt>COUNT(*)</tt>. (We&#x2019;ll see an exception when we discuss the new <tt>GROUP AS</tt> clause.)</p>
1842<p>You may notice that the results of Q3.16 do not include customers who have no <tt>orders</tt>. If we want to include these <tt>customers</tt>, we need to use an outer join between the <tt>customers</tt> and <tt>orders</tt> collections. This is illustrated by the following example, which also includes the name of each customer.</p></div>
1843<div class="section">
1844<h5><a name="Example"></a>Example</h5>
1845<p>(Q3.17) List the number of orders placed by each customer including those customers who have placed no orders.</p>
1846
1847<div>
1848<div>
1849<pre class="source">SELECT c.custid, c.name, COUNT(o.orderno) AS `order count`
1850FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
1851GROUP BY c.custid, c.name
1852ORDER BY c.custid;
1853</pre></div></div>
1854
1855<p>Result:</p>
1856
1857<div>
1858<div>
1859<pre class="source">[
1860 {
1861 &quot;custid&quot;: &quot;C13&quot;,
1862 &quot;order count&quot;: 4,
1863 &quot;name&quot;: &quot;T. Cody&quot;
1864 },
1865 {
1866 &quot;custid&quot;: &quot;C25&quot;,
1867 &quot;order count&quot;: 0,
1868 &quot;name&quot;: &quot;M. Sinclair&quot;
1869 },
1870 {
1871 &quot;custid&quot;: &quot;C31&quot;,
1872 &quot;order count&quot;: 1,
1873 &quot;name&quot;: &quot;B. Pruitt&quot;
1874 },
1875 {
1876 &quot;custid&quot;: &quot;C35&quot;,
1877 &quot;order count&quot;: 1,
1878 &quot;name&quot;: &quot;J. Roberts&quot;
1879 },
1880 {
1881 &quot;custid&quot;: &quot;C37&quot;,
1882 &quot;order count&quot;: 1,
1883 &quot;name&quot;: &quot;T. Henry&quot;
1884 },
1885 {
1886 &quot;custid&quot;: &quot;C41&quot;,
1887 &quot;order count&quot;: 2,
1888 &quot;name&quot;: &quot;R. Dodge&quot;
1889 },
1890 {
1891 &quot;custid&quot;: &quot;C47&quot;,
1892 &quot;order count&quot;: 0,
1893 &quot;name&quot;: &quot;S. Logan&quot;
1894 }
1895]
1896</pre></div></div>
1897
1898<p>Notice in Q3.17 what happens when the special aggregation function <tt>COUNT</tt> is applied to a collection that does not exist, such as the orders of M. Sinclair: it returns zero. This behavior is unlike that of the other special aggregation functions <tt>SUM</tt>, <tt>AVG</tt>, <tt>MAX</tt>, and <tt>MIN</tt>, which return <tt>null</tt> if their operand does not exist. This should make you cautious about the <tt>COUNT</tt> function: If it returns zero, that may mean that the collection you are counting has zero members, or that it does not exist, or that you have misspelled the collection&#x2019;s name.</p>
1899<p>Q3.17 also shows how a query block can have more than one grouping expression. In general, the <tt>GROUP BY</tt> clause produces a binding tuple for each different combination of values for the grouping expressions. In Q3.17, the <tt>c.custid</tt> field uniquely identifies a customer, so adding <tt>c.name</tt> as a grouping expression does not result in any more groups. Nevertheless, <tt>c.name</tt> must be included as a grouping expression if it is to be referenced outside (after) the <tt>GROUP BY</tt> clause. If <tt>c.name</tt> were not included in the <tt>GROUP BY</tt> clause, it would not be a group property and could not be used in the <tt>SELECT</tt> clause.</p>
1900<p>Of course, a grouping expression need not be a simple field-name. In Q3.18, orders are grouped by month, using a temporal function to extract the month component of the order dates. In cases like this, it is helpful to define an alias for the grouping expression so that it can be referenced elsewhere in the query, e.g., in the <tt>SELECT</tt> clause.</p></div>
1901<div class="section">
1902<h5><a name="Example"></a>Example</h5>
1903<p>(Q3.18) Find the months in 2020 that had the largest numbers of orders; list the months and their numbers of orders. (Return the top three.)</p>
1904
1905<div>
1906<div>
1907<pre class="source">FROM orders AS o
1908WHERE get_year(date(o.order_date)) = 2020
1909GROUP BY get_month(date(o.order_date)) AS month
1910SELECT month, COUNT(*) AS order_count
1911ORDER BY order_count DESC, month DESC
1912LIMIT 3;
1913</pre></div></div>
1914
1915<p>Result:</p>
1916
1917<div>
1918<div>
1919<pre class="source">[
1920 {
1921 &quot;month&quot;: 10,
1922 &quot;order_count&quot;: 2
1923 },
1924 {
1925 &quot;month&quot;: 9,
1926 &quot;order_count&quot;: 2
1927 },
1928 {
1929 &quot;month&quot;: 8,
1930 &quot;order_count&quot;: 1
1931 }
1932]
1933</pre></div></div>
1934
1935<p>Groups are commonly formed from named collections like <tt>customers</tt> and <tt>orders</tt>. But in some queries you need to form groups from a collection that is nested inside another collection, such as <tt>items</tt> inside <tt>orders</tt>. In SQL++ you can do this by using left-correlation in the <tt>FROM</tt> clause to unnest the inner collection, joining the inner collection with the outer collection, and then performing the grouping on the join, as illustrated in Q3.19.</p>
1936<p>Q3.19 also shows how a <tt>LET</tt> clause can be used after a <tt>GROUP BY</tt> clause to define an expression that is referenced multiple times in later clauses.</p></div>
1937<div class="section">
1938<h5><a name="Example"></a>Example</h5>
1939<p>(Q3.19) For each order, define the total revenue of the order as the sum of quantity times price for all the items in that order. List the total revenue for all the orders placed by the customer with id &#x201c;C13&#x201d;, in descending order by total revenue.</p>
1940
1941<div>
1942<div>
1943<pre class="source">FROM orders as o, o.items as i
1944WHERE o.custid = &quot;C13&quot;
1945GROUP BY o.orderno
1946LET total_revenue = sum(i.qty * i.price)
1947SELECT o.orderno, total_revenue
1948ORDER BY total_revenue desc;
1949</pre></div></div>
1950
1951<p>Result:</p>
1952
1953<div>
1954<div>
1955<pre class="source">[
1956 {
1957 &quot;orderno&quot;: 1002,
1958 &quot;total_revenue&quot;: 10906.55
1959 },
1960 {
1961 &quot;orderno&quot;: 1008,
1962 &quot;total_revenue&quot;: 1999.8
1963 },
1964 {
1965 &quot;orderno&quot;: 1007,
1966 &quot;total_revenue&quot;: 130.45
1967 }
1968]
1969</pre></div></div>
1970</div></div>
1971<div class="section">
1972<h4><a name="ROLLUP"></a><a name="Rollup" id="Rollup">ROLLUP</a></h4>
1973<p>The <tt>ROLLUP</tt> subclause is an aggregation feature that extends the functionality of the <tt>GROUP BY</tt> clause. It returns extra <i>super-aggregate</i> items in the query results, giving subtotals and a grand total for the aggregate functions in the query. To illustrate, first consider the following query.</p>
1974<div class="section">
1975<h5><a name="Example"></a>Example</h5>
1976<p>(Q3.R1) List the number of orders, grouped by customer region and city.</p>
1977
1978<div>
1979<div>
1980<pre class="source">SELECT customer_region AS Region,
1981 customer_city AS City,
1982 COUNT(o.orderno) AS `Order Count`
1983FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
1984LET address_line = SPLIT(c.address.city, &quot;,&quot;),
1985 customer_city = TRIM(address_line[0]),
1986 customer_region = TRIM(address_line[1])
1987GROUP BY customer_region, customer_city
1988ORDER BY customer_region ASC, customer_city ASC, `Order Count` DESC;
1989</pre></div></div>
1990
1991<p>Result:</p>
1992
1993<div>
1994<div>
1995<pre class="source">[
1996 {
1997 &quot;Region&quot;: &quot;Italy&quot;,
1998 &quot;City&quot;: &quot;Rome&quot;,
1999 &quot;Order Count&quot;: 0
2000 },
2001 {
2002 &quot;Region&quot;: &quot;MA&quot;,
2003 &quot;City&quot;: &quot;Boston&quot;,
2004 &quot;Order Count&quot;: 2
2005 },
2006 {
2007 &quot;Region&quot;: &quot;MA&quot;,
2008 &quot;City&quot;: &quot;Hanover&quot;,
2009 &quot;Order Count&quot;: 0
2010 },
2011 {
2012 &quot;Region&quot;: &quot;MO&quot;,
2013 &quot;City&quot;: &quot;St. Louis&quot;,
2014 &quot;Order Count&quot;: 7
2015 }
2016]
2017</pre></div></div>
2018
2019<p>This query uses string functions to split each customer&#x2019;s address into city and region. The query then counts the total number of orders placed by each customer, and groups the results first by customer region, then by customer city. The aggregate results (labeled <tt>Order Count</tt>) are only shown by city, and there are no subtotals or grand total. We can add these using the <tt>ROLLUP</tt> subclause, as in the following example.</p></div>
2020<div class="section">
2021<h5><a name="Example"></a>Example</h5>
2022<p>(Q3.R2) List the number of orders by customer region and city, including subtotals and a grand total.</p>
2023
2024<div>
2025<div>
2026<pre class="source">SELECT customer_region AS Region,
2027 customer_city AS City,
2028 COUNT(o.orderno) AS `Order Count`
2029FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
2030LET address_line = SPLIT(c.address.city, &quot;,&quot;),
2031 customer_city = TRIM(address_line[0]),
2032 customer_region = TRIM(address_line[1])
2033GROUP BY ROLLUP(customer_region, customer_city)
2034ORDER BY customer_region ASC, customer_city ASC, `Order Count` DESC;
2035</pre></div></div>
2036
2037<p>Result:</p>
2038
2039<div>
2040<div>
2041<pre class="source">[
2042 {
2043 &quot;Region&quot;: null,
2044 &quot;City&quot;: null,
2045 &quot;Order Count&quot;: 9
2046 },
2047 {
2048 &quot;Region&quot;: &quot;Italy&quot;,
2049 &quot;City&quot;: null,
2050 &quot;Order Count&quot;: 0
2051 },
2052 {
2053 &quot;Region&quot;: &quot;Italy&quot;,
2054 &quot;City&quot;: &quot;Rome&quot;,
2055 &quot;Order Count&quot;: 0
2056 },
2057 {
2058 &quot;Region&quot;: &quot;MA&quot;,
2059 &quot;City&quot;: null,
2060 &quot;Order Count&quot;: 2
2061 },
2062 {
2063 &quot;Region&quot;: &quot;MA&quot;,
2064 &quot;City&quot;: &quot;Boston&quot;,
2065 &quot;Order Count&quot;: 2
2066 },
2067 {
2068 &quot;Region&quot;: &quot;MA&quot;,
2069 &quot;City&quot;: &quot;Hanover&quot;,
2070 &quot;Order Count&quot;: 0
2071 },
2072 {
2073 &quot;Region&quot;: &quot;MO&quot;,
2074 &quot;City&quot;: null,
2075 &quot;Order Count&quot;: 7
2076 },
2077 {
2078 &quot;Region&quot;: &quot;MO&quot;,
2079 &quot;City&quot;: &quot;St. Louis&quot;,
2080 &quot;Order Count&quot;: 7
2081 }
2082]
2083</pre></div></div>
2084
2085<p>With the addition of the <tt>ROLLUP</tt> subclause, the results now include an extra item at the start of each region, giving the subtotal for that region. There is also another extra item at the very start of the results, giving the grand total for all regions.</p>
2086<p>The order of the fields specified by the <tt>ROLLUP</tt> subclause determines the hierarchy of the super-aggregate items. The customer region is specified first, followed by the customer city; so the results are aggregated by region first, and then by city within each region.</p>
2087<p>The grand total returns <tt>null</tt> as a value for the city and the region, and the subtotals return <tt>null</tt> as the value for the city, which may make the results hard to understand at first glance. A workaround for this is given in the next example.</p></div>
2088<div class="section">
2089<h5><a name="Example"></a>Example</h5>
2090<p>(Q3.R3) List the number of orders by customer region and city, with meaningful subtotals and grand total.</p>
2091
2092<div>
2093<div>
2094<pre class="source">SELECT IFNULL(customer_region, &quot;All regions&quot;) AS Region,
2095 IFNULL(customer_city, &quot;All cities&quot;) AS City,
2096 COUNT(o.orderno) AS `Order Count`
2097FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
2098LET address_line = SPLIT(c.address.city, &quot;,&quot;),
2099 customer_city = TRIM(address_line[0]),
2100 customer_region = TRIM(address_line[1])
2101GROUP BY ROLLUP(customer_region, customer_city)
2102ORDER BY customer_region ASC, customer_city ASC, `Order Count` DESC;
2103</pre></div></div>
2104
2105<p>Result:</p>
2106
2107<div>
2108<div>
2109<pre class="source">[
2110 {
2111 &quot;Region&quot;: &quot;All regions&quot;,
2112 &quot;City&quot;: &quot;All cities&quot;,
2113 &quot;Order Count&quot;: 9
2114 },
2115 {
2116 &quot;Region&quot;: &quot;Italy&quot;,
2117 &quot;City&quot;: &quot;All cities&quot;,
2118 &quot;Order Count&quot;: 0
2119 },
2120 {
2121 &quot;Region&quot;: &quot;Italy&quot;,
2122 &quot;City&quot;: &quot;Rome&quot;,
2123 &quot;Order Count&quot;: 0
2124 },
2125 {
2126 &quot;Region&quot;: &quot;MA&quot;,
2127 &quot;City&quot;: &quot;All cities&quot;,
2128 &quot;Order Count&quot;: 2
2129 },
2130 {
2131 &quot;Region&quot;: &quot;MA&quot;,
2132 &quot;City&quot;: &quot;Boston&quot;,
2133 &quot;Order Count&quot;: 2
2134 },
2135 {
2136 &quot;Region&quot;: &quot;MA&quot;,
2137 &quot;City&quot;: &quot;Hanover&quot;,
2138 &quot;Order Count&quot;: 0
2139 },
2140 {
2141 &quot;Region&quot;: &quot;MO&quot;,
2142 &quot;City&quot;: &quot;All cities&quot;,
2143 &quot;Order Count&quot;: 7
2144 },
2145 {
2146 &quot;Region&quot;: &quot;MO&quot;,
2147 &quot;City&quot;: &quot;St. Louis&quot;,
2148 &quot;Order Count&quot;: 7
2149 }
2150]
2151</pre></div></div>
2152
2153<p>This query uses the <tt>IFNULL</tt> function to populate the region and city fields with meaningful values for the super-aggregate items. This makes the results clearer and more readable.</p></div></div>
2154<div class="section">
2155<h4><a name="CUBE"></a><a name="Cube" id="Cube">CUBE</a></h4>
2156<p>The <tt>CUBE</tt> subclause is similar to the <tt>ROLLUP</tt> subclause, in that it returns extra super-aggregate items in the query results, giving subtotals and a grand total for the aggregate functions. Whereas <tt>ROLLUP</tt> returns a grand total and a hierarchy of subtotals based on the specified fields, the <tt>CUBE</tt> subclause returns a grand total and subtotals for every possible combination of the specified fields.</p>
2157<p>The following example is a modification of Q3.R3 which illustrates the <tt>CUBE</tt> subclause.</p>
2158<div class="section">
2159<h5><a name="Example"></a>Example</h5>
2160<p>(Q3.C) List the number of orders by customer region and order date, with all possible subtotals and a grand total.</p>
2161
2162<div>
2163<div>
2164<pre class="source">SELECT IFNULL(customer_region, &quot;All regions&quot;) AS Region,
2165 IFNULL(order_month, &quot;All months&quot;) AS Month,
2166 COUNT(o.orderno) AS `Order Count`
2167FROM customers AS c INNER JOIN orders AS o ON c.custid = o.custid
2168LET address_line = SPLIT(c.address.city, &quot;,&quot;),
2169 customer_region = TRIM(address_line[1]),
2170 order_month = get_month(date(o.order_date))
2171GROUP BY CUBE(customer_region, order_month)
2172ORDER BY customer_region ASC, order_month ASC;
2173</pre></div></div>
2174
2175<p>Result:</p>
2176
2177<div>
2178<div>
2179<pre class="source">[
2180 {
2181 &quot;Region&quot;: &quot;All regions&quot;,
2182 &quot;Order Count&quot;: 9,
2183 &quot;Month&quot;: &quot;All months&quot;
2184 },
2185 {
2186 &quot;Region&quot;: &quot;All regions&quot;,
2187 &quot;Order Count&quot;: 1,
2188 &quot;Month&quot;: 4
2189 },
2190 {
2191 &quot;Region&quot;: &quot;All regions&quot;,
2192 &quot;Order Count&quot;: 1,
2193 &quot;Month&quot;: 5
2194 },
2195 {
2196 &quot;Region&quot;: &quot;All regions&quot;,
2197 &quot;Order Count&quot;: 1,
2198 &quot;Month&quot;: 6
2199 },
2200 {
2201 &quot;Region&quot;: &quot;All regions&quot;,
2202 &quot;Order Count&quot;: 1,
2203 &quot;Month&quot;: 7
2204 },
2205 {
2206 &quot;Region&quot;: &quot;All regions&quot;,
2207 &quot;Order Count&quot;: 1,
2208 &quot;Month&quot;: 8
2209 },
2210 {
2211 &quot;Region&quot;: &quot;All regions&quot;,
2212 &quot;Order Count&quot;: 2,
2213 &quot;Month&quot;: 9
2214 },
2215 {
2216 &quot;Region&quot;: &quot;All regions&quot;,
2217 &quot;Order Count&quot;: 2,
2218 &quot;Month&quot;: 10
2219 },
2220 {
2221 &quot;Region&quot;: &quot;MA&quot;,
2222 &quot;Order Count&quot;: 2,
2223 &quot;Month&quot;: &quot;All months&quot;
2224 },
2225 {
2226 &quot;Region&quot;: &quot;MA&quot;,
2227 &quot;Order Count&quot;: 1,
2228 &quot;Month&quot;: 7
2229 },
2230 {
2231 &quot;Region&quot;: &quot;MA&quot;,
2232 &quot;Order Count&quot;: 1,
2233 &quot;Month&quot;: 8
2234 },
2235 {
2236 &quot;Region&quot;: &quot;MO&quot;,
2237 &quot;Order Count&quot;: 7,
2238 &quot;Month&quot;: &quot;All months&quot;
2239 },
2240 {
2241 &quot;Region&quot;: &quot;MO&quot;,
2242 &quot;Order Count&quot;: 1,
2243 &quot;Month&quot;: 4
2244 },
2245 {
2246 &quot;Region&quot;: &quot;MO&quot;,
2247 &quot;Order Count&quot;: 1,
2248 &quot;Month&quot;: 5
2249 },
2250 {
2251 &quot;Region&quot;: &quot;MO&quot;,
2252 &quot;Order Count&quot;: 1,
2253 &quot;Month&quot;: 6
2254 },
2255 {
2256 &quot;Region&quot;: &quot;MO&quot;,
2257 &quot;Order Count&quot;: 2,
2258 &quot;Month&quot;: 9
2259 },
2260 {
2261 &quot;Region&quot;: &quot;MO&quot;,
2262 &quot;Order Count&quot;: 2,
2263 &quot;Month&quot;: 10
2264 }
2265]
2266</pre></div></div>
2267
2268<p>To simplify the results, this query uses an inner join, so that customers who have not placed an order are not included in the totals. The query uses string functions to extract the region from each customer&#x2019;s address, and a temporal function to extract the month from the order date.</p>
2269<p>The query uses the <tt>CUBE</tt> subclause with customer region and order month. This means that there are four possible aggregates to calculate:</p>
2270<ul>
2271
2272<li>All regions, all months</li>
2273<li>All regions, each month</li>
2274<li>Each region, all months</li>
2275<li>Each region, each month</li>
2276</ul>
2277<p>The results start with the grand total, showing the total number of orders across all regions for all months. This is followed by date subtotals, showing the number of orders across all regions for each month. Following that are the regional subtotals, showing the total number of orders for all months in each region; and the result items, giving the number of orders for each month in each region.</p>
2278<p>The query also uses the <tt>IFNULL</tt> function to populate the region and date fields with meaningful values for the super-aggregate items. This makes the results clearer and more readable.</p></div></div></div>
2279<div class="section">
2280<h3><a name="HAVING_Clause"></a><a name="Having_clauses" id="Having_clauses">HAVING Clause</a></h3>
2281<div class="section">
2282<div class="section">
2283<h5><a name="HavingClause"></a>HavingClause</h5>
2284<p><img src="../images/diagrams/HavingClause.png" alt="" /></p>
2285<p>The <tt>HAVING</tt> clause is very similar to the <tt>WHERE</tt> clause, except that it comes after <tt>GROUP BY</tt> and applies a filter to groups rather than to individual objects. Here&#x2019;s an example of a <tt>HAVING</tt> clause that filters orders by applying a condition to their nested arrays of <tt>items</tt>.</p>
2286<p>By adding a <tt>HAVING</tt> clause to Q3.19, we can filter the results to include only those orders whose total revenue is greater than 5000, as shown in Q3.20.</p></div>
2287<div class="section">
2288<h5><a name="Example"></a>Example</h5>
2289<p>(Q3.20) Modify Q3.19 to include only orders whose total revenue is greater than 5000.</p>
2290
2291<div>
2292<div>
2293<pre class="source">FROM orders AS o, o.items as i
2294WHERE o.custid = &quot;C13&quot;
2295GROUP BY o.orderno
2296LET total_revenue = sum(i.qty * i.price)
2297HAVING total_revenue &gt; 5000
2298SELECT o.orderno, total_revenue
2299ORDER BY total_revenue desc;
2300</pre></div></div>
2301
2302<p>Result:</p>
2303
2304<div>
2305<div>
2306<pre class="source">[
2307 {
2308 &quot;orderno&quot;: 1002,
2309 &quot;total_revenue&quot;: 10906.55
2310 }
2311]
2312</pre></div></div>
2313</div></div></div>
2314<div class="section">
2315<h3><a name="Aggregation_Pseudo-Functions"></a><a name="Aggregation_PseudoFunctions" id="Aggregation_PseudoFunctions">Aggregation Pseudo-Functions</a></h3>
2316<p>SQL provides several special functions for performing aggregations on groups including: <tt>SUM</tt>, <tt>AVG</tt>, <tt>MAX</tt>, <tt>MIN</tt>, and <tt>COUNT</tt> (some implementations provide more). These same functions are supported in SQL++. However, it&#x2019;s worth spending some time on these special functions because they don&#x2019;t behave like ordinary functions. They are called &#x201c;pseudo-functions&#x201d; here because they don&#x2019;t evaluate their operands in the same way as ordinary functions. To see the difference, consider these two examples, which are syntactically similar:</p>
2317<div class="section">
2318<div class="section">
2319<h5><a name="Example_1"></a>Example 1</h5>
2320
2321<div>
2322<div>
2323<pre class="source">SELECT LENGTH(name) FROM customers
2324</pre></div></div>
2325
2326<p>In Example 1, <tt>LENGTH</tt> is an ordinary function. It simply evaluates its operand (name) and then returns a result computed from the operand.</p></div>
2327<div class="section">
2328<h5><a name="Example_2"></a>Example 2</h5>
2329
2330<div>
2331<div>
2332<pre class="source">SELECT AVG(rating) FROM customers
2333</pre></div></div>
2334
2335<p>The effect of <tt>AVG</tt> in Example 2 is quite different. Rather than performing a computation on an individual rating value, <tt>AVG</tt> has a global effect: it effectively restructures the query. As a pseudo-function, <tt>AVG</tt> requires its operand to be a group; therefore, it automatically collects all the rating values from the query block and forms them into a group.</p>
2336<p>The aggregation pseudo-functions always require their operand to be a group. In some queries, the group is explicitly generated by a <tt>GROUP BY</tt> clause, as in Q3.21:</p></div>
2337<div class="section">
2338<h5><a name="Example"></a>Example</h5>
2339<p>(Q3.21) List the average credit rating of customers by zipcode.</p>
2340
2341<div>
2342<div>
2343<pre class="source">FROM customers AS c
2344GROUP BY c.address.zipcode AS zip
2345SELECT zip, AVG(c.rating) AS `avg credit rating`
2346ORDER BY zip;
2347</pre></div></div>
2348
2349<p>Result:</p>
2350
2351<div>
2352<div>
2353<pre class="source">[
2354 {
2355 &quot;avg credit rating&quot;: 625
2356 },
2357 {
2358 &quot;avg credit rating&quot;: 657.5,
2359 &quot;zip&quot;: &quot;02115&quot;
2360 },
2361 {
2362 &quot;avg credit rating&quot;: 690,
2363 &quot;zip&quot;: &quot;02340&quot;
2364 },
2365 {
2366 &quot;avg credit rating&quot;: 695,
2367 &quot;zip&quot;: &quot;63101&quot;
2368 }
2369]
2370</pre></div></div>
2371
2372<p>Note in the result of Q3.21 that one or more customers had no zipcode. These customers were formed into a group for which the value of the grouping key is missing. When the query results were returned in JSON format, the <tt>missing</tt> key simply does not appear. Also note that the group whose key is <tt>missing</tt> appears first because <tt>missing</tt> is considered to be smaller than any other value. If some customers had had <tt>null</tt> as a zipcode, they would have been included in another group, appearing after the <tt>missing</tt> group but before the other groups.</p>
2373<p>When an aggregation pseudo-function is used without an explicit <tt>GROUP BY</tt> clause, it implicitly forms the entire query block into a single group, as in Q3.22:</p></div>
2374<div class="section">
2375<h5><a name="Example"></a>Example</h5>
2376<p>(Q3.22) Find the average credit rating among all customers.</p>
2377
2378<div>
2379<div>
2380<pre class="source">FROM customers AS c
2381SELECT AVG(c.rating) AS `avg credit rating`;
2382</pre></div></div>
2383
2384<p>Result:</p>
2385
2386<div>
2387<div>
2388<pre class="source">[
2389 {
2390 &quot;avg credit rating&quot;: 670
2391 }
2392]
2393</pre></div></div>
2394
2395<p>The aggregation pseudo-function <tt>COUNT</tt> has a special form in which its operand is <tt>*</tt> instead of an expression.</p>
2396<p>For example, <tt>SELECT COUNT(*) FROM customers</tt> simply returns the total number of customers, whereas <tt>SELECT COUNT(rating) FROM customers</tt> returns the number of customers who have known ratings (that is, their ratings are not <tt>null</tt> or <tt>missing</tt>).</p>
2397<p>Because the aggregation pseudo-functions sometimes restructure their operands, they can be used only in query blocks where (explicit or implicit) grouping is being done. Therefore the pseudo-functions cannot operate directly on arrays or multisets. For operating directly on JSON collections, SQL++ provides a set of ordinary functions for computing aggregations. Each ordinary aggregation function (except the ones corresponding to <tt>COUNT</tt> and <tt>ARRAY_AGG</tt>) has two versions: one that ignores <tt>null</tt> and <tt>missing</tt> values and one that returns <tt>null</tt> if a <tt>null</tt> or <tt>missing</tt> value is encountered anywhere in the collection. The names of the aggregation functions are as follows:</p>
2398<table border="0" class="table table-striped">
2399<thead>
2400
2401<tr class="a">
2402<th> Aggregation pseudo-function; operates on groups only </th>
2403<th> Ordinary function: Ignores NULL or MISSING values </th>
2404<th> Ordinary function: Returns NULL if NULL or MISSING are encountered</th></tr>
2405</thead><tbody>
2406
2407<tr class="b">
2408<td>SUM</td>
2409<td> ARRAY_SUM</td>
2410<td> STRICT_SUM </td></tr>
2411<tr class="a">
2412<td> AVG </td>
2413<td>ARRAY_AVG</td>
2414<td> STRICT_AVG </td></tr>
2415<tr class="b">
2416<td> MAX </td>
2417<td> ARRAY_MAX</td>
2418<td> STRICT_MAX </td></tr>
2419<tr class="a">
2420<td> MIN </td>
2421<td> ARRAY_MIN</td>
2422<td> STRICT_MIN </td></tr>
2423<tr class="b">
2424<td> COUNT </td>
2425<td>ARRAY_COUNT</td>
2426<td>STRICT_COUNT (see exception below) </td></tr>
2427<tr class="a">
2428<td>STDDEV_SAMP</td>
2429<td>ARRAY_STDDEV_SAMP</td>
2430<td> STRICT_STDDEV_SAMP </td></tr>
2431<tr class="b">
2432<td>STDDEV_POP</td>
2433<td>ARRAY_STDDEV_POP</td>
2434<td> STRICT_STDDEV_POP </td></tr>
2435<tr class="a">
2436<td>VAR_SAMP</td>
2437<td>ARRAY_VAR_SAMP</td>
2438<td> STRICT_VAR_SAMP </td></tr>
2439<tr class="b">
2440<td>VAR_POP</td>
2441<td>ARRAY_VAR_POP</td>
2442<td> STRICT_VAR_POP </td></tr>
2443<tr class="a">
2444<td>SKEWNESS</td>
2445<td>ARRAY_SKEWNESS</td>
2446<td> STRICT_SKEWNESS </td></tr>
2447<tr class="b">
2448<td>KURTOSIS</td>
2449<td>ARRAY_KURTOSIS</td>
2450<td> STRICT_KURTOSIS </td></tr>
2451<tr class="a">
2452<td> </td>
2453<td>ARRAY_AGG</td>
2454<td> </td></tr>
2455</tbody>
2456</table>
2457<p>Exception: the ordinary aggregation function STRICT_COUNT operates on any collection, and returns a count of its items, including null values in the count. In this respect, STRICT_COUNT is more similar to COUNT(*) than to COUNT(expression).</p>
2458<p>Note that the ordinary aggregation functions that ignore <tt>null</tt> have names beginning with &#x201c;ARRAY&#x201d;. This naming convention has historical roots. Despite their names, the functions operate on both arrays and multisets.</p>
2459<p>Because of the special properties of the aggregation pseudo-functions, SQL (and therefore SQL++) is not a pure functional language. But every query that uses a pseudo-function can be expressed as an equivalent query that uses an ordinary function. Q3.23 is an example of how queries can be expressed without pseudo-functions. A more detailed explanation of all of the functions is also available in the section on <a href="builtins.html#AggregateFunctions">Aggregate Functions</a>.</p></div>
2460<div class="section">
2461<h5><a name="Example"></a>Example</h5>
2462<p>(Q3.23) Alternative form of Q3.22, using the ordinary function <tt>ARRAY_AVG</tt> rather than the aggregating pseudo-function <tt>AVG</tt>.</p>
2463
2464<div>
2465<div>
2466<pre class="source">SELECT ARRAY_AVG(
2467 (SELECT VALUE c.rating
2468 FROM customers AS c) ) AS `avg credit rating`;
2469</pre></div></div>
2470
2471<p>Result (same as Q3.22):</p>
2472
2473<div>
2474<div>
2475<pre class="source">[
2476 {
2477 &quot;avg credit rating&quot;: 670
2478 }
2479]
2480</pre></div></div>
2481
2482<p>If the function <tt>STRICT_AVG</tt> had been used in Q3.23 in place of <tt>ARRAY_AVG</tt>, the average credit rating returned by the query would have been <tt>null</tt>, because at least one customer has no credit rating.</p></div></div></div>
2483<div class="section">
2484<h3><a name="GROUP_AS_Clause"></a><a name="Group_As_clauses" id="Group_As_clauses">GROUP AS Clause</a></h3>
2485<div class="section">
2486<div class="section">
2487<h5><a name="GroupAsClause"></a>GroupAsClause</h5>
2488<p><img src="../images/diagrams/GroupAsClause.png" alt="" /></p>
2489<p>JSON is a hierarchical format, and a fully featured JSON query language needs to be able to produce hierarchies of its own, with computed data at every level of the hierarchy. The key feature of SQL++ that makes this possible is the <tt>GROUP AS</tt> clause.</p>
2490<p>A query may have a <tt>GROUP AS</tt> clause only if it has a <tt>GROUP BY</tt> clause. The <tt>GROUP BY</tt> clause &#x201c;hides&#x201d; the original objects in each group, exposing only the grouping expressions and special aggregation functions on the non-grouping fields. The purpose of the <tt>GROUP AS</tt> clause is to make the original objects in the group visible to subsequent clauses. Thus the query can generate output data both for the group as a whole and for the individual objects inside the group.</p>
2491<p>For each group, the <tt>GROUP AS</tt> clause preserves all the objects in the group, just as they were before grouping, and gives a name to this preserved group. The group name can then be used in the <tt>FROM</tt> clause of a subquery to process and return the individual objects in the group.</p>
2492<p>To see how this works, we&#x2019;ll write some queries that investigate the customers in each zipcode and their credit ratings. This would be a good time to review the sample database in <a href="#Manual_data">Appendix 4</a>. A part of the data is summarized below.</p>
2493
2494<div>
2495<div>
2496<pre class="source">Customers in zipcode 02115:
2497 C35, J. Roberts, rating 565
2498 C37, T. Henry, rating 750
2499
2500Customers in zipcode 02340:
2501 C25, M. Sinclair, rating 690
2502
2503Customers in zipcode 63101:
2504 C13, T. Cody, rating 750
2505 C31, B. Pruitt, (no rating)
2506 C41, R. Dodge, rating 640
2507
2508Customers with no zipcode:
2509 C47, S. Logan, rating 625
2510</pre></div></div>
2511
2512<p>Now let&#x2019;s consider the effect of the following clauses:</p>
2513
2514<div>
2515<div>
2516<pre class="source">FROM customers AS c
2517GROUP BY c.address.zipcode
2518GROUP AS g
2519</pre></div></div>
2520
2521<p>This query fragment iterates over the <tt>customers</tt> objects, using the iteration variable <tt>c</tt>. The <tt>GROUP BY</tt> clause forms the objects into groups, each with a common zipcode (including one group for customers with no zipcode). After the <tt>GROUP BY</tt> clause, we can see the grouping expression, <tt>c.address.zipcode</tt>, but other fields such as <tt>c.custid</tt> and <tt>c.name</tt> are visible only to special aggregation functions.</p>
2522<p>The clause <tt>GROUP AS g</tt> now makes the original objects visible again. For each group in turn, the variable <tt>g</tt> is bound to a multiset of objects, each of which has a field named <tt>c</tt>, which in turn contains one of the original objects. Thus after <tt>GROUP AS g</tt>, for the group with zipcode 02115, <tt>g</tt> is bound to the following multiset:</p>
2523
2524<div>
2525<div>
2526<pre class="source">[
2527 { &quot;c&quot;:
2528 { &quot;custid&quot;: &quot;C35&quot;,
2529 &quot;name&quot;: &quot;J. Roberts&quot;,
2530 &quot;address&quot;:
2531 { &quot;street&quot;: &quot;420 Green St.&quot;,
2532 &quot;city&quot;: &quot;Boston, MA&quot;,
2533 &quot;zipcode&quot;: &quot;02115&quot;
2534 },
2535 &quot;rating&quot;: 565
2536 }
2537 },
2538 { &quot;c&quot;:
2539 { &quot;custid&quot;: &quot;C37&quot;,
2540 &quot;name&quot;: &quot;T. Henry&quot;,
2541 &quot;address&quot;:
2542 { &quot;street&quot;: &quot;120 Harbor Blvd.&quot;,
2543 &quot;city&quot;: &quot;Boston, MA&quot;,
2544 &quot;zipcode&quot;: &quot;02115&quot;
2545 },
2546 &quot;rating&quot;: 750
2547 }
2548 }
2549]
2550</pre></div></div>
2551
2552<p>Thus, the clauses following <tt>GROUP AS</tt> can see the original objects by writing subqueries that iterate over the multiset <tt>g</tt>.</p>
2553<p>The extra level named <tt>c</tt> was introduced into this multiset because the groups might have been formed from a join of two or more collections. Suppose that the <tt>FROM</tt> clause looked like <tt>FROM customers AS c, orders AS o</tt>. Then each item in the group would contain both a <tt>customers</tt> object and an <tt>orders</tt> object, and these two objects might both have a field with the same name. To avoid ambiguity, each of the original objects is wrapped in an &#x201c;outer&#x201d; object that gives it the name of its iteration variable in the <tt>FROM</tt> clause. Consider this fragment:</p>
2554
2555<div>
2556<div>
2557<pre class="source">FROM customers AS c, orders AS o
2558WHERE c.custid = o.custid
2559GROUP BY c.address.zipcode
2560GROUP AS g
2561</pre></div></div>
2562
2563<p>In this case, following <tt>GROUP AS g</tt>, the variable <tt>g</tt> would be bound to the following collection:</p>
2564
2565<div>
2566<div>
2567<pre class="source">[
2568 { &quot;c&quot;: { an original customers object },
2569 &quot;o&quot;: { an original orders object }
2570 },
2571 { &quot;c&quot;: { another customers object },
2572 &quot;o&quot;: { another orders object }
2573 },
2574 ...
2575]
2576</pre></div></div>
2577
2578<p>After using <tt>GROUP AS</tt> to make the content of a group accessible, you will probably want to write a subquery to access that content. A subquery for this purpose is written in exactly the same way as any other subquery. The name specified in the <tt>GROUP AS</tt> clause (<tt>g</tt> in the above example) is the name of a collection of objects. You can write a <tt>FROM</tt> clause to iterate over the objects in the collection, and you can specify an iteration variable to represent each object in turn. For <tt>GROUP AS</tt> queries in this manual, we&#x2019;ll use <tt>g</tt> as the name of the reconstituted group, and <tt>gi</tt> as an iteration variable representing one object inside the group. Of course, you can use any names you like for these purposes.</p>
2579<p>Now we are ready to take a look at how <tt>GROUP AS</tt> might be used in a query. Suppose that we want to group customers by zipcode, and for each group we want to see the average credit rating and a list of the individual customers in the group. Here&#x2019;s a query that does that:</p></div>
2580<div class="section">
2581<h5><a name="Example"></a>Example</h5>
2582<p>(Q3.24) For each zipcode, list the average credit rating in that zipcode, followed by the customer numbers and names in numeric order.</p>
2583
2584<div>
2585<div>
2586<pre class="source">FROM customers AS c
2587GROUP BY c.address.zipcode AS zip
2588GROUP AS g
2589SELECT zip, AVG(c.rating) AS `avg credit rating`,
2590 (FROM g AS gi
2591 SELECT gi.c.custid, gi.c.name
2592 ORDER BY gi.c.custid) AS `local customers`
2593ORDER BY zip;
2594</pre></div></div>
2595
2596<p>Result:</p>
2597
2598<div>
2599<div>
2600<pre class="source">[
2601 {
2602 &quot;avg credit rating&quot;: 625,
2603 &quot;local customers&quot;: [
2604 {
2605 &quot;custid&quot;: &quot;C47&quot;,
2606 &quot;name&quot;: &quot;S. Logan&quot;
2607 }
2608 ]
2609 },
2610 {
2611 &quot;avg credit rating&quot;: 657.5,
2612 &quot;local customers&quot;: [
2613 {
2614 &quot;custid&quot;: &quot;C35&quot;,
2615 &quot;name&quot;: &quot;J. Roberts&quot;
2616 },
2617 {
2618 &quot;custid&quot;: &quot;C37&quot;,
2619 &quot;name&quot;: &quot;T. Henry&quot;
2620 }
2621 ],
2622 &quot;zip&quot;: &quot;02115&quot;
2623 },
2624 {
2625 &quot;avg credit rating&quot;: 690,
2626 &quot;local customers&quot;: [
2627 {
2628 &quot;custid&quot;: &quot;C25&quot;,
2629 &quot;name&quot;: &quot;M. Sinclair&quot;
2630 }
2631 ],
2632 &quot;zip&quot;: &quot;02340&quot;
2633 },
2634 {
2635 &quot;avg credit rating&quot;: 695,
2636 &quot;local customers&quot;: [
2637 {
2638 &quot;custid&quot;: &quot;C13&quot;,
2639 &quot;name&quot;: &quot;T. Cody&quot;
2640 },
2641 {
2642 &quot;custid&quot;: &quot;C31&quot;,
2643 &quot;name&quot;: &quot;B. Pruitt&quot;
2644 },
2645 {
2646 &quot;custid&quot;: &quot;C41&quot;,
2647 &quot;name&quot;: &quot;R. Dodge&quot;
2648 }
2649 ],
2650 &quot;zip&quot;: &quot;63101&quot;
2651 }
2652]
2653</pre></div></div>
2654
2655<p>Note that this query contains two <tt>ORDER BY</tt> clauses: one in the outer query and one in the subquery. These two clauses govern the ordering of the outer-level list of zipcodes and the inner-level lists of customers, respectively. Also note that the group of customers with no zipcode comes first in the output list.</p></div></div></div></div>
2656<div class="section">
2657<h2><a name="Selection_and_UNION_ALL"></a><a name="Union_all" id="Union_all">Selection and UNION ALL</a></h2>
2658<div class="section">
2659<div class="section">
2660<div class="section">
2661<h5><a name="Selection"></a>Selection</h5>
2662<p><img src="../images/diagrams/Selection.png" alt="" /></p></div>
2663<div class="section">
2664<h5><a name="UnionOption"></a>UnionOption</h5>
2665<p><img src="../images/diagrams/UnionOption.png" alt="" /></p>
2666<p>In a SQL++ query, two or more query blocks can be connected by the operator <tt>UNION ALL</tt>. The result of a <tt>UNION ALL</tt> between two query blocks contains all the items returned by the first query block, and all the items returned by the second query block. Duplicate items are not eliminated from the query result.</p>
2667<p>As in SQL, there is no ordering guarantee on the contents of the output stream. However, unlike SQL, the query language does not constrain what the data looks like on the input streams; in particular, it allows heterogeneity on the input and output streams. A type error will be raised if one of the inputs is not a collection.</p>
2668<p>When two or more query blocks are connected by <tt>UNION ALL</tt>, they can be followed by <tt>ORDER BY</tt>, <tt>LIMIT</tt>, and <tt>OFFSET</tt> clauses that apply to the <tt>UNION</tt> query as a whole. For these clauses to be meaningful, the field-names returned by the two query blocks should match. The following example shows a <tt>UNION ALL</tt> of two query blocks, with an ordering specified for the result.</p>
2669<p>In this example, a customer might be selected because he has ordered more than two different items (first query block) or because he has a high credit rating (second query block). By adding an explanatory string to each query block, the query writer can cause the output objects to be labeled to distinguish these two cases.</p></div>
2670<div class="section">
2671<h5><a name="Example"></a>Example</h5>
2672<p>(Q3.25a) Find customer ids for customers who have placed orders for more than two different items or who have a credit rating greater than 700, with labels to distinguish these cases.</p>
2673
2674<div>
2675<div>
2676<pre class="source">FROM orders AS o, o.items AS i
2677GROUP BY o.orderno, o.custid
2678HAVING COUNT(*) &gt; 2
2679SELECT DISTINCT o.custid AS customer_id, &quot;Big order&quot; AS reason
2680
2681UNION ALL
2682
2683FROM customers AS c
2684WHERE rating &gt; 700
2685SELECT c.custid AS customer_id, &quot;High rating&quot; AS reason
2686ORDER BY customer_id;
2687</pre></div></div>
2688
2689<p>Result:</p>
2690
2691<div>
2692<div>
2693<pre class="source">[
2694 {
2695 &quot;reason&quot;: &quot;High rating&quot;,
2696 &quot;customer_id&quot;: &quot;C13&quot;
2697 },
2698 {
2699 &quot;reason&quot;: &quot;Big order&quot;,
2700 &quot;customer_id&quot;: &quot;C37&quot;
2701 },
2702 {
2703 &quot;reason&quot;: &quot;High rating&quot;,
2704 &quot;customer_id&quot;: &quot;C37&quot;
2705 },
2706 {
2707 &quot;reason&quot;: &quot;Big order&quot;,
2708 &quot;customer_id&quot;: &quot;C41&quot;
2709 }
2710]
2711</pre></div></div>
2712
2713<p>If, on the other hand, you simply want a list of the customer ids and you don&#x2019;t care to preserve the reasons, you can simplify your output by using <tt>SELECT VALUE</tt>, as follows:</p>
2714<p>(Q3.25b) Simplify Q3.25a to return a simple list of unlabeled customer ids.</p>
2715
2716<div>
2717<div>
2718<pre class="source">FROM orders AS o, o.items AS i
2719GROUP BY o.orderno, o.custid
2720HAVING COUNT(*) &gt; 2
2721SELECT VALUE o.custid
2722
2723UNION ALL
2724
2725FROM customers AS c
2726WHERE rating &gt; 700
2727SELECT VALUE c.custid;
2728</pre></div></div>
2729
2730<p>Result:</p>
2731
2732<div>
2733<div>
2734<pre class="source">[
2735 &quot;C37&quot;,
2736 &quot;C41&quot;,
2737 &quot;C13&quot;,
2738 &quot;C37&quot;
2739]
2740</pre></div></div>
2741</div></div></div></div>
2742<div class="section">
2743<h2><a name="WITH_Clause"></a><a name="With_clauses" id="With_clauses">WITH Clause</a></h2>
2744<div class="section">
2745<div class="section">
2746<div class="section">
2747<h5><a name="WithClause"></a>WithClause</h5>
2748<p><img src="../images/diagrams/WithClause.png" alt="" /></p>
2749<p>As in standard SQL, a <tt>WITH</tt> clause can be used to improve the modularity of a query. A <tt>WITH</tt> clause often contains a subquery that is needed to compute some result that is used later in the main query. In cases like this, you can think of the <tt>WITH</tt> clause as computing a &#x201c;temporary view&#x201d; of the input data. The next example uses a <tt>WITH</tt> clause to compute the total revenue of each order in 2020; then the main part of the query finds the minimum, maximum, and average revenue for orders in that year.</p></div>
2750<div class="section">
2751<h5><a name="Example"></a>Example</h5>
2752<p>(Q3.26) Find the minimum, maximum, and average revenue among all orders in 2020.</p>
2753
2754<div>
2755<div>
2756<pre class="source">WITH order_revenue AS
2757 (FROM orders AS o, o.items AS i
2758 WHERE get_year(date(o.order_date)) = 2020
2759 GROUP BY o.orderno
2760 SELECT o.orderno, SUM(i.qty * i.price) AS revenue
2761 )
2762FROM order_revenue
2763SELECT AVG(revenue) AS average,
2764 MIN(revenue) AS minimum,
2765 MAX(revenue) AS maximum;
2766</pre></div></div>
2767
2768<p>Result:</p>
2769
2770<div>
2771<div>
2772<pre class="source">[
2773 {
2774 &quot;average&quot;: 4669.99,
2775 &quot;minimum&quot;: 130.45,
2776 &quot;maximum&quot;: 18847.58
2777 }
2778]
2779</pre></div></div>
2780
2781<p><tt>WITH</tt> can be particularly useful when a value needs to be used several times in a query.</p></div></div></div></div>
2782<div class="section">
2783<h2><a name="ORDER_BY.2C_LIMIT.2C_and_OFFSET_Clauses"></a><a name="Order_By_clauses" id="Order_By_clauses">ORDER BY, LIMIT, and OFFSET Clauses</a></h2>
2784<div class="section">
2785<div class="section">
2786<div class="section">
2787<h5><a name="OrderbyClause"></a>OrderbyClause</h5>
2788<p><img src="../images/diagrams/OrderbyClause.png" alt="" /></p></div>
2789<div class="section">
2790<h5><a name="LimitClause"></a>LimitClause</h5>
2791<p><img src="../images/diagrams/LimitClause.png" alt="" /></p></div>
2792<div class="section">
2793<h5><a name="OffsetClause"></a>OffsetClause</h5>
2794<p><img src="../images/diagrams/OffsetClause.png" alt="" /></p>
2795<p>The last three (optional) clauses to be processed in a query are <tt>ORDER BY</tt>, <tt>LIMIT</tt>, and <tt>OFFSET</tt>.</p>
2796<p>The <tt>ORDER BY</tt> clause is used to globally sort data in either ascending order (i.e., <tt>ASC</tt>) or descending order (i.e., <tt>DESC</tt>). During ordering (if the <tt>NULLS</tt> modifier is not specified), <tt>MISSING</tt> and <tt>NULL</tt> are treated as being smaller than any other value if they are encountered in the ordering key(s). <tt>MISSING</tt> is treated as smaller than <tt>NULL</tt> if both occur in the data being sorted. The <tt>NULLS</tt> modifier determines how <tt>MISSING</tt> and <tt>NULL</tt> are ordered relative to all other values: first (<tt>NULLS</tt> <tt>FIRST</tt>) or last (<tt>NULLS</tt> <tt>LAST</tt>). The relative order between <tt>MISSING</tt> and <tt>NULL</tt> is not affected by the <tt>NULLS</tt> modifier (i.e., <tt>MISSING</tt> is still treated as smaller than <tt>NULL</tt>). The ordering of values of a given type is consistent with its type&#x2019;s <tt>&lt;=</tt> ordering; the ordering of values across types is implementation-defined but stable.</p>
2797<p>The <tt>LIMIT</tt> clause is used to limit the result set to a specified maximum size. The optional <tt>OFFSET</tt> clause is used to specify a number of items in the output stream to be discarded before the query result begins. The <tt>OFFSET</tt> can also be used as a standalone clause, without the <tt>LIMIT</tt>.</p>
2798<p>The following example illustrates use of the <tt>ORDER BY</tt> and <tt>LIMIT</tt> clauses.</p></div>
2799<div class="section">
2800<h5><a name="Example"></a>Example</h5>
2801<p>(Q3.27) Return the top three customers by rating.</p>
2802
2803<div>
2804<div>
2805<pre class="source">FROM customers AS c
2806SELECT c.custid, c.name, c.rating
2807ORDER BY c.rating DESC
2808LIMIT 3;
2809</pre></div></div>
2810
2811<p>Result:</p>
2812
2813<div>
2814<div>
2815<pre class="source">[
2816 {
2817 &quot;custid&quot;: &quot;C13&quot;,
2818 &quot;name&quot;: &quot;T. Cody&quot;,
2819 &quot;rating&quot;: 750
2820 },
2821 {
2822 &quot;custid&quot;: &quot;C37&quot;,
2823 &quot;name&quot;: &quot;T. Henry&quot;,
2824 &quot;rating&quot;: 750
2825 },
2826 {
2827 &quot;custid&quot;: &quot;C25&quot;,
2828 &quot;name&quot;: &quot;M. Sinclair&quot;,
2829 &quot;rating&quot;: 690
2830 }
2831]
2832</pre></div></div>
2833
2834<p>The following example illustrates the use of <tt>OFFSET</tt>:</p></div>
2835<div class="section">
2836<h5><a name="Example"></a>Example</h5>
2837<p>(Q3.28) Find the customer with the third-highest credit rating.</p>
2838
2839<div>
2840<div>
2841<pre class="source">FROM customers AS c
2842SELECT c.custid, c.name, c.rating
2843ORDER BY c.rating DESC
2844LIMIT 1 OFFSET 2;
2845</pre></div></div>
2846
2847<p>Result:</p>
2848
2849<div>
2850<div>
2851<pre class="source">[
2852 {
2853 &quot;custid&quot;: &quot;C25&quot;,
2854 &quot;name&quot;: &quot;M. Sinclair&quot;,
2855 &quot;rating&quot;: 690
2856 }
2857]
2858</pre></div></div>
2859</div></div></div></div>
2860<div class="section">
2861<h2><a name="Subqueries" id="Subqueries">Subqueries</a></h2>
2862<div class="section">
2863<div class="section">
2864<div class="section">
2865<h5><a name="Subquery"></a>Subquery</h5>
2866<p><img src="../images/diagrams/Subquery.png" alt="" /></p>
2867<p>A subquery is simply a query surrounded by parentheses. In SQL++, a subquery can appear anywhere that an expression can appear. Like any query, a subquery always returns a collection, even if the collection contains only a single value or is empty. If the subquery has a SELECT clause, it returns a collection of objects. If the subquery has a SELECT VALUE clause, it returns a collection of scalar values. If a single scalar value is expected, the indexing operator [0] can be used to extract the single scalar value from the collection.</p></div>
2868<div class="section">
2869<h5><a name="Example"></a>Example</h5>
2870<p>(Q3.29) (Subquery in SELECT clause) For every order that includes item no. 120, find the order number, customer id, and customer name.</p>
2871<p>Here, the subquery is used to find a customer name, given a customer id. Since the outer query expects a scalar result, the subquery uses SELECT VALUE and is followed by the indexing operator [0].</p>
2872
2873<div>
2874<div>
2875<pre class="source">FROM orders AS o, o.items AS i
2876WHERE i.itemno = 120
2877SELECT o.orderno, o.custid,
2878 (FROM customers AS c
2879 WHERE c.custid = o.custid
2880 SELECT VALUE c.name)[0] AS name;
2881</pre></div></div>
2882
2883<p>Result:</p>
2884
2885<div>
2886<div>
2887<pre class="source">[
2888 {
2889 &quot;orderno&quot;: 1003,
2890 &quot;custid&quot;: &quot;C31&quot;,
2891 &quot;name&quot;: &quot;B. Pruitt&quot;
2892 },
2893 {
2894 &quot;orderno&quot;: 1006,
2895 &quot;custid&quot;: &quot;C41&quot;,
2896 &quot;name&quot;: &quot;R. Dodge&quot;
2897 }
2898]
2899</pre></div></div>
2900</div>
2901<div class="section">
2902<h5><a name="Example"></a>Example</h5>
2903<p>(Q3.30) (Subquery in WHERE clause) Find the customer number, name, and rating of all customers whose rating is greater than the average rating.</p>
2904<p>Here, the subquery is used to find the average rating among all customers. Once again, SELECT VALUE and indexing [0] have been used to get a single scalar value.</p>
2905
2906<div>
2907<div>
2908<pre class="source">FROM customers AS c1
2909WHERE c1.rating &gt;
2910 (FROM customers AS c2
2911 SELECT VALUE AVG(c2.rating))[0]
2912SELECT c1.custid, c1.name, c1.rating;
2913</pre></div></div>
2914
2915<p>Result:</p>
2916
2917<div>
2918<div>
2919<pre class="source">[
2920 {
2921 &quot;custid&quot;: &quot;C13&quot;,
2922 &quot;name&quot;: &quot;T. Cody&quot;,
2923 &quot;rating&quot;: 750
2924 },
2925 {
2926 &quot;custid&quot;: &quot;C25&quot;,
2927 &quot;name&quot;: &quot;M. Sinclair&quot;,
2928 &quot;rating&quot;: 690
2929 },
2930 {
2931 &quot;custid&quot;: &quot;C37&quot;,
2932 &quot;name&quot;: &quot;T. Henry&quot;,
2933 &quot;rating&quot;: 750
2934 }
2935]
2936</pre></div></div>
2937</div>
2938<div class="section">
2939<h5><a name="Example"></a>Example</h5>
2940<p>(Q3.31) (Subquery in FROM clause) Compute the total revenue (sum over items of quantity times price) for each order, then find the average, maximum, and minimum total revenue over all orders.</p>
2941<p>Here, the FROM clause expects to iterate over a collection of objects, so the subquery uses an ordinary SELECT and does not need to be indexed. You might think of a FROM clause as a &#x201c;natural home&#x201d; for a subquery.</p>
2942
2943<div>
2944<div>
2945<pre class="source">FROM
2946 (FROM orders AS o, o.items AS i
2947 GROUP BY o.orderno
2948 SELECT o.orderno, SUM(i.qty * i.price) AS revenue
2949 ) AS r
2950SELECT AVG(r.revenue) AS average,
2951 MIN(r.revenue) AS minimum,
2952 MAX(r.revenue) AS maximum;
2953</pre></div></div>
2954
2955<p>Result:</p>
2956
2957<div>
2958<div>
2959<pre class="source">[
2960 {
2961 &quot;average&quot;: 4669.99,
2962 &quot;minimum&quot;: 130.45,
2963 &quot;maximum&quot;: 18847.58
2964 }
2965]
2966</pre></div></div>
2967
2968<p>Note the similarity between Q3.26 and Q3.31. This illustrates how a subquery can often be moved into a <tt>WITH</tt> clause to improve the modularity and readability of a query.</p><!--
2969 ! Licensed to the Apache Software Foundation (ASF) under one
2970 ! or more contributor license agreements. See the NOTICE file
2971 ! distributed with this work for additional information
2972 ! regarding copyright ownership. The ASF licenses this file
2973 ! to you under the Apache License, Version 2.0 (the
2974 ! "License"); you may not use this file except in compliance
2975 ! with the License. You may obtain a copy of the License at
2976 !
2977 ! http://www.apache.org/licenses/LICENSE-2.0
2978 !
2979 ! Unless required by applicable law or agreed to in writing,
2980 ! software distributed under the License is distributed on an
2981 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
2982 ! KIND, either express or implied. See the License for the
2983 ! specific language governing permissions and limitations
2984 ! under the License.
2985 !-->
2986
2987<h1><a name="Over_clauses" id="Over_clauses">4. Window Functions</a></h1><!--
2988 ! Licensed to the Apache Software Foundation (ASF) under one
2989 ! or more contributor license agreements. See the NOTICE file
2990 ! distributed with this work for additional information
2991 ! regarding copyright ownership. The ASF licenses this file
2992 ! to you under the Apache License, Version 2.0 (the
2993 ! "License"); you may not use this file except in compliance
2994 ! with the License. You may obtain a copy of the License at
2995 !
2996 ! http://www.apache.org/licenses/LICENSE-2.0
2997 !
2998 ! Unless required by applicable law or agreed to in writing,
2999 ! software distributed under the License is distributed on an
3000 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3001 ! KIND, either express or implied. See the License for the
3002 ! specific language governing permissions and limitations
3003 ! under the License.
3004 !-->
3005
3006<p>Window functions are special functions that compute aggregate values over a &#x201c;window&#x201d; of input data. Like an ordinary function, a window function returns a value for every item in the input dataset. But in the case of a window function, the value returned by the function can depend not only on the argument of the function, but also on other items in the same collection. For example, a window function applied to a set of employees might return the rank of each employee in the set, as measured by salary. As another example, a window function applied to a set of items, ordered by purchase date, might return the running total of the cost of the items.</p>
3007<p>A window function call is identified by an <tt>OVER</tt> clause, which can specify three things: partitioning, ordering, and framing. The partitioning specification is like a <tt>GROUP BY</tt>: it splits the input data into partitions. For example, a set of employees might be partitioned by department. The window function, when applied to a given object, is influenced only by other objects in the same partition. The ordering specification is like an <tt>ORDER BY</tt>: it determines the ordering of the objects in each partition. The framing specification defines a &#x201c;frame&#x201d; that moves through the partition, defining how the result for each object depends on nearby objects. For example, the frame for a current object might consist of the two objects before and after the current one; or it might consist of all the objects before the current one in the same partition. A window function call may also specify some options that control (for example) how nulls are handled by the function.</p>
3008<p>Here is an example of a window function call:</p>
3009
3010<div>
3011<div>
3012<pre class="source">SELECT deptno, purchase_date, item, cost,
3013 SUM(cost) OVER (
3014 PARTITION BY deptno
3015 ORDER BY purchase_date
3016 ROWS UNBOUNDED PRECEDING) AS running_total_cost
3017FROM purchases
3018ORDER BY deptno, purchase_date
3019</pre></div></div>
3020
3021<p>This example partitions the <tt>purchases</tt> dataset by department number. Within each department, it orders the <tt>purchases</tt> by date and computes a running total cost for each item, using the frame specification <tt>ROWS UNBOUNDED PRECEDING</tt>. Note that the <tt>ORDER BY</tt> clause in the window function is separate and independent from the <tt>ORDER BY</tt> clause of the query as a whole.</p>
3022<p>The general syntax of a window function call is specified in this section. SQL++ has a set of builtin window functions, which are listed and explained in the <a href="builtins.html#WindowFunctions">Window Functions</a> section of the builtin functions page. In addition, standard SQL aggregate functions such as <tt>SUM</tt> and <tt>AVG</tt> can be used as window functions if they are used with an <tt>OVER</tt> clause.</p></div></div></div></div>
3023<div class="section">
3024<h2><a name="Window_Function_Call"></a><a name="Window_function_call" id="Window_function_call">Window Function Call</a></h2>
3025<div class="section">
3026<div class="section">
3027<div class="section">
3028<h5><a name="WindowFunctionCall"></a>WindowFunctionCall</h5>
3029<p><img src="../images/diagrams/WindowFunctionCall.png" alt="" /></p></div>
3030<div class="section">
3031<h5><a name="WindowFunctionType"></a>WindowFunctionType</h5>
3032<p><img src="../images/diagrams/WindowFunctionType.png" alt="" /></p>
3033<p>Refer to the <a href="builtins.html#AggregateFunctions">Aggregate Functions</a> section for a list of aggregate functions.</p>
3034<p>Refer to the <a href="builtins.html#WindowFunctions">Window Functions</a> section for a list of window functions.</p></div></div></div>
3035<div class="section">
3036<h3><a name="Window_Function_Arguments"></a><a name="Window_function_arguments" id="Window_function_arguments">Window Function Arguments</a></h3>
3037<div class="section">
3038<div class="section">
3039<h5><a name="WindowFunctionArguments"></a>WindowFunctionArguments</h5>
3040<p><img src="../images/diagrams/WindowFunctionArguments.png" alt="" /></p>
3041<p>Refer to the <a href="builtins.html#AggregateFunctions">Aggregate Functions</a> section or the <a href="builtins.html#WindowFunctions">Window Functions</a> section for details of the arguments for individual functions.</p></div></div></div>
3042<div class="section">
3043<h3><a name="Window_Function_Options"></a><a name="Window_function_options" id="Window_function_options">Window Function Options</a></h3>
3044<div class="section">
3045<div class="section">
3046<h5><a name="WindowFunctionOptions"></a>WindowFunctionOptions</h5>
3047<p><img src="../images/diagrams/WindowFunctionOptions.png" alt="" /></p>
3048<p>Window function options cannot be used with <a href="builtins.html#AggregateFunctions">aggregate functions</a>.</p>
3049<p>Window function options can only be used with some <a href="builtins.html#WindowFunctions">window functions</a>, as described below.</p>
3050<p>The <i>FROM modifier</i> determines whether the computation begins at the first or last tuple in the window. It is optional and can only be used with the <tt>nth_value()</tt> function. If it is omitted, the default setting is <tt>FROM FIRST</tt>.</p>
3051<p>The <i>NULLS modifier</i> determines whether NULL values are included in the computation, or ignored. MISSING values are treated the same way as NULL values. It is also optional and can only be used with the <tt>first_value()</tt>, <tt>last_value()</tt>, <tt>nth_value()</tt>, <tt>lag()</tt>, and <tt>lead()</tt> functions. If omitted, the default setting is <tt>RESPECT NULLS</tt>.</p></div></div></div>
3052<div class="section">
3053<h3><a name="Window_Frame_Variable"></a><a name="Window_frame_variable" id="Window_frame_variable">Window Frame Variable</a></h3>
3054<p>The <tt>AS</tt> keyword enables you to specify an alias for the window frame contents. It introduces a variable which will be bound to the contents of the frame. When using a built-in <a href="builtins.html#AggregateFunctions">aggregate function</a> as a window function, the function&#x2019;s argument must be a subquery which refers to this alias, for example:</p>
3055
3056<div>
3057<div>
3058<pre class="source">SELECT ARRAY_COUNT(DISTINCT (FROM alias SELECT VALUE alias.src.field))
3059OVER alias AS (PARTITION BY &#x2026; ORDER BY &#x2026;)
3060FROM source AS src
3061</pre></div></div>
3062
3063<p>The alias is not necessary when using a <a href="builtins.html#WindowFunctions">window function</a>, or when using a standard SQL aggregate function with the <tt>OVER</tt> clause.</p></div>
3064<div class="section">
3065<h3><a name="Window_Definition"></a><a name="Window_definition" id="Window_definition">Window Definition</a></h3>
3066<div class="section">
3067<div class="section">
3068<h5><a name="WindowDefinition"></a>WindowDefinition</h5>
3069<p><img src="../images/diagrams/WindowDefinition.png" alt="" /></p>
3070<p>The <i>window definition</i> specifies the partitioning, ordering, and framing for window functions.</p></div></div>
3071<div class="section">
3072<h4><a name="Window_Partition_Clause"></a><a name="Window_partition_clause" id="Window_partition_clause">Window Partition Clause</a></h4>
3073<div class="section">
3074<h5><a name="WindowPartitionClause"></a>WindowPartitionClause</h5>
3075<p><img src="../images/diagrams/WindowPartitionClause.png" alt="" /></p>
3076<p>The <i>window partition clause</i> divides the tuples into logical partitions using one or more expressions.</p>
3077<p>This clause may be used with any <a href="builtins.html#WindowFunctions">window function</a>, or any <a href="builtins.html#AggregateFunctions">aggregate function</a> used as a window function.</p>
3078<p>This clause is optional. If omitted, all tuples are united in a single partition.</p></div></div>
3079<div class="section">
3080<h4><a name="Window_Order_Clause"></a><a name="Window_order_clause" id="Window_order_clause">Window Order Clause</a></h4>
3081<div class="section">
3082<h5><a name="WindowOrderClause"></a>WindowOrderClause</h5>
3083<p><img src="../images/diagrams/WindowOrderClause.png" alt="" /></p>
3084<p>The <i>window order clause</i> determines how tuples are ordered within each partition. The window function works on tuples in the order specified by this clause.</p>
3085<p>This clause may be used with any <a href="builtins.html#WindowFunctions">window function</a>, or any <a href="builtins.html#AggregateFunctions">aggregate function</a> used as a window function.</p>
3086<p>This clause is optional. If omitted, all tuples are considered peers, i.e. their order is tied. When tuples in the window partition are tied, each window function behaves differently.</p>
3087<ul>
3088
3089<li>
3090
3091<p>The <tt>row_number()</tt> function returns a distinct number for each tuple. If tuples are tied, the results may be unpredictable.</p>
3092</li>
3093<li>
3094
3095<p>The <tt>rank()</tt>, <tt>dense_rank()</tt>, <tt>percent_rank()</tt>, and <tt>cume_dist()</tt> functions return the same result for each tuple.</p>
3096</li>
3097<li>
3098
3099<p>For other functions, if the <a href="#Window_frame_clause">window frame</a> is defined by <tt>ROWS</tt>, the results may be unpredictable. If the window frame is defined by <tt>RANGE</tt> or <tt>GROUPS</tt>, the results are the same for each tuple.</p>
3100</li>
3101</ul>
3102<p><b>Note:</b> This clause does not guarantee the overall order of the query results. To guarantee the order of the final results, use the query <tt>ORDER BY</tt> clause.</p></div></div>
3103<div class="section">
3104<h4><a name="Window_Frame_Clause"></a><a name="Window_frame_clause" id="Window_frame_clause">Window Frame Clause</a></h4>
3105<div class="section">
3106<h5><a name="WindowFrameClause"></a>WindowFrameClause</h5>
3107<p><img src="../images/diagrams/WindowFrameClause.png" alt="" /></p>
3108<p>The <i>window frame clause</i> defines the window frame. It can be used with all <a href="builtins.html#AggregateFunctions">aggregate functions</a> and some <a href="builtins.html#WindowFunctions">window functions</a> &#x2014; refer to the descriptions of individual functions for more details. It is optional and allowed only when the <a href="#Window_order_clause">window order clause</a> is present.</p>
3109<ul>
3110
3111<li>
3112
3113<p>If this clause is omitted and there is no <a href="#Window_order_clause">window order clause</a>, the window frame is the entire partition.</p>
3114</li>
3115<li>
3116
3117<p>If this clause is omitted but there is a <a href="#Window_order_clause">window order clause</a>, the window frame becomes all tuples in the partition preceding the current tuple, together with the current tuple and its peers &#x2014; the same as <tt>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</tt>.</p>
3118</li>
3119</ul>
3120<p>The window frame can be defined in the following ways:</p>
3121<ul>
3122
3123<li>
3124
3125<p><tt>ROWS</tt>: Counts the exact number of tuples within the frame. If window ordering doesn&#x2019;t result in unique ordering, the function may produce unpredictable results. You can add a unique expression or more window ordering expressions to produce unique ordering.</p>
3126</li>
3127<li>
3128
3129<p><tt>RANGE</tt>: Looks for a value offset within the frame. The function produces deterministic results.</p>
3130</li>
3131<li>
3132
3133<p><tt>GROUPS</tt>: Counts all groups of tied rows within the frame. The function produces deterministic results.</p>
3134</li>
3135</ul>
3136<p><b>Note:</b> If this clause uses <tt>RANGE</tt> with either <i>Expr</i> <tt>PRECEDING</tt> or <i>Expr</i> <tt>FOLLOWING</tt>, the <a href="#Window_order_clause">window order clause</a> must have only a single ordering term. The ordering term expression must evaluate to a number. If these conditions are not met, the window frame will be empty, which means the window function will return its default value: in most cases this is <tt>null</tt>, except for <tt>strict_count()</tt> or <tt>array_count()</tt>, whose default value is 0. This restriction does not apply when the window frame uses <tt>ROWS</tt> or <tt>GROUPS</tt>.</p>
3137<p><b>Tip:</b> The <tt>RANGE</tt> window frame is commonly used to define window frames based on date or time. If you want to use <tt>RANGE</tt> with either <i>Expr</i> <tt>PRECEDING</tt> or <i>Expr</i> <tt>FOLLOWING</tt>, and you want to use an ordering expression based on date or time, the expression in <i>Expr</i> <tt>PRECEDING</tt> or <i>Expr</i> <tt>FOLLOWING</tt> must use a data type that can be added to the ordering expression.</p></div></div>
3138<div class="section">
3139<h4><a name="Window_Frame_Extent"></a><a name="Window_frame_extent" id="Window_frame_extent">Window Frame Extent</a></h4>
3140<div class="section">
3141<h5><a name="WindowFrameExtent"></a>WindowFrameExtent</h5>
3142<p><img src="../images/diagrams/WindowFrameExtent.png" alt="" /></p>
3143<p>The <i>window frame extent clause</i> specifies the start point and end point of the window frame. The expression before <tt>AND</tt> is the start point and the expression after <tt>AND</tt> is the end point. If <tt>BETWEEN</tt> is omitted, you can only specify the start point; the end point becomes <tt>CURRENT ROW</tt>.</p>
3144<p>The window frame end point can&#x2019;t be before the start point. If this clause violates this restriction explicitly, an error will result. If it violates this restriction implicitly, the window frame will be empty, which means the window function will return its default value: in most cases this is <tt>null</tt>, except for <tt>strict_count()</tt> or <tt>array_count()</tt>, whose default value is 0.</p>
3145<p>Window frame extents that result in an explicit violation are:</p>
3146<ul>
3147
3148<li>
3149
3150<p><tt>BETWEEN CURRENT ROW AND</tt> <i>Expr</i> <tt>PRECEDING</tt></p>
3151</li>
3152<li>
3153
3154<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND</tt> <i>Expr</i> <tt>PRECEDING</tt></p>
3155</li>
3156<li>
3157
3158<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND CURRENT ROW</tt></p>
3159</li>
3160</ul>
3161<p>Window frame extents that result in an implicit violation are:</p>
3162<ul>
3163
3164<li>
3165
3166<p><tt>BETWEEN UNBOUNDED PRECEDING AND</tt> <i>Expr</i> <tt>PRECEDING</tt> &#x2014; if <i>Expr</i> is too high, some tuples may generate an empty window frame.</p>
3167</li>
3168<li>
3169
3170<p><tt>BETWEEN</tt> <i>Expr</i> <tt>PRECEDING AND</tt> <i>Expr</i> <tt>PRECEDING</tt> &#x2014; if the second <i>Expr</i> is greater than or equal to the first <i>Expr</i>, all result sets will generate an empty window frame.</p>
3171</li>
3172<li>
3173
3174<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND</tt> <i>Expr</i> <tt>FOLLOWING</tt> &#x2014; if the first <i>Expr</i> is greater than or equal to the second <i>Expr</i>, all result sets will generate an empty window frame.</p>
3175</li>
3176<li>
3177
3178<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND UNBOUNDED FOLLOWING</tt> &#x2014; if <i>Expr</i> is too high, some tuples may generate an empty window frame.</p>
3179</li>
3180<li>
3181
3182<p>If the <a href="#Window_frame_exclusion">window frame exclusion clause</a> is present, any window frame specification may result in an empty window frame.</p>
3183</li>
3184</ul>
3185<p>The <i>Expr</i> must be a positive constant or an expression that evaluates as a positive number. For <tt>ROWS</tt> or <tt>GROUPS</tt>, the <i>Expr</i> must be an integer.</p></div></div>
3186<div class="section">
3187<h4><a name="Window_Frame_Exclusion"></a><a name="Window_frame_exclusion" id="Window_frame_exclusion">Window Frame Exclusion</a></h4>
3188<div class="section">
3189<h5><a name="WindowFrameExclusion"></a>WindowFrameExclusion</h5>
3190<p><img src="../images/diagrams/WindowFrameExclusion.png" alt="" /></p>
3191<p>The <i>window frame exclusion clause</i> enables you to exclude specified tuples from the window frame.</p>
3192<p>This clause can be used with all <a href="builtins.html#AggregateFunctions">aggregate functions</a> and some <a href="builtins.html#WindowFunctions">window functions</a> &#x2014; refer to the descriptions of individual functions for more details.</p>
3193<p>This clause is allowed only when the <a href="#Window_frame_clause">window frame clause</a> is present.</p>
3194<p>This clause is optional. If this clause is omitted, the default is no exclusion &#x2014; the same as <tt>EXCLUDE NO OTHERS</tt>.</p>
3195<ul>
3196
3197<li>
3198
3199<p><tt>EXCLUDE CURRENT ROW</tt>: If the current tuple is still part of the window frame, it is removed from the window frame.</p>
3200</li>
3201<li>
3202
3203<p><tt>EXCLUDE GROUP</tt>: The current tuple and any peers of the current tuple are removed from the window frame.</p>
3204</li>
3205<li>
3206
3207<p><tt>EXCLUDE TIES</tt>: Any peers of the current tuple, but not the current tuple itself, are removed from the window frame.</p>
3208</li>
3209<li>
3210
3211<p><tt>EXCLUDE NO OTHERS</tt>: No additional tuples are removed from the window frame.</p>
3212</li>
3213</ul>
3214<p>If the current tuple is already removed from the window frame, then it remains removed from the window frame.</p><!--
3215 ! Licensed to the Apache Software Foundation (ASF) under one
3216 ! or more contributor license agreements. See the NOTICE file
3217 ! distributed with this work for additional information
3218 ! regarding copyright ownership. The ASF licenses this file
3219 ! to you under the Apache License, Version 2.0 (the
3220 ! "License"); you may not use this file except in compliance
3221 ! with the License. You may obtain a copy of the License at
3222 !
3223 ! http://www.apache.org/licenses/LICENSE-2.0
3224 !
3225 ! Unless required by applicable law or agreed to in writing,
3226 ! software distributed under the License is distributed on an
3227 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3228 ! KIND, either express or implied. See the License for the
3229 ! specific language governing permissions and limitations
3230 ! under the License.
3231 !-->
3232
3233<h1><a name="Errors" id="Errors">5. Errors</a></h1><!--
3234 ! Licensed to the Apache Software Foundation (ASF) under one
3235 ! or more contributor license agreements. See the NOTICE file
3236 ! distributed with this work for additional information
3237 ! regarding copyright ownership. The ASF licenses this file
3238 ! to you under the Apache License, Version 2.0 (the
3239 ! "License"); you may not use this file except in compliance
3240 ! with the License. You may obtain a copy of the License at
3241 !
3242 ! http://www.apache.org/licenses/LICENSE-2.0
3243 !
3244 ! Unless required by applicable law or agreed to in writing,
3245 ! software distributed under the License is distributed on an
3246 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3247 ! KIND, either express or implied. See the License for the
3248 ! specific language governing permissions and limitations
3249 ! under the License.
3250 !-->
3251
3252<p>A query can potentially result in one of the following errors:</p>
3253<ul>
3254
3255<li>syntax error,</li>
3256<li>identifier resolution error,</li>
3257<li>type error,</li>
3258<li>resource error.</li>
3259</ul>
3260<p>If the query processor runs into any error, it will terminate the ongoing processing of the query and immediately return an error message to the client.</p></div></div></div></div>
3261<div class="section">
3262<h2><a name="Syntax_Errors"></a><a name="Syntax_errors" id="Syntax_errors">Syntax Errors</a></h2>
3263<p>A valid query must satisfy the grammar rules of the query language. Otherwise, a syntax error will be raised.</p>
3264<div class="section">
3265<div class="section">
3266<div class="section">
3267<h5><a name="Example"></a>Example</h5>
3268<p>(Q4.1)</p>
3269
3270<div>
3271<div>
3272<pre class="source">customers AS c
3273SELECT *
3274</pre></div></div>
3275
3276<p>Since the query has no <tt>FROM</tt> keyword before the dataset <tt>customers</tt>, we will get a syntax error as follows:</p>
3277
3278<div>
3279<div>
3280<pre class="source">ERROR: Code: 1 &quot;ASX1001: Syntax error: In line 2 &gt;&gt;customers AS c&lt;&lt; Encountered \&quot;AS\&quot; at column 11. &quot;
3281</pre></div></div>
3282</div>
3283<div class="section">
3284<h5><a name="Example"></a>Example</h5>
3285<p>(Q4.2)</p>
3286
3287<div>
3288<div>
3289<pre class="source"> FROM customers AS c
3290 WHERE type=&quot;advertiser&quot;
3291 SELECT *;
3292</pre></div></div>
3293
3294<p>Since &#x201c;type&#x201d; is a reserved keyword in the query parser, we will get a syntax error as follows:</p>
3295
3296<div>
3297<div>
3298<pre class="source">ERROR: Code: 1 &quot;ASX1001: Syntax error: In line 3 &gt;&gt; WHERE type=\&quot;advertiser\&quot;&lt;&lt; Encountered \&quot;type\&quot; at column 8. &quot;;
3299</pre></div></div>
3300</div></div></div></div>
3301<div class="section">
3302<h2><a name="Identifier_Resolution_Errors"></a><a name="Identifier_resolution_errors" id="Identifier_resolution_errors">Identifier Resolution Errors</a></h2>
3303<p>Referring to an undefined identifier can cause an error if the identifier cannot be successfully resolved as a valid field access.</p>
3304<div class="section">
3305<div class="section">
3306<div class="section">
3307<h5><a name="Example"></a>Example</h5>
3308<p>(Q4.3)</p>
3309
3310<div>
3311<div>
3312<pre class="source"> FROM customer AS c
3313 SELECT *
3314</pre></div></div>
3315
3316<p>If we have a typo as above in &#x201c;customers&#x201d; that misses the dataset name&#x2019;s ending &#x201c;s&#x201d;, we will get an identifier resolution error as follows:</p>
3317
3318<div>
3319<div>
3320<pre class="source">ERROR: Code: 1 &quot;ASX1077: Cannot find dataset customer in dataverse Commerce nor an alias with name customer (in line 2, at column 7)&quot;
3321</pre></div></div>
3322</div>
3323<div class="section">
3324<h5><a name="Example"></a>Example</h5>
3325<p>(Q4.4)</p>
3326
3327<div>
3328<div>
3329<pre class="source"> FROM customers AS c JOIN orders AS o ON c.custid = o.custid
3330 SELECT name, orderno;
3331</pre></div></div>
3332
3333<p>If the compiler cannot figure out how to resolve an unqualified field name, which will occur if there is more than one variable in scope (e.g., <tt>customers AS c</tt> and <tt>orders AS o</tt> as above), we will get an identifier resolution error as follows:</p>
3334
3335<div>
3336<div>
3337<pre class="source">ERROR: Code: 1 &quot;ASX1074: Cannot resolve ambiguous alias reference for identifier name (in line 3, at column 9)&quot;
3338</pre></div></div>
3339
3340<p>The same can happen when failing to properly identify the <tt>GROUP BY</tt> expression.</p>
3341<p>(Q4.5)</p>
3342
3343<div>
3344<div>
3345<pre class="source">SELECT o.custid, COUNT(o.orderno) AS `order count`
3346FROM orders AS o
3347GROUP BY custid;
3348</pre></div></div>
3349
3350<p>Result:</p>
3351
3352<div>
3353<div>
3354<pre class="source">ERROR: Code: 1 &quot;ASX1073: Cannot resolve alias reference for undefined identifier o (in line 2, at column 8)&quot;
3355</pre></div></div>
3356</div></div></div></div>
3357<div class="section">
3358<h2><a name="Type_Errors"></a><a name="Type_errors" id="Type_errors">Type Errors</a></h2>
3359<p>The query compiler does type checks based on its available type information. In addition, the query runtime also reports type errors if a data model instance it processes does not satisfy the type requirement.</p>
3360<div class="section">
3361<div class="section">
3362<div class="section">
3363<h5><a name="Example"></a>Example</h5>
3364<p>(Q4.6)</p>
3365
3366<div>
3367<div>
3368<pre class="source">get_day(10/11/2020);
3369</pre></div></div>
3370
3371<p>Since function <tt>get_day</tt> can only process duration, daytimeduration, date, or datetime input values, we will get a type error as follows:</p>
3372
3373<div>
3374<div>
3375<pre class="source">ERROR: Code: 1 &quot;ASX0002: Type mismatch: function get-day expects its 1st input parameter to be of type duration, daytimeduration, date or datetime, but the actual input type is double (in line 2, at column 1)&quot;
3376</pre></div></div>
3377</div></div></div></div>
3378<div class="section">
3379<h2><a name="Resource_Errors"></a><a name="Resource_errors" id="Resource_errors">Resource Errors</a></h2>
3380<p>A query can potentially exhaust system resources, such as the number of open files and disk space. For instance, the following two resource errors could potentially be seen when running the system:</p>
3381
3382<div>
3383<div>
3384<pre class="source">Error: no space left on device
3385Error: too many open files
3386</pre></div></div>
3387
3388<p>The &#x201c;no space left on device&#x201d; issue usually can be fixed by cleaning up disk space and reserving more disk space for the system. The &#x201c;too many open files&#x201d; issue usually can be fixed by a system administrator, following <a class="externalLink" href="https://easyengine.io/tutorials/linux/increase-open-files-limit/">these instructions for increasing the open files limit</a>.</p><!--
3389 ! Licensed to the Apache Software Foundation (ASF) under one
3390 ! or more contributor license agreements. See the NOTICE file
3391 ! distributed with this work for additional information
3392 ! regarding copyright ownership. The ASF licenses this file
3393 ! to you under the Apache License, Version 2.0 (the
3394 ! "License"); you may not use this file except in compliance
3395 ! with the License. You may obtain a copy of the License at
3396 !
3397 ! http://www.apache.org/licenses/LICENSE-2.0
3398 !
3399 ! Unless required by applicable law or agreed to in writing,
3400 ! software distributed under the License is distributed on an
3401 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3402 ! KIND, either express or implied. See the License for the
3403 ! specific language governing permissions and limitations
3404 ! under the License.
3405 !-->
3406
3407<h1><a name="Vs_SQL-92" id="Vs_SQL-92">6. Differences from SQL-92</a></h1><!--
3408 ! Licensed to the Apache Software Foundation (ASF) under one
3409 ! or more contributor license agreements. See the NOTICE file
3410 ! distributed with this work for additional information
3411 ! regarding copyright ownership. The ASF licenses this file
3412 ! to you under the Apache License, Version 2.0 (the
3413 ! "License"); you may not use this file except in compliance
3414 ! with the License. You may obtain a copy of the License at
3415 !
3416 ! http://www.apache.org/licenses/LICENSE-2.0
3417 !
3418 ! Unless required by applicable law or agreed to in writing,
3419 ! software distributed under the License is distributed on an
3420 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3421 ! KIND, either express or implied. See the License for the
3422 ! specific language governing permissions and limitations
3423 ! under the License.
3424 !-->
3425
3426<p>SQL++ offers the following additional features beyond SQL-92:</p>
3427<ul>
3428
3429<li>Fully composable and functional: A subquery can iterate over any intermediate collection and can appear anywhere in a query.</li>
3430<li>Schema-free: The query language does not assume the existence of a static schema for any data that it processes.</li>
3431<li>Correlated <tt>FROM</tt> terms: A right-side <tt>FROM</tt> term expression can refer to variables defined by <tt>FROM</tt> terms on its left.</li>
3432<li>Powerful <tt>GROUP BY</tt>: In addition to a set of aggregate functions as in standard SQL, the groups created by the <tt>GROUP BY</tt> clause are directly usable in nested queries and/or to obtain nested results.</li>
3433<li>Generalized <tt>SELECT</tt> clause: A <tt>SELECT</tt> clause can return any type of collection, while in SQL-92, a <tt>SELECT</tt> clause has to return a (homogeneous) collection of objects.</li>
3434</ul>
3435<p>The following matrix is a quick &#x201c;SQL-92 compatibility cheat sheet&#x201d; for SQL++.</p>
3436<table border="0" class="table table-striped">
3437<thead>
3438
3439<tr class="a">
3440<th> Feature </th>
3441<th> SQL++ </th>
3442<th> SQL-92 </th>
3443<th> Why different? </th></tr>
3444</thead><tbody>
3445
3446<tr class="b">
3447<td> SELECT * </td>
3448<td> Returns nested objects </td>
3449<td> Returns flattened concatenated objects </td>
3450<td> Nested collections are 1st class citizens </td></tr>
3451<tr class="a">
3452<td> SELECT list </td>
3453<td> order not preserved </td>
3454<td> order preserved </td>
3455<td> Fields in a JSON object are not ordered </td></tr>
3456<tr class="b">
3457<td> Subquery </td>
3458<td> Returns a collection </td>
3459<td> The returned collection is cast into a scalar value if the subquery appears in a SELECT list or on one side of a comparison or as input to a function </td>
3460<td> Nested collections are 1st class citizens </td></tr>
3461<tr class="a">
3462<td> LEFT OUTER JOIN </td>
3463<td> Fills in <tt>MISSING</tt>(s) for non-matches </td>
3464<td> Fills in <tt>NULL</tt>(s) for non-matches </td>
3465<td> &#x201c;Absence&#x201d; is more appropriate than &#x201c;unknown&#x201d; here </td></tr>
3466<tr class="b">
3467<td> UNION ALL </td>
3468<td> Allows heterogeneous inputs and output </td>
3469<td> Input streams must be UNION-compatible and output field names are drawn from the first input stream </td>
3470<td> Heterogeneity and nested collections are common </td></tr>
3471<tr class="a">
3472<td> IN constant_expr </td>
3473<td> The constant expression has to be an array or multiset, i.e., [..,..,&#x2026;] </td>
3474<td> The constant collection can be represented as comma-separated items in a paren pair </td>
3475<td> Nested collections are 1st class citizens </td></tr>
3476<tr class="b">
3477<td> String literal </td>
3478<td> Double quotes or single quotes </td>
3479<td> Single quotes only </td>
3480<td> Double quoted strings are pervasive in JSON</td></tr>
3481<tr class="a">
3482<td> Delimited identifiers </td>
3483<td> Backticks </td>
3484<td> Double quotes </td>
3485<td> Double quoted strings are pervasive in JSON </td></tr>
3486</tbody>
3487</table>
3488<p>The following SQL-92 features are not implemented yet. However, SQL++ does not conflict with these features:</p>
3489<ul>
3490
3491<li>CROSS JOIN, NATURAL JOIN, UNION JOIN</li>
3492<li>FULL OUTER JOIN</li>
3493<li>INTERSECT, EXCEPT, UNION with set semantics</li>
3494<li>CAST expression</li>
3495<li>ALL and SOME predicates for linking to subqueries</li>
3496<li>UNIQUE predicate (tests a collection for duplicates)</li>
3497<li>MATCH predicate (tests for referential integrity)</li>
3498<li>Row and Table constructors</li>
3499<li>Preserved order for expressions in a SELECT list</li>
3500</ul><!--
3501 ! Licensed to the Apache Software Foundation (ASF) under one
3502 ! or more contributor license agreements. See the NOTICE file
3503 ! distributed with this work for additional information
3504 ! regarding copyright ownership. The ASF licenses this file
3505 ! to you under the Apache License, Version 2.0 (the
3506 ! "License"); you may not use this file except in compliance
3507 ! with the License. You may obtain a copy of the License at
3508 !
3509 ! http://www.apache.org/licenses/LICENSE-2.0
3510 !
3511 ! Unless required by applicable law or agreed to in writing,
3512 ! software distributed under the License is distributed on an
3513 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3514 ! KIND, either express or implied. See the License for the
3515 ! specific language governing permissions and limitations
3516 ! under the License.
3517 !-->
3518
3519<h1><a name="DDL_and_DML_statements" id="DDL_and_DML_statements">7. DDL and DML statements</a></h1>
3520<div class="section">
3521<div class="section">
3522<div class="section">
3523<h5><a name="Stmnt"></a>Stmnt</h5>
3524<p><img src="../images/diagrams/Stmnt.png" alt="" /></p></div>
3525<div class="section">
3526<h5><a name="SingleStmnt"></a>SingleStmnt</h5>
3527<p><img src="../images/diagrams/SingleStmnt.png" alt="" /></p>
3528<p>In addition to queries, an implementation of SQL++ needs to support statements for data definition and manipulation purposes as well as controlling the context to be used in evaluating query expressions. This section details the DDL and DML statements supported in SQL++ as realized today in Apache AsterixDB.</p><!--
3529 ! Licensed to the Apache Software Foundation (ASF) under one
3530 ! or more contributor license agreements. See the NOTICE file
3531 ! distributed with this work for additional information
3532 ! regarding copyright ownership. The ASF licenses this file
3533 ! to you under the Apache License, Version 2.0 (the
3534 ! "License"); you may not use this file except in compliance
3535 ! with the License. You may obtain a copy of the License at
3536 !
3537 ! http://www.apache.org/licenses/LICENSE-2.0
3538 !
3539 ! Unless required by applicable law or agreed to in writing,
3540 ! software distributed under the License is distributed on an
3541 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3542 ! KIND, either express or implied. See the License for the
3543 ! specific language governing permissions and limitations
3544 ! under the License.
3545 !-->
3546</div></div></div></div>
3547<div class="section">
3548<h2><a name="Lifecycle_Management_Statements"></a><a name="Lifecycle_management_statements" id="Lifecycle_management_statements">Lifecycle Management Statements</a></h2>
3549<div class="section">
3550<h3><a name="Use_Statement"></a><a name="Use" id="Use">Use Statement</a></h3>
3551<div class="section">
3552<div class="section">
3553<h5><a name="UseStmnt"></a>UseStmnt</h5>
3554<p><img src="../images/diagrams/UseStmnt.png" alt="" /></p></div>
3555<div class="section">
3556<h5><a name="DataverseName"></a>DataverseName</h5>
3557<p><img src="../images/diagrams/DataverseName.png" alt="" /></p>
3558<p>At the uppermost level, the world of data is organized into data namespaces called <b>dataverses</b>. To set the default dataverse for statements, the <tt>USE</tt> statement is provided.</p>
3559<p>As an example, the following statement sets the default dataverse to be <tt>Commerce</tt>.</p>
3560
3561<div>
3562<div>
3563<pre class="source">USE Commerce;
3564</pre></div></div>
3565</div></div></div>
3566<div class="section">
3567<h3><a name="Set_Statement"></a><a name="Sets" id="Sets"> Set Statement</a></h3>
3568<p>The <tt>SET</tt> statement can be used to override certain configuration parameters. More information about <tt>SET</tt> can be found in <a href="#Performance_tuning">Appendix 2</a>.</p></div>
3569<div class="section">
3570<h3><a name="Function_Declaration"></a><a name="Functions" id="Functions"> Function Declaration</a></h3>
3571<p>When writing a complex query, it can sometimes be helpful to define one or more auxiliary functions that each address a sub-piece of the overall query.</p>
3572<p>The <tt>DECLARE FUNCTION</tt> statement supports the creation of such helper functions. In general, the function body (expression) can be any legal query expression.</p>
3573<p>The function named in the <tt>DECLARE FUNCTION</tt> statement is accessible only in the current query. To create a persistent function for use in multiple queries, use the <tt>CREATE FUNCTION</tt> statement.</p>
3574<div class="section">
3575<div class="section">
3576<h5><a name="FunctionDeclaration"></a>FunctionDeclaration</h5>
3577<p><img src="../images/diagrams/FunctionDeclaration.png" alt="" /></p>
3578<p>The following is a simple example of a temporary function definition and its use.</p></div>
3579<div class="section">
3580<h5><a name="Example"></a>Example</h5>
3581
3582<div>
3583<div>
3584<pre class="source">DECLARE FUNCTION nameSearch(customerId){
3585 (SELECT c.custid, c.name
3586 FROM customers AS c
3587 WHERE c.custid = customerId)[0]
3588 };
3589
3590
3591SELECT VALUE nameSearch(&quot;C25&quot;);
3592</pre></div></div>
3593
3594<p>For our sample data set, this returns:</p>
3595
3596<div>
3597<div>
3598<pre class="source">[
3599 { &quot;custid&quot;: &quot;C25&quot;, &quot;name&quot;: &quot;M. Sinclair&quot; }
3600]
3601</pre></div></div>
3602</div></div></div>
3603<div class="section">
3604<h3><a name="Create_Statement"></a><a name="Create" id="Create"> Create Statement</a></h3>
3605<div class="section">
3606<div class="section">
3607<h5><a name="CreateStmnt"></a>CreateStmnt</h5>
3608<p><img src="../images/diagrams/CreateStmnt.png" alt="" /></p></div>
3609<div class="section">
3610<h5><a name="DataverseName"></a>DataverseName</h5>
3611<p><img src="../images/diagrams/DataverseName.png" alt="" /></p></div>
3612<div class="section">
3613<h5><a name="QualifiedName"></a>QualifiedName</h5>
3614<p><img src="../images/diagrams/QualifiedName.png" alt="" /></p></div>
3615<div class="section">
3616<h5><a name="DoubleQualifiedName"></a>DoubleQualifiedName</h5>
3617<p><img src="../images/diagrams/DoubleQualifiedName.png" alt="" /></p>
3618<p>The <tt>CREATE</tt> statement is used for creating dataverses as well as other persistent artifacts in a dataverse. It can be used to create new dataverses, datatypes, datasets, indexes, and user-defined query functions.</p></div></div>
3619<div class="section">
3620<h4><a name="Create_Dataverse"></a><a name="Dataverses" id="Dataverses"> Create Dataverse</a></h4>
3621<div class="section">
3622<h5><a name="CreateDataverse"></a>CreateDataverse</h5>
3623<p><img src="../images/diagrams/CreateDataverse.png" alt="" /></p>
3624<p>The <tt>CREATE DATAVERSE</tt> statement is used to create new dataverses. To ease the authoring of reusable query scripts, an optional <tt>IF NOT EXISTS</tt> clause is included to allow creation to be requested either unconditionally or only if the dataverse does not already exist. If this clause is absent, an error is returned if a dataverse with the indicated name already exists.</p>
3625<p>The following example creates a new dataverse named <tt>Commerce</tt> if one does not already exist.</p></div>
3626<div class="section">
3627<h5><a name="Example"></a>Example</h5>
3628
3629<div>
3630<div>
3631<pre class="source">CREATE DATAVERSE Commerce IF NOT EXISTS;
3632</pre></div></div>
3633</div></div>
3634<div class="section">
3635<h4><a name="Create_Type"></a><a name="Types" id="Types"> Create Type </a></h4>
3636<div class="section">
3637<h5><a name="CreateType"></a>CreateType</h5>
3638<p><img src="../images/diagrams/CreateType.png" alt="" /></p></div>
3639<div class="section">
3640<h5><a name="ObjectTypeDef"></a>ObjectTypeDef</h5>
3641<p><img src="../images/diagrams/ObjectTypeDef.png" alt="" /></p></div>
3642<div class="section">
3643<h5><a name="ObjectField"></a>ObjectField</h5>
3644<p><img src="../images/diagrams/ObjectField.png" alt="" /></p></div>
3645<div class="section">
3646<h5><a name="TypeExpr"></a>TypeExpr</h5>
3647<p><img src="../images/diagrams/TypeExpr.png" alt="" /></p></div>
3648<div class="section">
3649<h5><a name="ArrayTypeDef"></a>ArrayTypeDef</h5>
3650<p><img src="../images/diagrams/ArrayTypeDef.png" alt="" /></p></div>
3651<div class="section">
3652<h5><a name="MultisetTypeDef"></a>MultisetTypeDef</h5>
3653<p><img src="../images/diagrams/MultisetTypeDef.png" alt="" /></p></div>
3654<div class="section">
3655<h5><a name="TypeReference"></a>TypeReference</h5>
3656<p><img src="../images/diagrams/TypeReference.png" alt="" /></p>
3657<p>The <tt>CREATE TYPE</tt> statement is used to create a new named datatype. This type can then be used to create stored collections or utilized when defining one or more other datatypes. Much more information about the data model is available in the <a href="../datamodel.html">data model reference guide</a>. A new type can be an object type, a renaming of another type, an array type, or a multiset type. An object type can be defined as being either open or closed. Instances of a closed object type are not permitted to contain fields other than those specified in the create type statement. Instances of an open object type may carry additional fields, and open is the default for new types if neither option is specified.</p>
3658<p>The following example creates three new object types called <tt>addressType</tt>, <tt>customerType</tt>, and <tt>itemType</tt>. Their fields are essentially traditional typed name/value pairs (much like SQL fields). Since each is defined as (defaulting to) being an open type, its instances will be permitted to contain more than what is specified in the type definition. Indeed, many of the customer objects contain a rating as well; however, this is not necessary for the customer object to be created. As can be seen in the sample data, customers can exist without ratings or with part (or all) of the address missing.</p></div>
3659<div class="section">
3660<h5><a name="Example"></a>Example</h5>
3661
3662<div>
3663<div>
3664<pre class="source">CREATE TYPE addressType AS {
3665 street: string,
3666 city: string,
3667 zipcode: string?
3668};
3669
3670CREATE TYPE customerType AS {
3671 custid: string,
3672 name: string,
3673 address: addressType?
3674};
3675
3676CREATE TYPE itemType AS {
3677 itemno: int,
3678 qty: int,
3679 price: int
3680};
3681</pre></div></div>
3682
3683<p>Optionally, you may wish to create a type that has an automatically generated primary key field. The example below shows an alternate form of <tt>itemType</tt> which achieves this by setting its primary key, <tt>itemno</tt>, to UUID. (Refer to the Datasets section later for more details on such fields.)</p></div>
3684<div class="section">
3685<h5><a name="Example"></a>Example</h5>
3686
3687<div>
3688<div>
3689<pre class="source">CREATE TYPE itemType AS {
3690 itemno: uuid,
3691 qty: int,
3692 price: int
3693};
3694</pre></div></div>
3695
3696<p>Note that the type of the <tt>itemno</tt> in this example is UUID. This field type can be used if you want to have an autogenerated-PK field. (Refer to the Datasets section later for more details on such fields.)</p>
3697<p>The next example creates a new object type, closed this time, called <tt>orderType</tt>. Instances of this closed type will not be permitted to have extra fields, although the <tt>ship_date</tt> field is marked as optional and may thus be <tt>NULL</tt> or <tt>MISSING</tt> in legal instances of the type. The items field is an array of instances of another object type, <tt>itemType</tt>.</p></div>
3698<div class="section">
3699<h5><a name="Example"></a>Example</h5>
3700
3701<div>
3702<div>
3703<pre class="source">CREATE TYPE orderType AS CLOSED {
3704 orderno: int,
3705 custid: string,
3706 order_date: string,
3707 ship_date: string?,
3708 items: [ itemType ]
3709};
3710</pre></div></div>
3711</div></div>
3712<div class="section">
3713<h4><a name="Create_Dataset"></a><a name="Datasets" id="Datasets"> Create Dataset</a></h4>
3714<div class="section">
3715<h5><a name="CreateDataset"></a>CreateDataset</h5>
3716<p><img src="../images/diagrams/CreateDataset.png" alt="" /></p></div>
3717<div class="section">
3718<h5><a name="CreateInternalDataset"></a>CreateInternalDataset</h5>
3719<p><img src="../images/diagrams/CreateInternalDataset.png" alt="" /></p></div>
3720<div class="section">
3721<h5><a name="CreateExternalDataset"></a>CreateExternalDataset</h5>
3722<p><img src="../images/diagrams/CreateExternalDataset.png" alt="" /></p></div>
3723<div class="section">
3724<h5><a name="DatasetTypeDef"></a>DatasetTypeDef</h5>
3725<p><img src="../images/diagrams/DatasetTypeDef.png" alt="" /></p></div>
3726<div class="section">
3727<h5><a name="DatasetFieldDef"></a>DatasetFieldDef</h5>
3728<p><img src="../images/diagrams/DatasetFieldDef.png" alt="" /></p></div>
3729<div class="section">
3730<h5><a name="TypeReference"></a>TypeReference</h5>
3731<p><img src="../images/diagrams/TypeReference.png" alt="" /></p></div>
3732<div class="section">
3733<h5><a name="PrimaryKey"></a>PrimaryKey</h5>
3734<p><img src="../images/diagrams/PrimaryKey.png" alt="" /></p></div>
3735<div class="section">
3736<h5><a name="NestedField"></a>NestedField</h5>
3737<p><img src="../images/diagrams/NestedField.png" alt="" /></p></div>
3738<div class="section">
3739<h5><a name="AdapterName"></a>AdapterName</h5>
3740<p><img src="../images/diagrams/AdapterName.png" alt="" /></p></div>
3741<div class="section">
3742<h5><a name="Configuration"></a>Configuration</h5>
3743<p><img src="../images/diagrams/Configuration.png" alt="" /></p></div>
3744<div class="section">
3745<h5><a name="KeyValuePair"></a>KeyValuePair</h5>
3746<p><img src="../images/diagrams/KeyValuePair.png" alt="" /></p></div>
3747<div class="section">
3748<h5><a name="Properties"></a>Properties</h5>
3749<p><img src="../images/diagrams/Properties.png" alt="" /></p>
3750<p>The <tt>CREATE DATASET</tt> statement is used to create a new dataset. Datasets are named multisets of object type instances; they are where data lives persistently and are the usual targets for queries. Datasets are typed, and the system ensures that their contents conform to their type definitions. An Internal dataset (the default kind) is a dataset whose content lives within and is managed by the system. It is required to have a specified unique primary key field which uniquely identifies the contained objects. (The primary key is also used in secondary indexes to identify the indexed primary data objects.)</p>
3751<p>Internal datasets contain several advanced options that can be specified when appropriate. One such option is that random primary key (UUID) values can be auto-generated by declaring the field to be UUID and putting <tt>AUTOGENERATED</tt> after the <tt>PRIMARY KEY</tt> identifier. In this case, unlike other non-optional fields, a value for the auto-generated PK field should not be provided at insertion time by the user since each object&#x2019;s primary key field value will be auto-generated by the system.</p>
3752<p>Another advanced option, when creating an Internal dataset, is to specify the merge policy to control which of the underlying LSM storage components are to be merged. (The system supports Log-Structured Merge tree based physical storage for Internal datasets.) Currently the system supports four different component merging policies that can be chosen per dataset: no-merge, constant, prefix, and correlated-prefix. The no-merge policy simply never merges disk components. The constant policy merges disk components when the number of components reaches a constant number k that can be configured by the user. The prefix policy relies on both component sizes and the number of components to decide which components to merge. It works by first trying to identify the smallest ordered (oldest to newest) sequence of components such that the sequence does not contain a single component that exceeds some threshold size M and that either the sum of the components&#x2019; sizes exceeds M or the number of components in the sequence exceeds another threshold C. If such a sequence exists, the components in the sequence are merged together to form a single component. Finally, the correlated-prefix policy is similar to the prefix policy, but it delegates the decision of merging the disk components of all the indexes in a dataset to the primary index. When the correlated-prefix policy decides that the primary index needs to be merged (using the same decision criteria as for the prefix policy), then it will issue successive merge requests on behalf of all other indexes associated with the same dataset. The system&#x2019;s default policy is the prefix policy except when there is a filter on a dataset, where the preferred policy for filters is the correlated-prefix.</p>
3753<p>Another advanced option shown in the syntax above, related to performance and mentioned above, is that a <b>filter</b> can optionally be created on a field to further optimize range queries with predicates on the filter&#x2019;s field. Filters allow some range queries to avoid searching all LSM components when the query conditions match the filter. (Refer to <a href="../sqlpp/filters.html">Filter-Based LSM Index Acceleration</a> for more information about filters.)</p>
3754<p>An External dataset, in contrast to an Internal dataset, has data stored outside of the system&#x2019;s control. Files living in HDFS or in the local filesystem(s) of a cluster&#x2019;s nodes are currently supported. External dataset support allows queries to treat foreign data as though it were stored in the system, making it possible to query &#x201c;legacy&#x201d; file data (for example, Hive data) without having to physically import it. When defining an External dataset, an appropriate adapter type must be selected for the desired external data. (See the <a href="../aql/externaldata.html">Guide to External Data</a> for more information on the available adapters.)</p>
3755<p>The following example creates an Internal dataset for storing <tt>customerType</tt> objects. It specifies that their <tt>custid</tt> field is their primary key.</p></div>
3756<div class="section">
3757<h5><a name="Example"></a>Example</h5>
3758
3759<div>
3760<div>
3761<pre class="source">CREATE INTERNAL DATASET customers(customerType) PRIMARY KEY custid;
3762</pre></div></div>
3763
3764<p>The next example creates an Internal dataset (the default kind when no dataset kind is specified) for storing <tt>itemType</tt> objects. It specifies that the <tt>itemno</tt> field should be used as the primary key for the dataset. It also specifies that the <tt>itemno</tt> field is an auto-generated field, meaning that a randomly generated UUID value should be assigned to each incoming object by the system. (A user should therefore not attempt to provide a value for this field.)</p>
3765<p>Note that the <tt>itemno</tt> field&#x2019;s declared type must be UUID in this case.</p></div>
3766<div class="section">
3767<h5><a name="Example"></a>Example</h5>
3768
3769<div>
3770<div>
3771<pre class="source">CREATE DATASET MyItems(itemType) PRIMARY KEY itemno AUTOGENERATED;
3772</pre></div></div>
3773
3774<p>Alternatively the dataset object type can be specified using inline type definition syntax.</p></div>
3775<div class="section">
3776<h5><a name="Example"></a>Example</h5>
3777
3778<div>
3779<div>
3780<pre class="source">CREATE DATASET MyItems(itemno UUID NOT UNKNOWN, qty INT NOT UNKNOWN, price INT NOT UNKNOWN) PRIMARY KEY itemno AUTOGENERATED;
3781</pre></div></div>
3782
3783<p>The next example creates an External dataset for querying LineItemType objects. The choice of the <tt>hdfs</tt> adapter means that this dataset&#x2019;s data actually resides in HDFS. The example <tt>CREATE</tt> statement also provides parameters used by the hdfs adapter: the URL and path needed to locate the data in HDFS and a description of the data format.</p></div>
3784<div class="section">
3785<h5><a name="Example"></a>Example</h5>
3786
3787<div>
3788<div>
3789<pre class="source">CREATE EXTERNAL DATASET LineItem(LineItemType) USING hdfs (
3790 (&quot;hdfs&quot;=&quot;hdfs://HOST:PORT&quot;),
3791 (&quot;path&quot;=&quot;HDFS_PATH&quot;),
3792 (&quot;input-format&quot;=&quot;text-input-format&quot;),
3793 (&quot;format&quot;=&quot;delimited-text&quot;),
3794 (&quot;delimiter&quot;=&quot;|&quot;));
3795</pre></div></div>
3796</div></div>
3797<div class="section">
3798<h4><a name="Create_Index"></a><a name="Indices" id="Indices">Create Index</a></h4>
3799<div class="section">
3800<h5><a name="CreateIndex"></a>CreateIndex</h5>
3801<p><img src="../images/diagrams/CreateIndex.png" alt="" /></p></div>
3802<div class="section">
3803<h5><a name="CreateSecondaryIndex"></a>CreateSecondaryIndex</h5>
3804<p><img src="../images/diagrams/CreateSecondaryIndex.png" alt="" /></p></div>
3805<div class="section">
3806<h5><a name="CreatePrimaryKeyIndex"></a>CreatePrimaryKeyIndex</h5>
3807<p><img src="../images/diagrams/CreatePrimaryKeyIndex.png" alt="" /></p></div>
3808<div class="section">
3809<h5><a name="IndexedElement"></a>IndexedElement</h5>
3810<p><b><img src="../images/diagrams/IndexedElement.png" alt="" /></b></p></div>
3811<div class="section">
3812<h5><a name="ArrayIndexElement"></a>ArrayIndexElement</h5>
3813<p><b><img src="../images/diagrams/ArrayIndexElement.png" alt="" /></b></p></div>
3814<div class="section">
3815<h5><a name="IndexField"></a>IndexField</h5>
3816<p><b><img src="../images/diagrams/IndexField.png" alt="" /></b></p></div>
3817<div class="section">
3818<h5><a name="NestedField"></a>NestedField</h5>
3819<p><img src="../images/diagrams/NestedField.png" alt="" /></p></div>
3820<div class="section">
3821<h5><a name="IndexType"></a>IndexType</h5>
3822<p><img src="../images/diagrams/IndexType.png" alt="" /></p>
3823<p>The <tt>CREATE INDEX</tt> statement creates a secondary index on one or more fields of a specified dataset. Supported index types include <tt>BTREE</tt> for totally ordered datatypes, <tt>RTREE</tt> for spatial data, and <tt>KEYWORD</tt> and <tt>NGRAM</tt> for textual (string) data. An index can be created on a nested field (or fields) by providing a valid path expression as an index field identifier. An array index can be created on an array or multiset datatype by providing a sequence of <tt>UNNEST</tt> and <tt>SELECT</tt>s to identify the field(s) to be indexed.</p>
3824<p>An indexed field is not required to be part of the datatype associated with a dataset if the dataset&#x2019;s datatype is declared as open <b>and</b> if the field&#x2019;s type is provided along with its name and if the <tt>ENFORCED</tt> keyword is specified at the end of the index definition. <tt>ENFORCING</tt> an open field introduces a check that makes sure that the actual type of the indexed field (if the optional field exists in the object) always matches this specified (open) field type.</p>
3825<p>The following example creates a btree index called <tt>cCustIdx</tt> on the <tt>custid</tt> field of the orders dataset. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the <tt>custid</tt> field.</p></div>
3826<div class="section">
3827<h5><a name="Example"></a>Example</h5>
3828
3829<div>
3830<div>
3831<pre class="source">CREATE INDEX cCustIdx ON orders(custid) TYPE BTREE;
3832</pre></div></div>
3833
3834<p>The following example creates a btree index called <tt>oCNameIdx</tt> on the <tt>cname</tt> field of the orders dataset, but does not insert <tt>NULL</tt> and <tt>MISSING</tt> values into the index. By default, if <tt>INCLUDE/EXCLUDE UNKNOWN KEY</tt> is not specified, unknown values will be inserted into btree indexes.</p></div>
3835<div class="section">
3836<h5><a name="Example"></a>Example</h5>
3837
3838<div>
3839<div>
3840<pre class="source">CREATE INDEX oCNameIdx ON orders(cname) EXCLUDE UNKNOWN KEY;
3841</pre></div></div>
3842
3843<p>The following example creates an open btree index called <tt>oCreatedTimeIdx</tt> on the (non-declared) <tt>createdTime</tt> field of the <tt>orders</tt> dataset having <tt>datetime</tt> type. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the <tt>createdTime</tt> field. The index is enforced so that records that do not have the <tt>createdTime</tt> field or have a mismatched type on the field cannot be inserted into the dataset.</p></div>
3844<div class="section">
3845<h5><a name="Example"></a>Example</h5>
3846
3847<div>
3848<div>
3849<pre class="source">CREATE INDEX oCreatedTimeIdx ON orders(createdTime: datetime?) TYPE BTREE ENFORCED;
3850</pre></div></div>
3851
3852<p>The following example creates an open btree index called <tt>cAddedTimeIdx</tt> on the (non-declared) <tt>addedTime</tt> field of the <tt>customers</tt> dataset having datetime type. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the <tt>addedTime</tt> field. The index is not enforced so that records that do not have the <tt>addedTime</tt> field or have a mismatched type on the field can still be inserted into the dataset.</p></div>
3853<div class="section">
3854<h5><a name="Example"></a>Example</h5>
3855
3856<div>
3857<div>
3858<pre class="source">CREATE INDEX cAddedTimeIdx ON customers(addedTime: datetime?);
3859</pre></div></div>
3860
3861<p>The following example creates a btree index called <tt>oOrderUserNameIdx</tt> on <tt>orderUserName</tt>, a nested field residing within an object-valued <tt>order</tt> field in the <tt>orders</tt> dataset. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the nested <tt>orderUserName</tt> field.</p></div>
3862<div class="section">
3863<h5><a name="Example"></a>Example</h5>
3864
3865<div>
3866<div>
3867<pre class="source">CREATE INDEX oOrderUserNameIdx ON orders(order.orderUserName) TYPE BTREE;
3868</pre></div></div>
3869
3870<p>The following example creates an array index called <tt>oItemsPriceIdx</tt> on the <tt>price</tt> field inside the <tt>items</tt> array of the <tt>orders</tt> dataset. This index can be useful for accelerating membership queries, existential or universal quantification queries, or joins involving the <tt>price</tt> field inside this array. Unknown values cannot currently be stored inside array indexes, so <tt>EXCLUDE UNKNOWN KEY</tt> must be specified.</p></div></div>
3871<div class="section">
3872<h4><a name="Example"></a>Example</h4>
3873
3874<div>
3875<div>
3876<pre class="source">CREATE INDEX oItemsPriceIdx ON orders(UNNEST items SELECT price) EXCLUDE UNKNOWN KEY;
3877</pre></div></div>
3878
3879<p>The following example creates an open rtree index called <tt>oOrderLocIdx</tt> on the order-location field of the <tt>orders</tt> dataset. This index can be useful for accelerating queries that use the <a href="builtins.html#spatial_intersect"><tt>spatial-intersect</tt> function</a> in a predicate involving the order-location field.</p>
3880<div class="section">
3881<h5><a name="Example"></a>Example</h5>
3882
3883<div>
3884<div>
3885<pre class="source">CREATE INDEX oOrderLocIdx ON orders(`order-location` : point?) TYPE RTREE ENFORCED;
3886</pre></div></div>
3887
3888<p>The following example creates a 3-gram index called <tt>cUserIdx</tt> on the name field of the <tt>customers</tt> dataset. This index can be used to accelerate some similarity or substring matching queries on the name field. For details refer to the document on <a href="similarity.html#NGram_Index">similarity queries</a>.</p></div>
3889<div class="section">
3890<h5><a name="Example"></a>Example</h5>
3891
3892<div>
3893<div>
3894<pre class="source">CREATE INDEX cUserIdx ON customers(name) TYPE NGRAM(3);
3895</pre></div></div>
3896
3897<p>The following example creates a keyword index called <tt>oCityIdx</tt> on the <tt>city</tt> within the <tt>address</tt> field of the <tt>customers</tt> dataset. This keyword index can be used to optimize queries with token-based similarity predicates on the <tt>address</tt> field. For details refer to the document on <a href="similarity.html#Keyword_Index">similarity queries</a>.</p></div>
3898<div class="section">
3899<h5><a name="Example"></a>Example</h5>
3900
3901<div>
3902<div>
3903<pre class="source">CREATE INDEX oCityIdx ON customers(address.city) TYPE KEYWORD;
3904</pre></div></div>
3905
3906<p>The following example creates a special secondary index which holds only the primary keys. This index is useful for speeding up aggregation queries which involve only primary keys. The name of the index is optional. If the name is not specified, the system will generate one. When the user would like to drop this index, the metadata can be queried to find the system-generated name.</p></div>
3907<div class="section">
3908<h5><a name="Example"></a>Example</h5>
3909
3910<div>
3911<div>
3912<pre class="source">CREATE PRIMARY INDEX cus_pk_idx ON customers;
3913</pre></div></div>
3914
3915<p>An example query that can be accelerated using the primary-key index:</p>
3916
3917<div>
3918<div>
3919<pre class="source">SELECT COUNT(*) FROM customers;
3920</pre></div></div>
3921
3922<p>To look up the above primary-key index, issue the following query:</p>
3923
3924<div>
3925<div>
3926<pre class="source">SELECT VALUE i
3927FROM Metadata.`Index` i
3928WHERE i.DataverseName = &quot;Commerce&quot; AND i.DatasetName = &quot;customers&quot;;
3929</pre></div></div>
3930
3931<p>The query returns:</p>
3932
3933<div>
3934<div>
3935<pre class="source">[
3936 {
3937 &quot;DataverseName&quot;: &quot;Commerce&quot;,
3938 &quot;DatasetName&quot;: &quot;customers&quot;,
3939 &quot;IndexName&quot;: &quot;cus_pk_idx&quot;,
3940 &quot;IndexStructure&quot;: &quot;BTREE&quot;,
3941 &quot;SearchKey&quot;: [],
3942 &quot;IsPrimary&quot;: false,
3943 &quot;Timestamp&quot;: &quot;Fri Sep 18 14:15:51 PDT 2020&quot;,
3944 &quot;PendingOp&quot;: 0
3945 },
3946 {
3947 &quot;DataverseName&quot;: &quot;Commerce&quot;,
3948 &quot;DatasetName&quot;: &quot;customers&quot;,
3949 &quot;IndexName&quot;: &quot;customers&quot;,
3950 &quot;IndexStructure&quot;: &quot;BTREE&quot;,
3951 &quot;SearchKey&quot;: [
3952 [
3953 &quot;custid&quot;
3954 ]
3955 ],
3956 &quot;IsPrimary&quot;: true,
3957 &quot;Timestamp&quot;: &quot;Thu Jul 16 13:07:37 PDT 2020&quot;,
3958 &quot;PendingOp&quot;: 0
3959 }
3960]
3961</pre></div></div>
3962
3963<p>Remember that <tt>CREATE PRIMARY INDEX</tt> creates a secondary index. That is the reason the <tt>IsPrimary</tt> field is false. The primary-key index can be identified by the fact that the <tt>SearchKey</tt> field is empty since it only contains primary key fields.</p></div></div>
3964<div class="section">
3965<h4><a name="Create_Synonym"></a><a name="Synonyms" id="Synonyms"> Create Synonym</a></h4>
3966<div class="section">
3967<h5><a name="CreateSynonym"></a>CreateSynonym</h5>
3968<p><img src="../images/diagrams/CreateSynonym.png" alt="" /></p>
3969<p>The <tt>CREATE SYNONYM</tt> statement creates a synonym for a given dataset or a view. This synonym may be used instead of the dataset name in <tt>SELECT</tt>, <tt>INSERT</tt>, <tt>UPSERT</tt>, <tt>DELETE</tt>, and <tt>LOAD</tt> statements, or instead of the view name in <tt>SELECT</tt> statements. The target dataset or view does not need to exist when the synonym is created. A synonym may be created for another synonym.</p></div>
3970<div class="section">
3971<h5><a name="Example"></a>Example</h5>
3972
3973<div>
3974<div>
3975<pre class="source">CREATE DATASET customers(customersType) PRIMARY KEY custid;
3976
3977CREATE SYNONYM customersSynonym FOR customers;
3978
3979SELECT * FROM customersSynonym;
3980</pre></div></div>
3981
3982<p>More information on how synonyms are resolved can be found in <a href="#Variable_bindings_and_name_resolution">Appendix 3. Variable Bindings and Name Resolution</a>.</p></div></div>
3983<div class="section">
3984<h4><a name="Create_Function"></a><a name="Create_function" id="Create_function">Create Function</a></h4>
3985<p>The <tt>CREATE FUNCTION</tt> statement creates a <b>named</b> function that can then be used and reused in queries. The body of a function can be any query expression involving the function&#x2019;s parameters.</p>
3986<div class="section">
3987<h5><a name="CreateFunction"></a>CreateFunction</h5>
3988<p><img src="../images/diagrams/CreateFunction.png" alt="" /></p></div>
3989<div class="section">
3990<h5><a name="FunctionParameters"></a>FunctionParameters</h5>
3991<p><img src="../images/diagrams/FunctionParameters.png" alt="" /></p></div>
3992<div class="section">
3993<h5><a name="ExternalFunctionDef"></a>ExternalFunctionDef</h5>
3994<p><img src="../images/diagrams/ExternalFunctionDef.png" alt="" /></p>
3995<p>The following is an example of a <tt>CREATE FUNCTION</tt> statement which is similar to our earlier <tt>DECLARE FUNCTION</tt> example.</p>
3996<p>It differs from that example in that it results in a function that is persistently registered by name in the specified dataverse (the current dataverse being used, if not otherwise specified).</p></div>
3997<div class="section">
3998<h5><a name="Example"></a>Example</h5>
3999
4000<div>
4001<div>
4002<pre class="source">CREATE FUNCTION nameSearch(customerId) {
4003 (SELECT c.custid, c.name
4004 FROM customers AS c
4005 WHERE c.custid = customerId)[0]
4006 };
4007</pre></div></div>
4008
4009<p>The following is an example of a CREATE FUNCTION statement that replaces an existing function.</p></div>
4010<div class="section">
4011<h5><a name="Example"></a>Example</h5>
4012
4013<div>
4014<div>
4015<pre class="source">CREATE OR REPLACE FUNCTION friendInfo(userId) {
4016 (SELECT u.id, u.name
4017 FROM GleambookUsers u
4018 WHERE u.id = userId)[0]
4019 };
4020</pre></div></div>
4021
4022<p>The following is an example of a CREATE FUNCTION statement that introduces a function with a variable number of arguments. The arguments are accessible in the function body via the <tt>args</tt> array parameter.</p></div>
4023<div class="section">
4024<h5><a name="Example"></a>Example</h5>
4025
4026<div>
4027<div>
4028<pre class="source">CREATE FUNCTION strJoin(...) {
4029 string_join(args, &quot;,&quot;)
4030};
4031</pre></div></div>
4032
4033<p>External functions can also be loaded into Libraries via the <a href="../udf.html">UDF API</a>. Given an already loaded library <tt>pylib</tt>, a function <tt>sentiment</tt> mapping to a Python method <tt>sent_model.sentiment</tt> in <tt>sentiment_mod</tt> would be created as follows:</p></div>
4034<div class="section">
4035<h5><a name="Example"></a>Example</h5>
4036
4037<div>
4038<div>
4039<pre class="source">CREATE FUNCTION sentiment(a) AS &quot;sentiment_mod&quot;, &quot;sent_model.sentiment&quot; AT pylib;
4040</pre></div></div>
4041</div></div>
4042<div class="section">
4043<h4><a name="Create_View"></a><a name="Create_view" id="Create_view">Create View</a></h4>
4044<p>The <tt>CREATE VIEW</tt> statement creates a <b>named</b> view that can then be used in queries. The body of a view can be any <tt>SELECT</tt> statement.</p>
4045<div class="section">
4046<h5><a name="CreateView"></a>CreateView</h5>
4047<p><img src="../images/diagrams/CreateView.png" alt="" /></p></div>
4048<div class="section">
4049<h5><a name="Example"></a>Example</h5>
4050
4051<div>
4052<div>
4053<pre class="source">CREATE DATASET customers(customersType) PRIMARY KEY custid;
4054
4055CREATE VIEW customersView AS
4056 SELECT c.custid, c.name
4057 FROM customers AS c
4058 WHERE c.address.city = &quot;Boston, MA&quot;;
4059
4060SELECT * FROM customersView;
4061</pre></div></div>
4062</div></div></div>
4063<div class="section">
4064<h3><a name="Drop_Statement"></a><a name="Removal" id="Removal">Drop Statement</a></h3>
4065<div class="section">
4066<div class="section">
4067<h5><a name="DropStmnt"></a>DropStmnt</h5>
4068<p><img src="../images/diagrams/DropStmnt.png" alt="" /></p></div>
4069<div class="section">
4070<h5><a name="DataverseName"></a>DataverseName</h5>
4071<p><img src="../images/diagrams/DataverseName.png" alt="" /></p></div>
4072<div class="section">
4073<h5><a name="QualifiedName"></a>QualifiedName</h5>
4074<p><img src="../images/diagrams/QualifiedName.png" alt="" /></p></div>
4075<div class="section">
4076<h5><a name="DoubleQualifiedName"></a>DoubleQualifiedName</h5>
4077<p><img src="../images/diagrams/DoubleQualifiedName.png" alt="" /></p></div>
4078<div class="section">
4079<h5><a name="FunctionSignature"></a>FunctionSignature</h5>
4080<p><img src="../images/diagrams/FunctionSignature.png" alt="" /></p></div>
4081<div class="section">
4082<h5><a name="FunctionParameters"></a>FunctionParameters</h5>
4083<p><img src="../images/diagrams/FunctionParameters.png" alt="" /></p>
4084<p>The <tt>DROP</tt> statement is the inverse of the <tt>CREATE</tt> statement. It can be used to drop dataverses, datatypes, datasets, indexes, functions, synonyms, and views.</p>
4085<p>The following examples illustrate some uses of the <tt>DROP</tt> statement.</p></div>
4086<div class="section">
4087<h5><a name="Example"></a>Example</h5>
4088
4089<div>
4090<div>
4091<pre class="source">DROP DATASET customers IF EXISTS;
4092
4093DROP INDEX orders.orderidIndex;
4094
4095DROP TYPE customers2.customersType;
4096
4097DROP FUNCTION nameSearch@1;
4098
4099DROP SYNONYM customersSynonym;
4100
4101DROP VIEW customersView;
4102
4103DROP DATAVERSE CommerceData;
4104</pre></div></div>
4105
4106<p>When an artifact is dropped, it will be dropped from the current dataverse if none is specified (see the <tt>DROP DATASET</tt> example above) or from the specified dataverse (see the <tt>DROP TYPE</tt> example above) if one is specified by fully qualifying the artifact name in the <tt>DROP</tt> statement. When specifying an index to drop, the index name must be qualified by the dataset that it indexes. When specifying a function to drop, since the query language allows functions to be overloaded by their number of arguments, the identifying name of the function to be dropped must explicitly include that information. (<tt>nameSearch@1</tt> above denotes the 1-argument function named <tt>nameSearch</tt> in the current dataverse.)</p></div></div></div>
4107<div class="section">
4108<h3><a name="Load_Statement"></a><a name="Load_statement" id="Load_statement">Load Statement</a></h3>
4109<div class="section">
4110<div class="section">
4111<h5><a name="LoadStmnt"></a>LoadStmnt</h5>
4112<p><img src="../images/diagrams/LoadStmnt.png" alt="" /></p></div>
4113<div class="section">
4114<h5><a name="AdapterName"></a>AdapterName</h5>
4115<p><img src="../images/diagrams/AdapterName.png" alt="" /></p></div>
4116<div class="section">
4117<h5><a name="Configuration"></a>Configuration</h5>
4118<p><img src="../images/diagrams/Configuration.png" alt="" /></p></div>
4119<div class="section">
4120<h5><a name="KeyValuePair"></a>KeyValuePair</h5>
4121<p><img src="../images/diagrams/KeyValuePair.png" alt="" /></p>
4122<p>The <tt>LOAD</tt> statement is used to initially populate a dataset via bulk loading of data from an external file. An appropriate adapter must be selected to handle the nature of the desired external data. The <tt>LOAD</tt> statement accepts the same adapters and the same parameters as discussed earlier for External datasets. (See the <a href="../aql/externaldata.html">guide to external data</a> for more information on the available adapters.) If a dataset has an auto-generated primary key field, the file to be imported should not include that field in it.</p>
4123<p>The target dataset name may be a synonym introduced by a <tt>CREATE SYNONYM</tt> statement.</p>
4124<p>The following example shows how to bulk load the <tt>customers</tt> dataset from an external file containing data that has been prepared in ADM (Asterix Data Model) format.</p></div>
4125<div class="section">
4126<h5><a name="Example"></a>Example</h5>
4127
4128<div>
4129<div>
4130<pre class="source"> LOAD DATASET customers USING localfs
4131 ((&quot;path&quot;=&quot;127.0.0.1:///Users/bignosqlfan/commercenew/gbu.adm&quot;),(&quot;format&quot;=&quot;adm&quot;));
4132</pre></div></div>
4133</div></div></div></div>
4134<div class="section">
4135<h2><a name="Modification_statements" id="Modification_statements">Modification statements</a></h2>
4136<div class="section">
4137<h3><a name="Insert_Statement"></a><a name="Inserts" id="Inserts">Insert Statement</a></h3>
4138<div class="section">
4139<div class="section">
4140<h5><a name="InsertStmnt"></a>InsertStmnt</h5>
4141<p><img src="../images/diagrams/InsertStmnt.png" alt="" /></p>
4142<p>The <tt>INSERT</tt> statement is used to insert new data into a dataset. The data to be inserted comes from a query expression. This expression can be as simple as a constant expression, or in general it can be any legal query. In case the dataset has an auto-generated primary key, when performing an <tt>INSERT</tt> operation, the system allows the user to manually add the auto-generated key field in the <tt>INSERT</tt> statement, or skip that field and the system will automatically generate it and add it. However, it is important to note that if a record already exists in the dataset with the auto-generated key provided by the user, then that operation is going to fail. As a general rule, insertion will fail if the dataset already has data with the primary key value(s) being inserted.</p>
4143<p>Inserts are processed transactionally by the system. The transactional scope of each insert transaction is the insertion of a single object plus its affiliated secondary index entries (if any). If the query part of an insert returns a single object, then the <tt>INSERT</tt> statement will be a single, atomic transaction. If the query part returns multiple objects, each object being inserted will be treated as a separate transaction.</p>
4144<p>The target dataset name may be a synonym introduced by a <tt>CREATE SYNONYM</tt> statement.</p>
4145<p>The following example illustrates a query-based insertion.</p></div>
4146<div class="section">
4147<h5><a name="Example"></a>Example</h5>
4148
4149<div>
4150<div>
4151<pre class="source">INSERT INTO custCopy (SELECT VALUE c FROM customers c)
4152</pre></div></div>
4153</div></div></div>
4154<div class="section">
4155<h3><a name="Upsert_Statement"></a><a name="Upserts" id="Upserts">Upsert Statement</a></h3>
4156<div class="section">
4157<div class="section">
4158<h5><a name="UpsertStmnt"></a>UpsertStmnt</h5>
4159<p><img src="../images/diagrams/UpsertStmnt.png" alt="" /></p>
4160<p>The <tt>UPSERT</tt> statement syntactically mirrors the <tt>INSERT</tt> statement discussed above. The difference lies in its semantics, which for <tt>UPSERT</tt> are &#x201c;add or replace&#x201d; instead of the <tt>INSERT</tt> &#x201c;add if not present, else error&#x201d; semantics. Whereas an <tt>INSERT</tt> can fail if another object already exists with the specified key, the analogous <tt>UPSERT</tt> will replace the previous object&#x2019;s value with that of the new object in such cases. Like the <tt>INSERT</tt> statement, the system allows the user to manually provide the auto-generated key for datasets with an auto-generated key as its primary key. This operation will insert the record if no record with that key already exists, but if a record with the key already exists, then the operation will be converted to a replace/update operation.</p>
4161<p>The target dataset name may be a synonym introduced by a <tt>CREATE SYNONYM</tt> statement.</p>
4162<p>The following example illustrates a query-based upsert operation.</p></div>
4163<div class="section">
4164<h5><a name="Example"></a>Example</h5>
4165
4166<div>
4167<div>
4168<pre class="source">UPSERT INTO custCopy (SELECT VALUE c FROM customers c)
4169</pre></div></div>
4170</div></div></div>
4171<div class="section">
4172<h3><a name="Delete_Statement"></a><a name="Deletes" id="Deletes">Delete Statement</a></h3>
4173<div class="section">
4174<div class="section">
4175<h5><a name="DeleteStmnt"></a>DeleteStmnt</h5>
4176<p><img src="../images/diagrams/DeleteStmnt.png" alt="" /></p>
4177<p>The <tt>DELETE</tt> statement is used to delete data from a target dataset. The data to be deleted is identified by a boolean expression involving the variable bound to the target dataset in the <tt>DELETE</tt> statement.</p>
4178<p>Deletes are processed transactionally by the system. The transactional scope of each delete transaction is the deletion of a single object plus its affiliated secondary index entries (if any). If the boolean expression for a delete identifies a single object, then the <tt>DELETE</tt> statement itself will be a single, atomic transaction. If the expression identifies multiple objects, then each object deleted will be handled as a separate transaction.</p>
4179<p>The target dataset name may be a synonym introduced by a <tt>CREATE SYNONYM</tt> statement.</p>
4180<p>The following examples illustrate single-object deletions.</p></div>
4181<div class="section">
4182<h5><a name="Example"></a>Example</h5>
4183
4184<div>
4185<div>
4186<pre class="source">DELETE FROM customers c WHERE c.custid = &quot;C41&quot;;
4187</pre></div></div>
4188</div>
4189<div class="section">
4190<h5><a name="Example"></a>Example</h5>
4191
4192<div>
4193<div>
4194<pre class="source">DELETE FROM customers WHERE custid = &quot;C47&quot;;
4195</pre></div></div>
4196<!--
4197 ! Licensed to the Apache Software Foundation (ASF) under one
4198 ! or more contributor license agreements. See the NOTICE file
4199 ! distributed with this work for additional information
4200 ! regarding copyright ownership. The ASF licenses this file
4201 ! to you under the Apache License, Version 2.0 (the
4202 ! "License"); you may not use this file except in compliance
4203 ! with the License. You may obtain a copy of the License at
4204 !
4205 ! http://www.apache.org/licenses/LICENSE-2.0
4206 !
4207 ! Unless required by applicable law or agreed to in writing,
4208 ! software distributed under the License is distributed on an
4209 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4210 ! KIND, either express or implied. See the License for the
4211 ! specific language governing permissions and limitations
4212 ! under the License.
4213 !-->
4214
4215<h1><a name="Reserved_keywords" id="Reserved_keywords">Appendix 1. Reserved keywords</a></h1><!--
4216 ! Licensed to the Apache Software Foundation (ASF) under one
4217 ! or more contributor license agreements. See the NOTICE file
4218 ! distributed with this work for additional information
4219 ! regarding copyright ownership. The ASF licenses this file
4220 ! to you under the Apache License, Version 2.0 (the
4221 ! "License"); you may not use this file except in compliance
4222 ! with the License. You may obtain a copy of the License at
4223 !
4224 ! http://www.apache.org/licenses/LICENSE-2.0
4225 !
4226 ! Unless required by applicable law or agreed to in writing,
4227 ! software distributed under the License is distributed on an
4228 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4229 ! KIND, either express or implied. See the License for the
4230 ! specific language governing permissions and limitations
4231 ! under the License.
4232 !-->
4233
4234<p>All reserved keywords are listed in the following table:</p>
4235<table border="0" class="table table-striped">
4236<thead>
4237
4238<tr class="a">
4239<th> </th>
4240<th> </th>
4241<th> </th>
4242<th> </th>
4243<th> </th>
4244<th> </th></tr>
4245</thead><tbody>
4246
4247<tr class="b">
4248<td> ADAPTER </td>
4249<td> ALL </td>
4250<td> AND </td>
4251<td> ANY </td>
4252<td> APPLY </td>
4253<td> AS </td></tr>
4254<tr class="a">
4255<td> ASC </td>
4256<td> AT </td>
4257<td> AUTOGENERATED </td>
4258<td> BETWEEN </td>
4259<td> BTREE </td>
4260<td> BY </td></tr>
4261<tr class="b">
4262<td> CASE </td>
4263<td> CLOSED </td>
4264<td> COLLECTION </td>
4265<td> CREATE </td>
4266<td> COMPACTION </td>
4267<td> COMPACT </td></tr>
4268<tr class="a">
4269<td> CONNECT </td>
4270<td> CORRELATE </td>
4271<td> DATASET </td>
4272<td> DATAVERSE </td>
4273<td> DECLARE </td>
4274<td> DEFINITION </td></tr>
4275<tr class="b">
4276<td> DELETE </td>
4277<td> DESC </td>
4278<td> DISCONNECT </td>
4279<td> DISTINCT </td>
4280<td> DIV </td>
4281<td> DROP </td></tr>
4282<tr class="a">
4283<td> ELEMENT </td>
4284<td> EXPLAIN </td>
4285<td> ELSE </td>
4286<td> ENFORCED </td>
4287<td> END </td>
4288<td> EVERY </td></tr>
4289<tr class="b">
4290<td> EXCEPT </td>
4291<td> EXIST </td>
4292<td> EXTERNAL </td>
4293<td> FEED </td>
4294<td> FILTER </td>
4295<td> FLATTEN </td></tr>
4296<tr class="a">
4297<td> FOR </td>
4298<td> FROM </td>
4299<td> FULL </td>
4300<td> FULLTEXT </td>
4301<td> FUNCTION </td>
4302<td> GROUP </td></tr>
4303<tr class="b">
4304<td> HAVING </td>
4305<td> HINTS </td>
4306<td> IF </td>
4307<td> INTO </td>
4308<td> IN </td>
4309<td> INDEX </td></tr>
4310<tr class="a">
4311<td> INGESTION </td>
4312<td> INNER </td>
4313<td> INSERT </td>
4314<td> INTERNAL </td>
4315<td> INTERSECT </td>
4316<td> IS </td></tr>
4317<tr class="b">
4318<td> JOIN </td>
4319<td> KEYWORD </td>
4320<td> LEFT </td>
4321<td> LETTING </td>
4322<td> LET </td>
4323<td> LIKE </td></tr>
4324<tr class="a">
4325<td> LIMIT </td>
4326<td> LOAD </td>
4327<td> MISSING </td>
4328<td> NODEGROUP </td>
4329<td> NGRAM </td>
4330<td> NOT </td></tr>
4331<tr class="b">
4332<td> NULL </td>
4333<td> OFFSET </td>
4334<td> ON </td>
4335<td> OPEN </td>
4336<td> OR </td>
4337<td> ORDER </td></tr>
4338<tr class="a">
4339<td> OUTER </td>
4340<td> OUTPUT </td>
4341<td> OVER </td>
4342<td> PATH </td>
4343<td> POLICY </td>
4344<td> PRE-SORTED </td></tr>
4345<tr class="b">
4346<td> PRIMARY </td>
4347<td> RAW </td>
4348<td> REFRESH </td>
4349<td> RETURN </td>
4350<td> RETURNING </td>
4351<td> RIGHT </td></tr>
4352<tr class="a">
4353<td> RTREE </td>
4354<td> RUN </td>
4355<td> SATISFIES </td>
4356<td> SECONDARY </td>
4357<td> SELECT </td>
4358<td> SET </td></tr>
4359<tr class="b">
4360<td> SOME </td>
4361<td> START </td>
4362<td> STOP </td>
4363<td> SYNONYM </td>
4364<td> TEMPORARY </td>
4365<td> THEN </td></tr>
4366<tr class="a">
4367<td> TO </td>
4368<td> TRUE </td>
4369<td> TYPE </td>
4370<td> UNION </td>
4371<td> UNKNOWN </td>
4372<td> UNNEST </td></tr>
4373<tr class="b">
4374<td> UPDATE </td>
4375<td> UPSERT </td>
4376<td> USE </td>
4377<td> USING </td>
4378<td> VALUE </td>
4379<td> VALUED </td></tr>
4380<tr class="a">
4381<td> WHEN </td>
4382<td> WHERE </td>
4383<td> WITH </td>
4384<td> WRITE </td>
4385<td> </td>
4386<td> </td></tr>
4387</tbody>
4388</table><!--
4389 ! Licensed to the Apache Software Foundation (ASF) under one
4390 ! or more contributor license agreements. See the NOTICE file
4391 ! distributed with this work for additional information
4392 ! regarding copyright ownership. The ASF licenses this file
4393 ! to you under the Apache License, Version 2.0 (the
4394 ! "License"); you may not use this file except in compliance
4395 ! with the License. You may obtain a copy of the License at
4396 !
4397 ! http://www.apache.org/licenses/LICENSE-2.0
4398 !
4399 ! Unless required by applicable law or agreed to in writing,
4400 ! software distributed under the License is distributed on an
4401 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4402 ! KIND, either express or implied. See the License for the
4403 ! specific language governing permissions and limitations
4404 ! under the License.
4405 !-->
4406</div></div></div></div>
4407<div class="section">
4408<h2><a name="Appendix_2._Performance_Tuning"></a><a name="Performance_tuning" id="Performance_tuning">Appendix 2. Performance Tuning</a></h2><!--
4409 ! Licensed to the Apache Software Foundation (ASF) under one
4410 ! or more contributor license agreements. See the NOTICE file
4411 ! distributed with this work for additional information
4412 ! regarding copyright ownership. The ASF licenses this file
4413 ! to you under the Apache License, Version 2.0 (the
4414 ! "License"); you may not use this file except in compliance
4415 ! with the License. You may obtain a copy of the License at
4416 !
4417 ! http://www.apache.org/licenses/LICENSE-2.0
4418 !
4419 ! Unless required by applicable law or agreed to in writing,
4420 ! software distributed under the License is distributed on an
4421 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4422 ! KIND, either express or implied. See the License for the
4423 ! specific language governing permissions and limitations
4424 ! under the License.
4425 !-->
4426
4427<p>The <tt>SET</tt> statement can be used to override some cluster-wide configuration parameters for a specific request:</p>
4428<div class="section">
4429<div class="section">
4430<div class="section">
4431<h5><a name="SetStmnt"></a>SetStmnt</h5>
4432<p><img src="../images/diagrams/SetStmnt.png" alt="" /></p>
4433<p>As parameter identifiers are qualified names (containing a &#x2018;.&#x2019;) they have to be escaped using backticks (``). Note that changing query parameters will not affect query correctness but only impact performance characteristics, such as response time and throughput.</p></div></div></div></div>
4434<div class="section">
4435<h2><a name="Parallelism_Parameter"></a><a name="Parallelism_parameter" id="Parallelism_parameter">Parallelism Parameter</a></h2>
4436<p>The system can execute each request using multiple cores on multiple machines (a.k.a., partitioned parallelism) in a cluster. A user can manually specify the maximum execution parallelism for a request to scale it up and down using the following parameter:</p>
4437<ul>
4438
4439<li><b>compiler.parallelism</b>: the maximum number of CPU cores that can be used to process a query. There are three cases of the value <i>p</i> for compiler.parallelism:
4440<ul>
4441
4442<li>
4443
4444<p><i>p</i> &lt; 0 or <i>p</i> &gt; the total number of cores in a cluster: the system will use all available cores in the cluster;</p>
4445</li>
4446<li>
4447
4448<p><i>p</i> = 0 (the default): the system will use the storage parallelism (the number of partitions of stored datasets) as the maximum parallelism for query processing;</p>
4449</li>
4450<li>
4451
4452<p>all other cases: the system will use the user-specified number as the maximum number of CPU cores to use for executing the query.</p>
4453</li>
4454</ul>
4455</li>
4456</ul>
4457<div class="section">
4458<div class="section">
4459<div class="section">
4460<h5><a name="Example"></a>Example</h5>
4461
4462<div>
4463<div>
4464<pre class="source">SET `compiler.parallelism` &quot;16&quot;;
4465
4466SELECT c.name AS cname, o.orderno AS orderno
4467FROM customers c JOIN orders o ON c.custid = o.custid;
4468</pre></div></div>
4469</div></div></div></div>
4470<div class="section">
4471<h2><a name="Memory_Parameters"></a><a name="Memory_parameters" id="Memory_parameters">Memory Parameters</a></h2>
4472<p>In the system, each blocking runtime operator such as join, group-by and order-by works within a fixed memory budget, and can gracefully spill to disks if the memory budget is smaller than the amount of data they have to hold. A user can manually configure the memory budget of those operators within a query. The supported configurable memory parameters are:</p>
4473<ul>
4474
4475<li>
4476
4477<p><b>compiler.groupmemory</b>: the memory budget that each parallel group-by operator instance can use; 32MB is the default budget.</p>
4478</li>
4479<li>
4480
4481<p><b>compiler.sortmemory</b>: the memory budget that each parallel sort operator instance can use; 32MB is the default budget.</p>
4482</li>
4483<li>
4484
4485<p><b>compiler.joinmemory</b>: the memory budget that each parallel hash join operator instance can use; 32MB is the default budget.</p>
4486</li>
4487<li>
4488
4489<p><b>compiler.windowmemory</b>: the memory budget that each parallel window aggregate operator instance can use; 32MB is the default budget.</p>
4490</li>
4491</ul>
4492<p>For each memory budget value, you can use a 64-bit integer value with a 1024-based binary unit suffix (for example, B, KB, MB, GB). If there is no user-provided suffix, &#x201c;B&#x201d; is the default suffix. See the following examples.</p>
4493<div class="section">
4494<div class="section">
4495<div class="section">
4496<h5><a name="Example"></a>Example</h5>
4497
4498<div>
4499<div>
4500<pre class="source">SET `compiler.groupmemory` &quot;64MB&quot;;
4501
4502SELECT c.custid, COUNT(*)
4503FROM customers c
4504GROUP BY c.custid;
4505</pre></div></div>
4506</div>
4507<div class="section">
4508<h5><a name="Example"></a>Example</h5>
4509
4510<div>
4511<div>
4512<pre class="source">SET `compiler.sortmemory` &quot;67108864&quot;;
4513
4514SELECT VALUE o
4515FROM orders AS o
4516ORDER BY ARRAY_LENGTH(o.items) DESC;
4517</pre></div></div>
4518</div>
4519<div class="section">
4520<h5><a name="Example"></a>Example</h5>
4521
4522<div>
4523<div>
4524<pre class="source">SET `compiler.joinmemory` &quot;132000KB&quot;;
4525
4526SELECT c.name AS cname, o.orderno AS orderno
4527FROM customers c JOIN orders o ON c.custid = o.custid;
4528</pre></div></div>
4529<!--
4530 ! Licensed to the Apache Software Foundation (ASF) under one
4531 ! or more contributor license agreements. See the NOTICE file
4532 ! distributed with this work for additional information
4533 ! regarding copyright ownership. The ASF licenses this file
4534 ! to you under the Apache License, Version 2.0 (the
4535 ! "License"); you may not use this file except in compliance
4536 ! with the License. You may obtain a copy of the License at
4537 !
4538 ! http://www.apache.org/licenses/LICENSE-2.0
4539 !
4540 ! Unless required by applicable law or agreed to in writing,
4541 ! software distributed under the License is distributed on an
4542 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4543 ! KIND, either express or implied. See the License for the
4544 ! specific language governing permissions and limitations
4545 ! under the License.
4546 !-->
4547</div></div></div></div>
4548<div class="section">
4549<h2><a name="Parallel_Sort_Parameter"></a><a name="Parallel_sort_parameter" id="Parallel_sort_parameter">Parallel Sort Parameter</a></h2>
4550<p>The following parameter enables you to activate or deactivate full parallel sort for order-by operations.</p>
4551<p>When full parallel sort is inactive (<tt>false</tt>), each existing data partition is sorted (in parallel), and then all data partitions are merged into a single node.</p>
4552<p>When full parallel sort is active (<tt>true</tt>), the data is first sampled, and then repartitioned so that each partition contains data that is greater than the previous partition. The data in each partition is then sorted (in parallel), but the sorted partitions are not merged into a single node.</p>
4553<ul>
4554
4555<li><b>compiler.sort.parallel</b>: A boolean specifying whether full parallel sort is active (<tt>true</tt>) or inactive (<tt>false</tt>). The default value is <tt>true</tt>.</li>
4556</ul>
4557<div class="section">
4558<div class="section">
4559<div class="section">
4560<h5><a name="Example"></a>Example</h5>
4561
4562<div>
4563<div>
4564<pre class="source">SET `compiler.sort.parallel` &quot;true&quot;;
4565
4566SELECT VALUE o
4567FROM orders AS o
4568ORDER BY ARRAY_LENGTH(o.items) DESC;
4569</pre></div></div>
4570<!--
4571 ! Licensed to the Apache Software Foundation (ASF) under one
4572 ! or more contributor license agreements. See the NOTICE file
4573 ! distributed with this work for additional information
4574 ! regarding copyright ownership. The ASF licenses this file
4575 ! to you under the Apache License, Version 2.0 (the
4576 ! "License"); you may not use this file except in compliance
4577 ! with the License. You may obtain a copy of the License at
4578 !
4579 ! http://www.apache.org/licenses/LICENSE-2.0
4580 !
4581 ! Unless required by applicable law or agreed to in writing,
4582 ! software distributed under the License is distributed on an
4583 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4584 ! KIND, either express or implied. See the License for the
4585 ! specific language governing permissions and limitations
4586 ! under the License.
4587 !-->
4588</div></div></div></div>
4589<div class="section">
4590<h2><a name="Controlling_Index-Only-Plan_Parameter"></a><a name="Index_Only" id="Index_Only">Controlling Index-Only-Plan Parameter</a></h2>
4591<p>By default, the system tries to build an index-only plan whenever utilizing a secondary index is possible. For example, if a <tt>SELECT</tt> or <tt>JOIN</tt> query can utilize an enforced B+Tree or R-Tree index on a field, the optimizer checks whether a secondary-index search alone can generate the result that the query asks for. It mainly checks two conditions: (1) the predicates used in <tt>WHERE</tt> use only the primary key field and/or secondary key field and (2) the result does not return any other fields. If these two conditions hold, it builds an index-only plan. Since an index-only plan only searches a secondary-index to answer a query, it is faster than a non-index-only plan that needs to search the primary index. However, this index-only plan can be turned off per query by setting the following parameter.</p>
4592<ul>
4593
4594<li><b>compiler.indexonly</b>: if this is set to false, the index-only-plan will not be applied; the default value is true.</li>
4595</ul>
4596<div class="section">
4597<div class="section">
4598<div class="section">
4599<h5><a name="Example"></a>Example</h5>
4600
4601<div>
4602<div>
4603<pre class="source">SET `compiler.indexonly` &quot;false&quot;;
4604
4605SELECT o.order_date AS orderdate
4606FROM orders o WHERE o.order_date = &quot;2020-05-01&quot;;
4607</pre></div></div>
4608<!--
4609 ! Licensed to the Apache Software Foundation (ASF) under one
4610 ! or more contributor license agreements. See the NOTICE file
4611 ! distributed with this work for additional information
4612 ! regarding copyright ownership. The ASF licenses this file
4613 ! to you under the Apache License, Version 2.0 (the
4614 ! "License"); you may not use this file except in compliance
4615 ! with the License. You may obtain a copy of the License at
4616 !
4617 ! http://www.apache.org/licenses/LICENSE-2.0
4618 !
4619 ! Unless required by applicable law or agreed to in writing,
4620 ! software distributed under the License is distributed on an
4621 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4622 ! KIND, either express or implied. See the License for the
4623 ! specific language governing permissions and limitations
4624 ! under the License.
4625 !-->
4626</div></div></div></div>
4627<div class="section">
4628<h2><a name="Controlling_Array-Index_Access_Method_Plan_Parameter"></a><a name="ArrayIndexFlag" id="ArrayIndexFlag">Controlling Array-Index Access Method Plan Parameter</a></h2>
4629<p>By default, the system attempts to utilize array indexes as an access method if an array index is present and is applicable. If you believe that your query will not benefit from an array index, toggle the parameter below.</p>
4630<ul>
4631
4632<li><b>compiler.arrayindex</b>: if this is set to true, array indexes will be considered as an access method for applicable queries; the default value is true.</li>
4633</ul>
4634<div class="section">
4635<div class="section">
4636<h4><a name="Example"></a>Example</h4>
4637
4638<div>
4639<div>
4640<pre class="source">SET `compiler.arrayindex` &quot;false&quot;;
4641
4642SELECT o.orderno
4643FROM orders o
4644WHERE SOME i IN o.items
4645SATISFIES i.price = 19.91;
4646</pre></div></div>
4647<!--
4648 ! Licensed to the Apache Software Foundation (ASF) under one
4649 ! or more contributor license agreements. See the NOTICE file
4650 ! distributed with this work for additional information
4651 ! regarding copyright ownership. The ASF licenses this file
4652 ! to you under the Apache License, Version 2.0 (the
4653 ! "License"); you may not use this file except in compliance
4654 ! with the License. You may obtain a copy of the License at
4655 !
4656 ! http://www.apache.org/licenses/LICENSE-2.0
4657 !
4658 ! Unless required by applicable law or agreed to in writing,
4659 ! software distributed under the License is distributed on an
4660 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4661 ! KIND, either express or implied. See the License for the
4662 ! specific language governing permissions and limitations
4663 ! under the License.
4664 !-->
4665</div></div></div>
4666<div class="section">
4667<h2><a name="Query_Hints"></a><a name="Query_hints" id="Query_hints">Query Hints</a></h2>
4668<div class="section">
4669<div class="section">
4670<h4><a name="a.E2.80.9Chash.E2.80.9D_GROUP_BY_hint"></a><a name="hash_groupby" id="hash_groupby">&#x201c;hash&#x201d; GROUP BY hint</a></h4>
4671<p>The system supports two algorithms for GROUP BY clause evaluation: pre-sorted and hash-based. By default it uses the pre-sorted approach: The input data is first sorted on the grouping fields and then aggregation is performed on that sorted data. The alternative is a hash-based strategy which can be enabled via a <tt>/*+ hash */</tt> GROUP BY hint: The data is aggregated using an in-memory hash-table (that can spill to disk if necessary). This approach is recommended for low-cardinality grouping fields.</p>
4672<div class="section">
4673<h5><a name="Example:"></a>Example:</h5>
4674
4675<div>
4676<div>
4677<pre class="source">SELECT c.address.state, count(*)
4678FROM Customers AS c
4679/*+ hash */ GROUP BY c.address.state
4680</pre></div></div>
4681</div></div>
4682<div class="section">
4683<h4><a name="a.E2.80.9Chash-bcast.E2.80.9D_JOIN_hint"></a><a name="hash_bcast_join" id="hash_bcast_join">&#x201c;hash-bcast&#x201d; JOIN hint</a></h4>
4684<p>By default the system uses a partitioned-parallel hash join strategy to parallelize the execution of an equi-join. In this approach both sides of the join are repartitioned (if necessary) on a hash of the join key; potentially matching data items thus arrive at the same partition to be joined locally. This strategy is robust, but not always the fastest when one of the join sides is low cardinality and the other is high cardinality (since it scans and potentially moves the data from both sides). This special case can be better handled by broadcasting (replicating) the smaller side to all data partitions of the larger side and not moving the data from the other (larger) side. The system provides a join hint to enable this strategy: <tt>/*+ hash-bcast */</tt>. This hint forces the right side of the join to be replicated while the left side retains its original partitioning.</p>
4685<div class="section">
4686<h5><a name="Example:"></a>Example:</h5>
4687
4688<div>
4689<div>
4690<pre class="source">SELECT *
4691FROM Orders AS o JOIN Customers AS c
4692ON o.customer_id /*+ hash-bcast */ = c.customer_id
4693</pre></div></div>
4694<!--
4695 ! Licensed to the Apache Software Foundation (ASF) under one
4696 ! or more contributor license agreements. See the NOTICE file
4697 ! distributed with this work for additional information
4698 ! regarding copyright ownership. The ASF licenses this file
4699 ! to you under the Apache License, Version 2.0 (the
4700 ! "License"); you may not use this file except in compliance
4701 ! with the License. You may obtain a copy of the License at
4702 !
4703 ! http://www.apache.org/licenses/LICENSE-2.0
4704 !
4705 ! Unless required by applicable law or agreed to in writing,
4706 ! software distributed under the License is distributed on an
4707 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4708 ! KIND, either express or implied. See the License for the
4709 ! specific language governing permissions and limitations
4710 ! under the License.
4711 !-->
4712</div></div></div></div>
4713<div class="section">
4714<h2><a name="Appendix_3._Variable_Bindings_and_Name_Resolution"></a><a name="Variable_bindings_and_name_resolution" id="Variable_bindings_and_name_resolution">Appendix 3. Variable Bindings and Name Resolution</a></h2><!--
4715 ! Licensed to the Apache Software Foundation (ASF) under one
4716 ! or more contributor license agreements. See the NOTICE file
4717 ! distributed with this work for additional information
4718 ! regarding copyright ownership. The ASF licenses this file
4719 ! to you under the Apache License, Version 2.0 (the
4720 ! "License"); you may not use this file except in compliance
4721 ! with the License. You may obtain a copy of the License at
4722 !
4723 ! http://www.apache.org/licenses/LICENSE-2.0
4724 !
4725 ! Unless required by applicable law or agreed to in writing,
4726 ! software distributed under the License is distributed on an
4727 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4728 ! KIND, either express or implied. See the License for the
4729 ! specific language governing permissions and limitations
4730 ! under the License.
4731 !-->
4732
4733<p>In this Appendix, we&#x2019;ll look at how variables are bound and how names are resolved. Names can appear in every clause of a query. Sometimes a name consists of just a single identifier, e.g., <tt>region</tt> or <tt>revenue</tt>. More often a name will consist of two identifiers separated by a dot, e.g., <tt>customer.address</tt>. Occasionally a name may have more than two identifiers, e.g., <tt>policy.owner.address.zipcode</tt>. <i>Resolving</i> a name means determining exactly what the (possibly multi-part) name refers to. It is necessary to have well-defined rules for how to resolve a name in cases of ambiguity. (In the absence of schemas, such cases arise more commonly, and also differently, than they do in SQL.)</p>
4734<p>The basic job of each clause in a query block is to bind variables. Each clause sees the variables bound by previous clauses and may bind additional variables. Names are always resolved with respect to the variables that are bound (&#x201c;in scope&#x201d;) at the place where the name use in question occurs. It is possible that the name resolution process will fail, which may lead to an empty result or an error message.</p>
4735<p>One important bit of background: When the system is reading a query and resolving its names, it has a list of all the available dataverses and datasets. As a result, it knows whether <tt>a.b</tt> is a valid name for dataset <tt>b</tt> in dataverse <tt>a</tt>. However, the system does not in general have knowledge of the schemas of the data inside the datasets; remember that this is a much more open world. As a result, in general the system cannot know whether any object in a particular dataset will have a field named <tt>c</tt>. These assumptions affect how errors are handled. If you try to access dataset <tt>a.b</tt> and no dataset by that name exists, you will get an error and your query will not run. However, if you try to access a field <tt>c</tt> in a collection of objects, your query will run and return <tt>missing</tt> for each object that doesn&#x2019;t have a field named <tt>c</tt> - this is because it&#x2019;s possible that some object (someday) could have such a field.</p></div>
4736<div class="section">
4737<h2><a name="Binding_Variables"></a><a name="Binding_variables" id="Binding_variables">Binding Variables</a></h2>
4738<p>Variables can be bound in the following ways:</p>
4739<ol style="list-style-type: decimal">
4740
4741<li>
4742
4743<p><tt>WITH</tt> and <tt>LET</tt> clauses bind a variable to the result of an expression in a straightforward way</p>
4744<p>Examples:</p>
4745<p><tt>WITH cheap_parts AS (SELECT partno FROM parts WHERE price &lt; 100)</tt> binds the variable <tt>cheap_parts</tt> to the result of the subquery.</p>
4746<p><tt>LET pay = salary + bonus</tt> binds the variable <tt>pay</tt> to the result of evaluating the expression <tt>salary + bonus</tt>.</p>
4747</li>
4748<li>
4749
4750<p><tt>FROM</tt>, <tt>GROUP BY</tt>, and <tt>SELECT</tt> clauses have optional <tt>AS</tt> subclauses that contain an expression and a name (called an <i>iteration variable</i> in a <tt>FROM</tt> clause, or an alias in <tt>GROUP BY</tt> or <tt>SELECT</tt>).</p>
4751<p>Examples:</p>
4752<p><tt>FROM customer AS c, order AS o</tt></p>
4753<p><tt>GROUP BY salary + bonus AS total_pay</tt></p>
4754<p><tt>SELECT MAX(price) AS highest_price</tt></p>
4755<p>An <tt>AS</tt> subclause always binds the name (as a variable) to the result of the expression (or, in the case of a <tt>FROM</tt> clause, to the <i>individual members</i> of the collection identified by the expression).</p>
4756<p>It&#x2019;s always a good practice to use the keyword <tt>AS</tt> when defining an alias or iteration variable. However, as in SQL, the syntax allows the keyword <tt>AS</tt> to be omitted. For example, the <tt>FROM</tt> clause above could have been written like this:</p>
4757<p><tt>FROM customer c, order o</tt></p>
4758<p>Omitting the keyword <tt>AS</tt> does not affect the binding of variables. The FROM clause in this example binds variables c and o whether the keyword AS is used or not.</p>
4759<p>In certain cases, a variable is automatically bound even if no alias or variable-name is specified. Whenever an expression could have been followed by an AS subclause, if the expression consists of a simple name or a path expression, that expression binds a variable whose name is the same as the simple name or the last step in the path expression. Here are some examples:</p>
4760<p><tt>FROM customer, order</tt> binds iteration variables named <tt>customer</tt> and <tt>order</tt></p>
4761<p><tt>GROUP BY address.zipcode</tt> binds a variable named <tt>zipcode</tt></p>
4762<p><tt>SELECT item[0].price</tt> binds a variable named <tt>price</tt></p>
4763<p>Note that a <tt>FROM</tt> clause iterates over a collection (usually a dataset), binding a variable to each member of the collection in turn. The name of the collection remains in scope, but it is not a variable. For example, consider this <tt>FROM</tt> clause used in a self-join:</p>
4764<p><tt>FROM customer AS c1, customer AS c2</tt></p>
4765<p>This <tt>FROM</tt> clause joins the customer dataset to itself, binding the iteration variables <tt>c1</tt> and <tt>c2</tt> to objects in the left-hand-side and right-hand-side of the join, respectively. After the <tt>FROM</tt> clause, <tt>c1</tt> and <tt>c2</tt> are in scope as variables, and customer remains accessible as a dataset name but not as a variable.</p>
4766</li>
4767<li>
4768
4769<p>Special rules for <tt>GROUP BY</tt>:</p>
4770<ul>
4771
4772<li>
4773
4774<p>(3A): If a <tt>GROUP BY</tt> clause specifies an expression that has no explicit alias, it binds a pseudo-variable that is lexicographically identical to the expression itself. For example:</p>
4775<p><tt>GROUP BY salary + bonus</tt> binds a pseudo-variable named <tt>salary + bonus</tt>.</p>
4776<p>This rule allows subsequent clauses to refer to the grouping expression (salary + bonus) even though its constituent variables (salary and bonus) are no longer in scope. For example, the following query is valid:</p>
4777
4778<div>
4779<div>
4780<pre class="source">FROM employee
4781GROUP BY salary + bonus
4782HAVING salary + bonus &gt; 1000
4783SELECT salary + bonus, COUNT(*) AS how_many
4784</pre></div></div>
4785
4786<p>While it might have been more elegant to explicitly require an alias in cases like this, the pseudo-variable rule is retained for SQL compatibility. Note that the expression <tt>salary + bonus</tt> is not <i>actually</i> evaluated in the <tt>HAVING</tt> and <tt>SELECT</tt> clauses (and could not be since <tt>salary</tt> and <tt>bonus</tt> are no longer individually in scope). Instead, the expression <tt>salary + bonus</tt> is treated as a reference to the pseudo-variable defined in the <tt>GROUP BY</tt> clause.</p>
4787</li>
4788<li>
4789
4790<p>(3B): The <tt>GROUP BY</tt> clause may be followed by a <tt>GROUP AS</tt> clause that binds a variable to the group. The purpose of this variable is to make the individual objects inside the group visible to subqueries that may need to iterate over them.</p>
4791<p>The <tt>GROUP AS</tt> variable is bound to a multiset of objects. Each object represents one of the members of the group. Since the group may have been formed from a join, each of the member-objects contains a nested object for each variable bound by the nearest <tt>FROM</tt> clause (and its <tt>LET</tt> subclause, if any). These nested objects, in turn, contain the actual fields of the group-member. To understand this process, consider the following query fragment:</p>
4792
4793<div>
4794<div>
4795<pre class="source">FROM parts AS p, suppliers AS s
4796WHERE p.suppno = s.suppno
4797GROUP BY p.color GROUP AS g
4798</pre></div></div>
4799
4800<p>Suppose that the objects in <tt>parts</tt> have fields <tt>partno</tt>, <tt>color</tt>, and <tt>suppno</tt>. Suppose that the objects in suppliers have fields <tt>suppno</tt> and <tt>location</tt>.</p>
4801<p>Then, for each group formed by the <tt>GROUP BY</tt>, the variable g will be bound to a multiset with the following structure:</p>
4802
4803<div>
4804<div>
4805<pre class="source">[ { &quot;p&quot;: { &quot;partno&quot;: &quot;p1&quot;, &quot;color&quot;: &quot;red&quot;, &quot;suppno&quot;: &quot;s1&quot; },
4806 &quot;s&quot;: { &quot;suppno&quot;: &quot;s1&quot;, &quot;location&quot;: &quot;Denver&quot; } },
4807 { &quot;p&quot;: { &quot;partno&quot;: &quot;p2&quot;, &quot;color&quot;: &quot;red&quot;, &quot;suppno&quot;: &quot;s2&quot; },
4808 &quot;s&quot;: { &quot;suppno&quot;: &quot;s2&quot;, &quot;location&quot;: &quot;Atlanta&quot; } },
4809 ...
4810]
4811</pre></div></div>
4812</li>
4813</ul>
4814</li>
4815</ol></div>
4816<div class="section">
4817<h2><a name="Scoping" id="Scoping">Scoping</a></h2>
4818<p>In general, the variables that are in scope at a particular position are those variables that were bound earlier in the current query block, in outer (enclosing) query blocks, or in a <tt>WITH</tt> clause at the beginning of the query. More specific rules follow.</p>
4819<p>The clauses in a query block are conceptually processed in the following order:</p>
4820<ul>
4821
4822<li><tt>FROM</tt> (followed by LET subclause, if any)</li>
4823<li><tt>WHERE</tt></li>
4824<li><tt>GROUP BY</tt> (followed by LET subclause, if any)</li>
4825<li><tt>HAVING</tt></li>
4826<li><tt>SELECT</tt> or <tt>SELECT VALUE</tt></li>
4827<li><tt>ORDER BY</tt></li>
4828<li><tt>OFFSET</tt></li>
4829<li><tt>LIMIT</tt></li>
4830</ul>
4831<p>During processing of each clause, the variables that are in scope are those variables that are bound in the following places:</p>
4832<ol style="list-style-type: decimal">
4833
4834<li>
4835
4836<p>In earlier clauses of the same query block (as defined by the ordering given above).</p>
4837<p>Example: <tt>FROM orders AS o SELECT o.date</tt>. The variable <tt>o</tt> in the <tt>SELECT</tt> clause is bound, in turn, to each object in the dataset <tt>orders</tt>.</p>
4838</li>
4839<li>
4840
4841<p>In outer query blocks in which the current query block is nested. In case of duplication, the innermost binding wins.</p>
4842</li>
4843<li>
4844
4845<p>In the <tt>WITH</tt> clause (if any) at the beginning of the query.</p>
4846</li>
4847</ol>
4848<p>However, in a query block where a <tt>GROUP BY</tt> clause is present:</p>
4849<ol style="list-style-type: decimal">
4850
4851<li>
4852
4853<p>In clauses processed before <tt>GROUP BY</tt>, scoping rules are the same as though no GROUP BY were present.</p>
4854</li>
4855<li>
4856
4857<p>In clauses processed after <tt>GROUP BY</tt>, the variables bound in the nearest <tt>FROM</tt>-clause (and its <tt>LET</tt> subclause, if any) are removed from scope and replaced by the variables bound in the <tt>GROUP BY</tt> clause (and its <tt>LET</tt> subclause, if any). However, this replacement does not apply inside the arguments of the five SQL special aggregating functions (<tt>MIN</tt>, <tt>MAX</tt>, <tt>AVG</tt>, <tt>SUM</tt>, and <tt>COUNT</tt>). These functions still need to see the individual data items over which they are computing an aggregation. For example, after <tt>FROM employee AS e GROUP BY deptno</tt>, it would not be valid to reference <tt>e.salary</tt>, but <tt>AVG(e.salary)</tt> would be valid.</p>
4858</li>
4859</ol>
4860<p>Special case: In an expression inside a <tt>FROM</tt> clause, a variable is in scope if it was bound in an earlier expression in the same <tt>FROM</tt> clause. Example:</p>
4861
4862<div>
4863<div>
4864<pre class="source">FROM orders AS o, o.items AS i
4865</pre></div></div>
4866
4867<p>The reason for this special case is to support iteration over nested collections.</p>
4868<p>Note that, since the <tt>SELECT</tt> clause comes <i>after</i> the <tt>WHERE</tt> and <tt>GROUP BY</tt> clauses in conceptual processing order, any variables defined in <tt>SELECT</tt> are not visible in <tt>WHERE</tt> or <tt>GROUP BY</tt>. Therefore the following query will not return what might be the expected result (since in the WHERE clause, <tt>pay</tt> will be interpreted as a field in the <tt>emp</tt> object rather than as the computed value <tt>salary + bonus</tt>):</p>
4869
4870<div>
4871<div>
4872<pre class="source">SELECT name, salary + bonus AS pay
4873FROM emp
4874WHERE pay &gt; 1000
4875ORDER BY pay
4876</pre></div></div>
4877
4878<p>The likely intent of the query above can be accomplished as follows:</p>
4879
4880<div>
4881<div>
4882<pre class="source">FROM emp AS e
4883LET pay = e.salary + e.bonus
4884WHERE pay &gt; 1000
4885SELECT e.name, pay
4886ORDER BY pay
4887</pre></div></div>
4888
4889<p>Note that in the phrase <i>expr1</i> <tt>JOIN</tt> <i>expr2</i> <tt>ON</tt> <i>expr3</i>, variables defined in <i>expr1</i> are visible in <i>expr3</i> but not in <i>expr2</i>. Here&#x2019;s an example that will not work:</p>
4890
4891<div>
4892<div>
4893<pre class="source">FROM orders AS o JOIN o.items AS i ON 1 = 1
4894</pre></div></div>
4895
4896<p>The variable <tt>o</tt>, defined in the phrase before <tt>JOIN</tt>, cannot be used in the phrase immediately following <tt>JOIN</tt>. The probable intent of this example could be accomplished in either of the following ways:</p>
4897
4898<div>
4899<div>
4900<pre class="source">FROM orders AS o UNNEST o.items AS i
4901
4902FROM orders AS o, o.items AS i
4903</pre></div></div>
4904
4905<p>To summarize this rule: You may not use left-correlation in an explicit <tt>JOIN</tt> clause.</p></div>
4906<div class="section">
4907<h2><a name="Resolving_Names"></a><a name="Resolving_names" id="Resolving_names">Resolving Names</a></h2>
4908<p>The process of name resolution begins with the leftmost identifier in the name. The rules for resolving the leftmost identifier are:</p>
4909<ol style="list-style-type: decimal">
4910
4911<li>
4912
4913<p><i>In a <tt>FROM</tt> clause</i>: Names in a <tt>FROM</tt> clause identify the collections over which the query block will iterate. These collections may be stored datasets, views, synonyms, or may be the results of nested query blocks. A stored dataset may be in a named dataverse or in the default dataverse. Thus, if the two-part name <tt>a.b</tt> is in a <tt>FROM</tt> clause, <tt>a</tt> might represent a dataverse and <tt>b</tt> might represent a dataset in that dataverse. Another example of a two-part name in a <tt>FROM</tt> clause is <tt>FROM orders AS o, o.items AS i</tt>. In <tt>o.items</tt>, <tt>o</tt> represents an order object bound earlier in the <tt>FROM</tt> clause, and <tt>items</tt> represents the items object inside that order.</p>
4914<p>The rules for resolving the leftmost identifier in a <tt>FROM</tt> clause (including a <tt>JOIN</tt> subclause), or in the expression following <tt>IN</tt> in a quantified predicate, are as follows:</p>
4915<ul>
4916
4917<li>
4918
4919<p>(1A): If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. (Note that in the case of a subquery, an in-scope variable might have been bound in an outer query block; this is called a correlated subquery).</p>
4920</li>
4921<li>
4922
4923<p>(1B): Otherwise, if the identifier is the first part of a two-part name like <tt>a.b</tt>, the name is treated as <tt>dataverse.dataset</tt>. If the identifier stands alone as a one-part name, it is treated as the name of a dataset in the default dataverse. If the designated dataset exists, then the identifier is resolved to that dataset; otherwise, if a view with the given name exists, then the identifier is resolved to that view; otherwise, if a synonym with the given name exists, then the identifier is resolved to the target dataset or the target view of that synonym (potentially recursively if this synonym points to another synonym). An error will result if no dataset, view, or synonym with this name exists.</p>
4924<p>Datasets and views take precedence over synonyms, so if both a dataset (or a view) and a synonym have the same name, then the name resolves to the dataset (or the view). Note that there cannot be a dataset and a view with the same name.</p>
4925</li>
4926</ul>
4927</li>
4928<li>
4929
4930<p><i>Elsewhere in a query block</i>: In clauses other than <tt>FROM</tt>, a name typically identifies a field of some object. For example, if the expression <tt>a.b</tt> is in a <tt>SELECT</tt> or <tt>WHERE</tt> clause, it&#x2019;s likely that <tt>a</tt> represents an object and <tt>b</tt> represents a field in that object.</p>
4931<p>The rules for resolving the leftmost identifier in clauses other than the ones listed in Rule 1 are:</p>
4932<ul>
4933
4934<li>
4935
4936<p>(2A): If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. (In the case of a correlated subquery, the in-scope variable might have been bound in an outer query block).</p>
4937</li>
4938<li>
4939
4940<p>(2B): (The &#x201c;Single Variable Rule&#x201d;): Otherwise, if the <tt>FROM</tt> clause in the current query block binds exactly one variable, the identifier is treated as a field access on the object bound to that variable. For example, in the query <tt>FROM customer SELECT address</tt>, the identifier <tt>address</tt> is treated as a field in the object bound to the variable <tt>customer</tt>. At runtime, if the object bound to <tt>customer</tt> has no <tt>address</tt> field, the <tt>address</tt> expression will return <tt>missing</tt>. If the <tt>FROM</tt> clause in the current query block binds multiple variables, name resolution fails with an &#x201c;ambiguous name&#x201d; error. If there&#x2019;s no <tt>FROM</tt> clause in the current query block, name resolution fails with an &#x201c;undefined identifier&#x201d; error. Note that the Single Variable Rule searches for bound variables only in the current query block, not in outer (containing) blocks. The purpose of this rule is to permit the compiler to resolve field-references unambiguously without relying on any schema information. Also note that variables defined by <tt>LET</tt> clauses do not participate in the resolution process performed by this rule.</p>
4941<p>Exception: In a query that has a <tt>GROUP BY</tt> clause, the Single Variable Rule does not apply in any clauses that occur after the <tt>GROUP BY</tt> because, in these clauses, the variables bound by the <tt>FROM</tt> clause are no longer in scope. In clauses after <tt>GROUP BY</tt>, only Rule (2A) applies.</p>
4942</li>
4943</ul>
4944</li>
4945<li>
4946
4947<p>In an <tt>ORDER BY</tt> clause following a <tt>UNION ALL</tt> expression:</p>
4948<p>The leftmost identifier is treated as a field-access on the objects that are generated by the <tt>UNION ALL</tt>. For example:</p>
4949
4950<div>
4951<div>
4952<pre class="source">query-block-1
4953UNION ALL
4954query-block-2
4955ORDER BY salary
4956</pre></div></div>
4957
4958<p>In the result of this query, objects that have a <tt>salary</tt> field will be ordered by the value of this field; objects that have no <tt>salary</tt> field will appear at the beginning of the query result (in ascending order) or at the end (in descending order).</p>
4959</li>
4960<li>
4961
4962<p><i>In a standalone expression</i>: If a query consists of a standalone expression then identifiers inside that expression are resolved according to Rule 1. For example, if the whole query is <tt>ARRAY_COUNT(a.b)</tt> then <tt>a.b</tt> will be treated as dataset <tt>b</tt> contained in dataverse <tt>a</tt>. Note that this rule only applies to identifiers which are located directly inside a standalone expression. Identifiers inside <tt>SELECT</tt> statements in a standalone expression are still resolved according to Rules 1-3. For example, if the whole query is <tt>ARRAY_SUM( (FROM employee AS e SELECT VALUE salary) )</tt> then <tt>salary</tt> is resolved as <tt>e.salary</tt> following the &#x201c;Single Variable Rule&#x201d; (Rule (2B)).</p>
4963</li>
4964<li>
4965
4966<p>Once the leftmost identifier has been resolved, the following dots and identifiers in the name (if any) are treated as a path expression that navigates to a field nested inside that object. The name resolves to the field at the end of the path. If this field does not exist, the value <tt>missing</tt> is returned.</p>
4967</li>
4968</ol><!--
4969 ! Licensed to the Apache Software Foundation (ASF) under one
4970 ! or more contributor license agreements. See the NOTICE file
4971 ! distributed with this work for additional information
4972 ! regarding copyright ownership. The ASF licenses this file
4973 ! to you under the Apache License, Version 2.0 (the
4974 ! "License"); you may not use this file except in compliance
4975 ! with the License. You may obtain a copy of the License at
4976 !
4977 ! http://www.apache.org/licenses/LICENSE-2.0
4978 !
4979 ! Unless required by applicable law or agreed to in writing,
4980 ! software distributed under the License is distributed on an
4981 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4982 ! KIND, either express or implied. See the License for the
4983 ! specific language governing permissions and limitations
4984 ! under the License.
4985 !-->
4986</div>
4987<div class="section">
4988<h2><a name="Appendix_4._Example_Data"></a><a name="Manual_data" id="Manual_data">Appendix 4. Example Data</a></h2><!--
4989 ! Licensed to the Apache Software Foundation (ASF) under one
4990 ! or more contributor license agreements. See the NOTICE file
4991 ! distributed with this work for additional information
4992 ! regarding copyright ownership. The ASF licenses this file
4993 ! to you under the Apache License, Version 2.0 (the
4994 ! "License"); you may not use this file except in compliance
4995 ! with the License. You may obtain a copy of the License at
4996 !
4997 ! http://www.apache.org/licenses/LICENSE-2.0
4998 !
4999 ! Unless required by applicable law or agreed to in writing,
5000 ! software distributed under the License is distributed on an
5001 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
5002 ! KIND, either express or implied. See the License for the
5003 ! specific language governing permissions and limitations
5004 ! under the License.
5005 !-->
5006
5007<p>This appendix lists the data definitions and the datasets used for the examples provided throughout this manual.</p>
5008<div class="section">
5009<h3><a name="Data_Definitions"></a><a name="definition_statements" id="definition_statements">Data Definitions</a></h3>
5010
5011<div>
5012<div>
5013<pre class="source">CREATE DATAVERSE Commerce IF NOT EXISTS;
5014
5015USE Commerce;
5016
5017CREATE TYPE addressType AS {
5018 street: string,
5019 city: string,
5020 zipcode: string?
5021};
5022
5023CREATE TYPE customerType AS {
5024 custid: string,
5025 name: string,
5026 address: addressType?
5027};
5028
5029CREATE DATASET customers(customerType)
5030 PRIMARY KEY custid;
5031
5032CREATE TYPE itemType AS {
5033 itemno: int,
5034 qty: int,
5035 price: int
5036};
5037
5038CREATE TYPE orderType AS {
5039 orderno: int,
5040 custid: string,
5041 order_date: string,
5042 ship_date: string?,
5043 items: [ itemType ]
5044};
5045
5046CREATE DATASET orders(orderType)
5047 PRIMARY KEY orderno;
5048</pre></div></div>
5049</div>
5050<div class="section">
5051<h3><a name="Customers_Data"></a><a name="customers_data" id="customers_data">Customers Data</a></h3>
5052
5053<div>
5054<div>
5055<pre class="source">[
5056 {
5057 &quot;custid&quot;: &quot;C13&quot;,
5058 &quot;name&quot;: &quot;T. Cody&quot;,
5059 &quot;address&quot;: {
5060 &quot;street&quot;: &quot;201 Main St.&quot;,
5061 &quot;city&quot;: &quot;St. Louis, MO&quot;,
5062 &quot;zipcode&quot;: &quot;63101&quot;
5063 },
5064 &quot;rating&quot;: 750
5065 },
5066 {
5067 &quot;custid&quot;: &quot;C25&quot;,
5068 &quot;name&quot;: &quot;M. Sinclair&quot;,
5069 &quot;address&quot;: {
5070 &quot;street&quot;: &quot;690 River St.&quot;,
5071 &quot;city&quot;: &quot;Hanover, MA&quot;,
5072 &quot;zipcode&quot;: &quot;02340&quot;
5073 },
5074 &quot;rating&quot;: 690
5075 },
5076 {
5077 &quot;custid&quot;: &quot;C31&quot;,
5078 &quot;name&quot;: &quot;B. Pruitt&quot;,
5079 &quot;address&quot;: {
5080 &quot;street&quot;: &quot;360 Mountain Ave.&quot;,
5081 &quot;city&quot;: &quot;St. Louis, MO&quot;,
5082 &quot;zipcode&quot;: &quot;63101&quot;
5083 }
5084 },
5085 {
5086 &quot;custid&quot;: &quot;C35&quot;,
5087 &quot;name&quot;: &quot;J. Roberts&quot;,
5088 &quot;address&quot;: {
5089 &quot;street&quot;: &quot;420 Green St.&quot;,
5090 &quot;city&quot;: &quot;Boston, MA&quot;,
5091 &quot;zipcode&quot;: &quot;02115&quot;
5092 },
5093 &quot;rating&quot;: 565
5094 },
5095 {
5096 &quot;custid&quot;: &quot;C37&quot;,
5097 &quot;name&quot;: &quot;T. Henry&quot;,
5098 &quot;address&quot;: {
5099 &quot;street&quot;: &quot;120 Harbor Blvd.&quot;,
5100 &quot;city&quot;: &quot;Boston, MA&quot;,
5101 &quot;zipcode&quot;: &quot;02115&quot;
5102 },
5103 &quot;rating&quot;: 750
5104 },
5105 {
5106 &quot;custid&quot;: &quot;C41&quot;,
5107 &quot;name&quot;: &quot;R. Dodge&quot;,
5108 &quot;address&quot;: {
5109 &quot;street&quot;: &quot;150 Market St.&quot;,
5110 &quot;city&quot;: &quot;St. Louis, MO&quot;,
5111 &quot;zipcode&quot;: &quot;63101&quot;
5112 },
5113 &quot;rating&quot;: 640
5114 },
5115 {
5116 &quot;custid&quot;: &quot;C47&quot;,
5117 &quot;name&quot;: &quot;S. Logan&quot;,
5118 &quot;address&quot;: {
5119 &quot;street&quot;: &quot;Via del Corso&quot;,
5120 &quot;city&quot;: &quot;Rome, Italy&quot;
5121 },
5122 &quot;rating&quot;: 625
5123 }
5124]
5125</pre></div></div>
5126</div>
5127<div class="section">
5128<h3><a name="Orders_Data"></a><a name="orders_data" id="orders_data">Orders Data</a></h3>
5129
5130<div>
5131<div>
5132<pre class="source">[
5133 {
5134 &quot;orderno&quot;: 1001,
5135 &quot;custid&quot;: &quot;C41&quot;,
5136 &quot;order_date&quot;: &quot;2020-04-29&quot;,
5137 &quot;ship_date&quot;: &quot;2020-05-03&quot;,
5138 &quot;items&quot;: [
5139 {
5140 &quot;itemno&quot;: 347,
5141 &quot;qty&quot;: 5,
5142 &quot;price&quot;: 19.99
5143 },
5144 {
5145 &quot;itemno&quot;: 193,
5146 &quot;qty&quot;: 2,
5147 &quot;price&quot;: 28.89
5148 }
5149 ]
5150 },
5151 {
5152 &quot;orderno&quot;: 1002,
5153 &quot;custid&quot;: &quot;C13&quot;,
5154 &quot;order_date&quot;: &quot;2020-05-01&quot;,
5155 &quot;ship_date&quot;: &quot;2020-05-03&quot;,
5156 &quot;items&quot;: [
5157 {
5158 &quot;itemno&quot;: 460,
5159 &quot;qty&quot;: 95,
5160 &quot;price&quot;: 100.99
5161 },
5162 {
5163 &quot;itemno&quot;: 680,
5164 &quot;qty&quot;: 150,
5165 &quot;price&quot;: 8.75
5166 }
5167 ]
5168 },
5169 {
5170 &quot;orderno&quot;: 1003,
5171 &quot;custid&quot;: &quot;C31&quot;,
5172 &quot;order_date&quot;: &quot;2020-06-15&quot;,
5173 &quot;ship_date&quot;: &quot;2020-06-16&quot;,
5174 &quot;items&quot;: [
5175 {
5176 &quot;itemno&quot;: 120,
5177 &quot;qty&quot;: 2,
5178 &quot;price&quot;: 88.99
5179 },
5180 {
5181 &quot;itemno&quot;: 460,
5182 &quot;qty&quot;: 3,
5183 &quot;price&quot;: 99.99
5184 }
5185 ]
5186 },
5187 {
5188 &quot;orderno&quot;: 1004,
5189 &quot;custid&quot;: &quot;C35&quot;,
5190 &quot;order_date&quot;: &quot;2020-07-10&quot;,
5191 &quot;ship_date&quot;: &quot;2020-07-15&quot;,
5192 &quot;items&quot;: [
5193 {
5194 &quot;itemno&quot;: 680,
5195 &quot;qty&quot;: 6,
5196 &quot;price&quot;: 9.99
5197 },
5198 {
5199 &quot;itemno&quot;: 195,
5200 &quot;qty&quot;: 4,
5201 &quot;price&quot;: 35
5202 }
5203 ]
5204 },
5205 {
5206 &quot;orderno&quot;: 1005,
5207 &quot;custid&quot;: &quot;C37&quot;,
5208 &quot;order_date&quot;: &quot;2020-08-30&quot;,
5209 &quot;items&quot;: [
5210 {
5211 &quot;itemno&quot;: 460,
5212 &quot;qty&quot;: 2,
5213 &quot;price&quot;: 99.98
5214 },
5215 {
5216 &quot;itemno&quot;: 347,
5217 &quot;qty&quot;: 120,
5218 &quot;price&quot;: 22
5219 },
5220 {
5221 &quot;itemno&quot;: 780,
5222 &quot;qty&quot;: 1,
5223 &quot;price&quot;: 1500
5224 },
5225 {
5226 &quot;itemno&quot;: 375,
5227 &quot;qty&quot;: 2,
5228 &quot;price&quot;: 149.98
5229 }
5230 ]
5231 },
5232 {
5233 &quot;orderno&quot;: 1006,
5234 &quot;custid&quot;: &quot;C41&quot;,
5235 &quot;order_date&quot;: &quot;2020-09-02&quot;,
5236 &quot;ship_date&quot;: &quot;2020-09-04&quot;,
5237 &quot;items&quot;: [
5238 {
5239 &quot;itemno&quot;: 680,
5240 &quot;qty&quot;: 51,
5241 &quot;price&quot;: 25.98
5242 },
5243 {
5244 &quot;itemno&quot;: 120,
5245 &quot;qty&quot;: 65,
5246 &quot;price&quot;: 85
5247 },
5248 {
5249 &quot;itemno&quot;: 460,
5250 &quot;qty&quot;: 120,
5251 &quot;price&quot;: 99.98
5252 }
5253 ]
5254 },
5255 {
5256 &quot;orderno&quot;: 1007,
5257 &quot;custid&quot;: &quot;C13&quot;,
5258 &quot;order_date&quot;: &quot;2020-09-13&quot;,
5259 &quot;ship_date&quot;: &quot;2020-09-20&quot;,
5260 &quot;items&quot;: [
5261 {
5262 &quot;itemno&quot;: 185,
5263 &quot;qty&quot;: 5,
5264 &quot;price&quot;: 21.99
5265 },
5266 {
5267 &quot;itemno&quot;: 680,
5268 &quot;qty&quot;: 1,
5269 &quot;price&quot;: 20.5
5270 }
5271 ]
5272 },
5273 {
5274 &quot;orderno&quot;: 1008,
5275 &quot;custid&quot;: &quot;C13&quot;,
5276 &quot;order_date&quot;: &quot;2020-10-13&quot;,
5277 &quot;items&quot;: [
5278 {
5279 &quot;itemno&quot;: 460,
5280 &quot;qty&quot;: 20,
5281 &quot;price&quot;: 99.99
5282 }
5283 ]
5284 },
5285 {
5286 &quot;orderno&quot;: 1009,
5287 &quot;custid&quot;: &quot;C13&quot;,
5288 &quot;order_date&quot;: &quot;2020-10-13&quot;,
5289 &quot;items&quot;: []
5290 }
5291]
5292</pre></div></div></div></div>
5293 </div>
5294 </div>
5295 </div>
5296 <hr/>
5297 <footer>
5298 <div class="container-fluid">
5299 <div class="row-fluid">
5300<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
5301 feather logo, and the Apache AsterixDB project logo are either
5302 registered trademarks or trademarks of The Apache Software
5303 Foundation in the United States and other countries.
5304 All other marks mentioned may be trademarks or registered
5305 trademarks of their respective owners.
5306 </div>
5307 </div>
5308 </div>
5309 </footer>
5310 </body>
5311</html>