blob: 9bdb5bc472c718d4390d06bcc713f92221aae956 [file] [log] [blame]
Ian Maxon49d15b22020-12-06 16:23:00 -08001<!DOCTYPE html>
2<!--
3 | Generated by Apache Maven Doxia Site Renderer 1.8.1 from target/generated-site/markdown/sqlpp/manual.md at 2020-12-06
4 | Rendered using Apache Maven Fluido Skin 1.7
5-->
6<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7 <head>
8 <meta charset="UTF-8" />
9 <meta name="viewport" content="width=device-width, initial-scale=1.0" />
10 <meta name="Date-Revision-yyyymmdd" content="20201206" />
11 <meta http-equiv="Content-Language" content="en" />
12 <title>AsterixDB &#x2013; The SQL++ Query Language</title>
13 <link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
14 <link rel="stylesheet" href="../css/site.css" />
15 <link rel="stylesheet" href="../css/print.css" media="print" />
16 <script type="text/javascript" src="../js/apache-maven-fluido-1.7.min.js"></script>
17
18 </head>
19 <body class="topBarDisabled">
20 <div class="container-fluid">
21 <div id="banner">
22 <div class="pull-left"><a href=".././" id="bannerLeft"><img src="../images/asterixlogo.png" alt="AsterixDB"/></a></div>
23 <div class="pull-right"></div>
24 <div class="clear"><hr/></div>
25 </div>
26
27 <div id="breadcrumbs">
28 <ul class="breadcrumb">
29 <li id="publishDate">Last Published: 2020-12-06</li>
30 <li id="projectVersion" class="pull-right">Version: 0.9.6-SNAPSHOT</li>
31 <li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
32 </ul>
33 </div>
34 <div class="row-fluid">
35 <div id="leftColumn" class="span2">
36 <div class="well sidebar-nav">
37 <ul class="nav nav-list">
38 <li class="nav-header">Get Started - Installation</li>
39 <li><a href="../ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
40 <li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
41 <li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
42 <li class="nav-header">AsterixDB Primer</li>
43 <li><a href="../sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
44 <li class="nav-header">Data Model</li>
45 <li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
46 <li class="nav-header">Queries</li>
47 <li class="active"><a href="#"><span class="none"></span>The SQL++ Query Language</a></li>
48 <li><a href="../SQLPP.html" title="Raw SQL++ Grammar"><span class="none"></span>Raw SQL++ Grammar</a></li>
49 <li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
50 <li class="nav-header">API/SDK</li>
51 <li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
52 <li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
53 <li class="nav-header">Advanced Features</li>
54 <li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
55 <li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
56 <li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
57 <li><a href="../sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
58 <li><a href="../sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
59 <li><a href="../sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
60 <li><a href="../interval_join.html" title="Support of Interval Joins"><span class="none"></span>Support of Interval Joins</a></li>
61 <li class="nav-header">Deprecated</li>
62 <li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
63 <li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
64 <li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
65</ul>
66 <hr />
67 <div id="poweredBy">
68 <div class="clear"></div>
69 <div class="clear"></div>
70 <div class="clear"></div>
71 <div class="clear"></div>
72<a href=".././" title="AsterixDB" class="builtBy"><img class="builtBy" alt="AsterixDB" src="../images/asterixlogo.png" /></a>
73 </div>
74 </div>
75 </div>
76 <div id="bodyColumn" class="span10" >
77<!--
78 ! Licensed to the Apache Software Foundation (ASF) under one
79 ! or more contributor license agreements. See the NOTICE file
80 ! distributed with this work for additional information
81 ! regarding copyright ownership. The ASF licenses this file
82 ! to you under the Apache License, Version 2.0 (the
83 ! "License"); you may not use this file except in compliance
84 ! with the License. You may obtain a copy of the License at
85 !
86 ! http://www.apache.org/licenses/LICENSE-2.0
87 !
88 ! Unless required by applicable law or agreed to in writing,
89 ! software distributed under the License is distributed on an
90 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
91 ! KIND, either express or implied. See the License for the
92 ! specific language governing permissions and limitations
93 ! under the License.
94 !-->
95<h1>The SQL++ Query Language</h1>
96<ul>
97
98<li><a href="#Introduction">1. Introduction</a></li>
99<li><a href="#Expressions">2. Expressions</a>
100<ul>
101
102<li><a href="#Operator_expressions">Operator Expressions</a>
103<ul>
104
105<li><a href="#Arithmetic_operators">Arithmetic Operators</a></li>
106<li><a href="#Collection_operators">Collection Operators</a></li>
107<li><a href="#Comparison_operators">Comparison Operators</a></li>
108<li><a href="#Logical_operators">Logical Operators</a></li>
109</ul>
110</li>
111<li><a href="#Quantified_expressions">Quantified Expressions</a></li>
112<li><a href="#Path_expressions">Path Expressions</a></li>
113<li><a href="#Primary_expressions">Primary Expressions</a>
114<ul>
115
116<li><a href="#Literals">Literals</a></li>
117<li><a href="#Variable_references">Identifiers and Variable References</a></li>
118<li><a href="#Parameter_references">Parameter References</a></li>
119<li><a href="#Parenthesized_expressions">Parenthesized Expressions</a></li>
120<li><a href="#Function_call_expressions">Function calls</a></li>
121<li><a href="#Case_expressions">Case Expressions</a></li>
122<li><a href="#Constructors">Constructors</a></li>
123</ul>
124</li>
125</ul>
126</li>
127<li><a href="#Queries">3. Queries</a>
128<ul>
129
130<li><a href="#Select_clauses">SELECT Clauses</a>
131<ul>
132
133<li><a href="#Select_element">Select Value</a></li>
134<li><a href="#SQL_select">SQL-style Select</a></li>
135<li><a href="#Select_star">Select *</a></li>
136<li><a href="#Select_distinct">Select Distinct</a></li>
137<li><a href="#Unnamed_projections">Unnamed Projections</a></li>
138<li><a href="#Abbreviated_field_access_expressions">Abbreviated Field Access Expressions</a></li>
139</ul>
140</li>
141<li><a href="#From_clauses">FROM clauses</a>
142<ul>
143
144<li><a href="#Joins">Joins</a></li>
145</ul>
146</li>
147<li><a href="#Let_clauses">LET Clauses</a></li>
148<li><a href="#WHERE_Clause">WHERE Clause</a></li>
149<li><a href="#Grouping">Grouping</a>
150<ul>
151
152<li><a href="#GROUP_BY_Clause">GROUP BY Clause</a></li>
153<li><a href="#HAVING_Clause">HAVING Clause</a></li>
154<li><a href="#Aggregation_PseudoFunctions">Aggregation Pseudo-functions</a></li>
155<li><a href="#GROUP_AS_Clause">GROUP AS Clause</a></li>
156</ul>
157</li>
158<li><a href="#Union_all">Selection and UNION ALL</a></li>
159<li><a href="#With_clauses">WITH Clauses</a></li>
160<li><a href="#Order_By_clauses">ORDER BY, LIMIT, and OFFSET Clauses</a></li>
161<li><a href="#Subqueries">Subqueries</a></li>
162</ul>
163</li>
164<li><a href="#Over_clauses">4. Window Functions</a>
165<ul>
166
167<li><a href="#Window_function_call">Window Function Call</a>
168<ul>
169
170<li><a href="#Window_function_arguments">Window Function Arguments</a></li>
171<li><a href="#Window_function_options">Window Function Options</a></li>
172<li><a href="#Window_frame_variable">Window Frame Variable</a></li>
173<li><a href="#Window_definition">Window Definition</a></li>
174</ul>
175</li>
176</ul>
177</li>
178<li><a href="#Errors">5. Errors</a>
179<ul>
180
181<li><a href="#Syntax_errors">Syntax Errors</a></li>
182<li><a href="#Identifier_resolution_errors">Identifier Resolution Errors</a></li>
183<li><a href="#Type_errors">Type Errors</a></li>
184<li><a href="#Resource_errors">Resource Errors</a></li>
185</ul>
186</li>
187<li><a href="#Vs_SQL-92">6. Differences from SQL-92</a></li>
188<li><a href="#DDL_and_DML_statements">7. DDL and DML Statements</a>
189<ul>
190
191<li><a href="#Lifecycle_management_statements">Lifecycle Management Statements</a>
192<ul>
193
194<li><a href="#Use">Use Statement</a></li>
195<li><a href="#Sets">Set Statement</a></li>
196<li><a href="#Functions">Function Declaration</a></li>
197<li><a href="#Create">Create Statement</a>
198<ul>
199
200<li><a href="#Dataverses">Create Dataverse</a></li>
201<li><a href="#Types">Create Type</a></li>
202<li><a href="#Datasets">Create Dataset</a></li>
203<li><a href="#Indices">Create Index</a></li>
204<li><a href="#Synonyms">Create Synonym</a></li>
205<li><a href="#Create_function">Create Function</a></li>
206</ul>
207</li>
208<li><a href="#Removal">Drop Statement</a></li>
209<li><a href="#Load_statement">Load Statement</a></li>
210</ul>
211</li>
212<li><a href="#Modification_statements">Modification Statements</a>
213<ul>
214
215<li><a href="#Inserts">Insert Statement</a></li>
216<li><a href="#Upserts">Upsert Statement</a></li>
217<li><a href="#Deletes">Delete Statement</a></li>
218</ul>
219</li>
220</ul>
221</li>
222<li><a href="#Reserved_keywords">Appendix 1. Reserved Keywords</a></li>
223<li><a href="#Performance_tuning">Appendix 2. Performance Tuning</a>
224<ul>
225
226<li><a href="#Parallelism_parameter">Parallelism Parameter</a></li>
227<li><a href="#Memory_parameters">Memory Parameters</a></li>
228<li><a href="#Query_hints">Query Hints</a></li>
229</ul>
230</li>
231<li><a href="#Variable_bindings_and_name_resolution">Appendix 3. Variable Bindings and Name Resolution</a></li>
232<li><a href="#Manual_data">Appendix 4. Example Data</a>
233<ul>
234
235<li><a href="#definition_statements">Data Definitions</a></li>
236<li><a href="#customers_data">Customers Dataset</a></li>
237<li><a href="#orders_data">Orders Dataset</a></li>
238</ul>
239</li>
240</ul><!--
241 ! Licensed to the Apache Software Foundation (ASF) under one
242 ! or more contributor license agreements. See the NOTICE file
243 ! distributed with this work for additional information
244 ! regarding copyright ownership. The ASF licenses this file
245 ! to you under the Apache License, Version 2.0 (the
246 ! "License"); you may not use this file except in compliance
247 ! with the License. You may obtain a copy of the License at
248 !
249 ! http://www.apache.org/licenses/LICENSE-2.0
250 !
251 ! Unless required by applicable law or agreed to in writing,
252 ! software distributed under the License is distributed on an
253 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
254 ! KIND, either express or implied. See the License for the
255 ! specific language governing permissions and limitations
256 ! under the License.
257 !-->
258
259<h1><a name="Introduction" id="Introduction">1. Introduction</a></h1>
260<p>This document is intended as a reference guide to the full syntax and semantics of AsterixDB&#x2019;s query language, a SQL-based language for working with semistructured data. The language is a derivative of SQL++, a declarative query language for JSON data which is largely backwards compatible with SQL. SQL++ originated from research in the FORWARD project at UC San Diego, and it has much in common with SQL; some differences exist due to the different data models that the two languages were designed to serve. SQL was designed for interacting with the flat, schema-ified world of relational databases, while SQL++ generalizes SQL to also handle nested data formats (like JSON) and the schema-optional (or even schema-less) data models of modern NoSQL and BigData systems.</p>
261<p>In the context of Apache AsterixDB, SQL++ is intended for working with the Asterix Data Model (<a href="../datamodel.html">ADM</a>), a data model based on a superset of JSON with an enriched and flexible type system. New AsterixDB users are encouraged to read and work through the (much friendlier) guide &#x201c;<a href="primer-sqlpp.html">AsterixDB 101: An ADM and SQL++ Primer</a>&#x201d; before attempting to make use of this document. In addition, readers are advised to read through the <a href="../datamodel.html">Asterix Data Model (ADM) reference guide</a> first as well, as an understanding of the data model is a prerequisite to understanding SQL++.</p>
262<p>In what follows, we detail the features of the SQL++ language in a grammar-guided manner. We list and briefly explain each of the productions in the query grammar, offering examples (and results) for clarity. In this manual, we will explain how to use the various features of SQL++ using two datasets named <tt>customers</tt> and <tt>orders</tt>. Each dataset is a collection of objects. The contents of the example datasets can be found at the end of this manual in <a href="#Manual_data">Appendix 4</a>.</p><!--
263 ! Licensed to the Apache Software Foundation (ASF) under one
264 ! or more contributor license agreements. See the NOTICE file
265 ! distributed with this work for additional information
266 ! regarding copyright ownership. The ASF licenses this file
267 ! to you under the Apache License, Version 2.0 (the
268 ! "License"); you may not use this file except in compliance
269 ! with the License. You may obtain a copy of the License at
270 !
271 ! http://www.apache.org/licenses/LICENSE-2.0
272 !
273 ! Unless required by applicable law or agreed to in writing,
274 ! software distributed under the License is distributed on an
275 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
276 ! KIND, either express or implied. See the License for the
277 ! specific language governing permissions and limitations
278 ! under the License.
279 !-->
280
281<h1><a name="Expressions" id="Expressions">2. Expressions</a></h1><!--
282 ! Licensed to the Apache Software Foundation (ASF) under one
283 ! or more contributor license agreements. See the NOTICE file
284 ! distributed with this work for additional information
285 ! regarding copyright ownership. The ASF licenses this file
286 ! to you under the Apache License, Version 2.0 (the
287 ! "License"); you may not use this file except in compliance
288 ! with the License. You may obtain a copy of the License at
289 !
290 ! http://www.apache.org/licenses/LICENSE-2.0
291 !
292 ! Unless required by applicable law or agreed to in writing,
293 ! software distributed under the License is distributed on an
294 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
295 ! KIND, either express or implied. See the License for the
296 ! specific language governing permissions and limitations
297 ! under the License.
298 !-->
299
300<p>An expression is a language fragment that can be evaluated to return a value. For example, the expression 2 + 3 returns the value 5. Expressions are the building blocks from which queries are constructed. SQL++ supports nearly all of the kinds of expressions in SQL, and adds some new kinds as well.</p>
301<p>SQL++ is an orthogonal language, which means that expressions can serve as operands of higher level expressions. By nesting expressions inside other expressions, complex queries can be built up. Any expression can be enclosed in parentheses to establish operator precedence.</p>
302<p>In this section, we&#x2019;ll discuss the various kinds of SQL++ expressions.</p><hr />
303<div class="section">
304<div class="section">
305<h3><a name="Expr"></a>Expr</h3>
306<p><b><img src="../images/diagrams/Expr.png" alt="" /></b></p><hr /></div></div>
307<div class="section">
308<h2><a name="Operator_Expressions"></a><a name="Operator_expressions" id="Operator_expressions">Operator Expressions</a></h2>
309<p>Operators perform a specific operation on the input values or expressions. The syntax of an operator expression is as follows:</p><hr />
310<div class="section">
311<h3><a name="OperatorExpr"></a>OperatorExpr</h3>
312<p><b><img src="../images/diagrams/OperatorExpr.png" alt="" /></b></p><hr />
313<p>The language provides a full set of operators that you can use within its statements. Here are the categories of operators:</p>
314<ul>
315
316<li><a href="#Arithmetic_operators">Arithmetic Operators</a>, to perform basic mathematical operations;</li>
317<li><a href="#Collection_operators">Collection Operators</a>, to evaluate expressions on collections or objects;</li>
318<li><a href="#Comparison_operators">Comparison Operators</a>, to compare two expressions;</li>
319<li><a href="#Logical_operators">Logical Operators</a>, to combine operators using Boolean logic.</li>
320</ul>
321<p>The following table summarizes the precedence order (from higher to lower) of the major unary and binary operators:</p>
322<table border="0" class="table table-striped">
323<thead>
324
325<tr class="a">
326<th> Operator </th>
327<th> Operation </th></tr>
328</thead><tbody>
329
330<tr class="b">
331<td> EXISTS, NOT EXISTS </td>
332<td> Collection emptiness testing </td></tr>
333<tr class="a">
334<td> ^ </td>
335<td> Exponentiation </td></tr>
336<tr class="b">
337<td> *, /, DIV, MOD (%) </td>
338<td> Multiplication, division, modulo </td></tr>
339<tr class="a">
340<td> +, - </td>
341<td> Addition, subtraction </td></tr>
342<tr class="b">
343<td> || </td>
344<td> String concatenation </td></tr>
345<tr class="a">
346<td> IS NULL, IS NOT NULL, IS MISSING, IS NOT MISSING, <br />IS UNKNOWN, IS NOT UNKNOWN, IS VALUED, IS NOT VALUED </td>
347<td> Unknown value comparison </td></tr>
348<tr class="b">
349<td> BETWEEN, NOT BETWEEN </td>
350<td> Range comparison (inclusive on both sides) </td></tr>
351<tr class="a">
352<td> =, !=, &lt;&gt;, &lt;, &gt;, &lt;=, &gt;=, LIKE, NOT LIKE, IN, NOT IN </td>
353<td> Comparison </td></tr>
354<tr class="b">
355<td> NOT </td>
356<td> Logical negation </td></tr>
357<tr class="a">
358<td> AND </td>
359<td> Conjunction </td></tr>
360<tr class="b">
361<td> OR </td>
362<td> Disjunction </td></tr>
363</tbody>
364</table>
365<p>In general, if any operand evaluates to a <tt>MISSING</tt> value, the enclosing operator will return <tt>MISSING</tt>; if none of the operands evaluates to a <tt>MISSING</tt> value but there is an operand which evaluates to a <tt>NULL</tt> value, the enclosing operator will return <tt>NULL</tt>. However, there are a few exceptions listed in <a href="#Comparison_operators">comparison operators</a> and <a href="#Logical_operators">logical operators</a>.</p></div>
366<div class="section">
367<h3><a name="Arithmetic_Operators"></a><a name="Arithmetic_operators" id="Arithmetic_operators">Arithmetic Operators</a></h3>
368<p>Arithmetic operators are used to exponentiate, add, subtract, multiply, and divide numeric values, or concatenate string values.</p>
369<table border="0" class="table table-striped">
370<thead>
371
372<tr class="a">
373<th> Operator </th>
374<th> Purpose </th>
375<th> Example </th></tr>
376</thead><tbody>
377
378<tr class="b">
379<td> +, - </td>
380<td> As unary operators, they denote a <br />positive or negative expression </td>
381<td> SELECT VALUE -1; </td></tr>
382<tr class="a">
383<td> +, - </td>
384<td> As binary operators, they add or subtract </td>
385<td> SELECT VALUE 1 + 2; </td></tr>
386<tr class="b">
387<td> * </td>
388<td> Multiply </td>
389<td> SELECT VALUE 4 * 2; </td></tr>
390<tr class="a">
391<td> / </td>
392<td> Divide (returns a value of type <tt>double</tt> if both operands are integers)</td>
393<td> SELECT VALUE 5 / 2; </td></tr>
394<tr class="b">
395<td> DIV </td>
396<td> Divide (returns an integer value if both operands are integers) </td>
397<td> SELECT VALUE 5 DIV 2; </td></tr>
398<tr class="a">
399<td> MOD (%) </td>
400<td> Modulo </td>
401<td> SELECT VALUE 5 % 2; </td></tr>
402<tr class="b">
403<td> ^ </td>
404<td> Exponentiation </td>
405<td> SELECT VALUE 2^3; </td></tr>
406<tr class="a">
407<td> || </td>
408<td> String concatenation </td>
409<td> SELECT VALUE &quot;ab&quot;||&quot;c&quot;||&quot;d&quot;; </td></tr>
410</tbody>
411</table></div>
412<div class="section">
413<h3><a name="Collection_Operators"></a><a name="Collection_operators" id="Collection_operators">Collection Operators</a></h3>
414<p>Collection operators are used for membership tests (IN, NOT IN) or empty collection tests (EXISTS, NOT EXISTS).</p>
415<table border="0" class="table table-striped">
416<thead>
417
418<tr class="a">
419<th> Operator </th>
420<th> Purpose </th>
421<th> Example </th></tr>
422</thead><tbody>
423
424<tr class="b">
425<td> IN </td>
426<td> Membership test </td>
427<td> FROM customers AS c <br />WHERE c.address.zipcode IN [&quot;02340&quot;, &quot;02115&quot;] <br /> SELECT *; </td></tr>
428<tr class="a">
429<td> NOT IN </td>
430<td> Non-membership test </td>
431<td> FROM customers AS c <br />WHERE c.address.zipcode NOT IN [&quot;02340&quot;, &quot;02115&quot;] <br /> SELECT *;</td></tr>
432<tr class="b">
433<td> EXISTS </td>
434<td> Check whether a collection is not empty </td>
435<td> FROM orders AS o <br />WHERE EXISTS o.items <br /> SELECT *;</td></tr>
436<tr class="a">
437<td> NOT EXISTS </td>
438<td> Check whether a collection is empty </td>
439<td> FROM orders AS o <br />WHERE NOT EXISTS o.items <br /> SELECT *; </td></tr>
440</tbody>
441</table></div>
442<div class="section">
443<h3><a name="Comparison_Operators"></a><a name="Comparison_operators" id="Comparison_operators">Comparison Operators</a></h3>
444<p>Comparison operators are used to compare values. The comparison operators fall into one of two sub-categories: missing value comparisons and regular value comparisons. SQL++ (and JSON) has two ways of representing missing information in an object - the presence of the field with a NULL for its value (as in SQL), and the absence of the field (which JSON permits). For example, the first of the following objects represents Jack, whose friend is Jill. In the other examples, Jake is friendless a la SQL, with a friend field that is NULL, while Joe is friendless in a more natural (for JSON) way, i.e., by not having a friend field.</p>
445<div class="section">
446<div class="section">
447<h5><a name="Examples"></a>Examples</h5>
448<p>{&quot;name&quot;: &quot;Jack&quot;, &quot;friend&quot;: &quot;Jill&quot;}</p>
449<p>{&quot;name&quot;: &quot;Jake&quot;, &quot;friend&quot;: NULL}</p>
450<p>{&quot;name&quot;: &quot;Joe&quot;}</p>
451<p>The following table enumerates all of the comparison operators available in SQL++.</p>
452<table border="0" class="table table-striped">
453<thead>
454
455<tr class="a">
456<th> Operator </th>
457<th> Purpose </th>
458<th> Example </th></tr>
459</thead><tbody>
460
461<tr class="b">
462<td> IS NULL </td>
463<td> Test if a value is NULL </td>
464<td>FROM customers AS c <br />WHERE c.name IS NULL <br /> SELECT *; </td></tr>
465<tr class="a">
466<td> IS NOT NULL </td>
467<td> Test if a value is not NULL </td>
468<td> FROM customers AS c <br />WHERE c.name IS NOT NULL <br /> SELECT *; </td></tr>
469<tr class="b">
470<td> IS MISSING </td>
471<td> Test if a value is MISSING </td>
472<td> FROM customers AS c <br />WHERE c.name IS MISSING <br /> SELECT *; </td></tr>
473<tr class="a">
474<td> IS NOT MISSING </td>
475<td> Test if a value is not MISSING </td>
476<td> FROM customers AS c <br />WHERE c.name IS NOT MISSING <br /> SELECT *; </td></tr>
477<tr class="b">
478<td> IS UNKNOWN </td>
479<td> Test if a value is NULL or MISSING </td>
480<td> FROM customers AS c <br />WHERE c.name IS UNKNOWN <br /> SELECT *; </td></tr>
481<tr class="a">
482<td> IS NOT UNKNOWN </td>
483<td> Test if a value is neither NULL nor MISSING </td>
484<td> FROM customers AS c <br />WHERE c.name IS NOT UNKNOWN <br /> SELECT *; </td></tr>
485<tr class="b">
486<td> IS KNOWN (IS VALUED) </td>
487<td> Test if a value is neither NULL nor MISSING </td>
488<td> FROM customers AS c <br />WHERE c.name IS KNOWN <br /> SELECT *; </td></tr>
489<tr class="a">
490<td> IS NOT KNOWN (IS NOT VALUED) </td>
491<td> Test if a value is NULL or MISSING </td>
492<td> FROM customers AS c <br />WHERE c.name IS NOT KNOWN <br /> SELECT *; </td></tr>
493<tr class="b">
494<td> BETWEEN </td>
495<td> Test if a value is between a start value and an end value. The comparison is inclusive of both the start and end values. </td>
496<td> FROM customers AS c WHERE c.rating BETWEEN 600 AND 700 SELECT *;</td></tr>
497<tr class="a">
498<td> = </td>
499<td> Equality test </td>
500<td> FROM customers AS c <br /> WHERE c.rating = 640 <br /> SELECT *; </td></tr>
501<tr class="b">
502<td> != </td>
503<td> Inequality test </td>
504<td> FROM customers AS c <br /> WHERE c.rating != 640 <br /> SELECT *;</td></tr>
505<tr class="a">
506<td> &lt;&gt; </td>
507<td> Inequality test </td>
508<td> FROM customers AS c <br /> WHERE c.rating &lt;&gt; 640 <br /> SELECT *;</td></tr>
509<tr class="b">
510<td> &lt; </td>
511<td> Less than </td>
512<td> FROM customers AS c <br /> WHERE c.rating &lt; 640 <br /> SELECT *; </td></tr>
513<tr class="a">
514<td> &gt; </td>
515<td> Greater than </td>
516<td> FROM customers AS c <br /> WHERE c.rating &gt; 640 <br /> SELECT *; </td></tr>
517<tr class="b">
518<td> &lt;= </td>
519<td> Less than or equal to </td>
520<td> FROM customers AS c <br /> WHERE c.rating &lt;= 640 <br /> SELECT *; </td></tr>
521<tr class="a">
522<td> &gt;= </td>
523<td> Greater than or equal to </td>
524<td> FROM customers AS c <br /> WHERE c.rating &gt;= 640 <br /> SELECT *; </td></tr>
525<tr class="b">
526<td> LIKE </td>
527<td> Test if the left side matches a pattern defined on the right side; in the pattern, &#x201c;%&#x201d; matches any string while &#x201c;_&#x201d; matches any character. </td>
528<td> FROM customers AS c WHERE c.name LIKE &quot;%Dodge%&quot; SELECT *;</td></tr>
529<tr class="a">
530<td> NOT LIKE </td>
531<td> Test if the left side does not match a pattern defined on the right side; in the pattern, &#x201c;%&#x201d; matches any string while &#x201c;_&#x201d; matches any character. </td>
532<td> FROM customers AS c WHERE c.name NOT LIKE &quot;%Dodge%&quot; SELECT *;</td></tr>
533</tbody>
534</table>
535<p>The following table summarizes how the missing value comparison operators work.</p>
536<table border="0" class="table table-striped">
537<thead>
538
539<tr class="a">
540<th> Operator </th>
541<th> Non-NULL/Non-MISSING value </th>
542<th> NULL value</th>
543<th> MISSING value</th></tr>
544</thead><tbody>
545
546<tr class="b">
547<td> IS NULL </td>
548<td> FALSE </td>
549<td> TRUE </td>
550<td> MISSING </td></tr>
551<tr class="a">
552<td> IS NOT NULL </td>
553<td> TRUE </td>
554<td> FALSE </td>
555<td> MISSING </td></tr>
556<tr class="b">
557<td> IS MISSING </td>
558<td> FALSE </td>
559<td> FALSE </td>
560<td> TRUE </td></tr>
561<tr class="a">
562<td> IS NOT MISSING </td>
563<td> TRUE </td>
564<td> TRUE </td>
565<td> FALSE </td></tr>
566<tr class="b">
567<td> IS UNKNOWN </td>
568<td> FALSE </td>
569<td> TRUE </td>
570<td> TRUE </td></tr>
571<tr class="a">
572<td> IS NOT UNKNOWN </td>
573<td> TRUE </td>
574<td> FALSE </td>
575<td> FALSE</td></tr>
576<tr class="b">
577<td> IS KNOWN (IS VALUED) </td>
578<td> TRUE </td>
579<td> FALSE </td>
580<td> FALSE </td></tr>
581<tr class="a">
582<td> IS NOT KNOWN (IS NOT VALUED) </td>
583<td> FALSE </td>
584<td> TRUE </td>
585<td> TRUE </td></tr>
586</tbody>
587</table></div></div></div>
588<div class="section">
589<h3><a name="Logical_Operators"></a><a name="Logical_operators" id="Logical_operators">Logical Operators</a></h3>
590<p>Logical operators perform logical <tt>NOT</tt>, <tt>AND</tt>, and <tt>OR</tt> operations over Boolean values (<tt>TRUE</tt> and <tt>FALSE</tt>) plus <tt>NULL</tt> and <tt>MISSING</tt>.</p>
591<table border="0" class="table table-striped">
592<thead>
593
594<tr class="a">
595<th> Operator </th>
596<th> Purpose </th>
597<th> Example </th></tr>
598</thead><tbody>
599
600<tr class="b">
601<td> NOT </td>
602<td> Returns true if the following condition is false, otherwise returns false </td>
603<td> SELECT VALUE NOT 1 = 1; <br /> Returns FALSE </td></tr>
604<tr class="a">
605<td> AND </td>
606<td> Returns true if both branches are true, otherwise returns false </td>
607<td> SELECT VALUE 1 = 2 AND 1 = 1; <br /> Returns FALSE</td></tr>
608<tr class="b">
609<td> OR </td>
610<td> Returns true if at least one branch is true, otherwise returns false </td>
611<td> SELECT VALUE 1 = 2 OR 1 = 1; <br /> Returns TRUE </td></tr>
612</tbody>
613</table>
614<p>The following table is the truth table for <tt>AND</tt> and <tt>OR</tt>.</p>
615<table border="0" class="table table-striped">
616<thead>
617
618<tr class="a">
619<th> A </th>
620<th> B </th>
621<th> A AND B </th>
622<th> A OR B </th></tr>
623</thead><tbody>
624
625<tr class="b">
626<td> TRUE </td>
627<td> TRUE </td>
628<td> TRUE </td>
629<td> TRUE </td></tr>
630<tr class="a">
631<td> TRUE </td>
632<td> FALSE </td>
633<td> FALSE </td>
634<td> TRUE </td></tr>
635<tr class="b">
636<td> TRUE </td>
637<td> NULL </td>
638<td> NULL </td>
639<td> TRUE </td></tr>
640<tr class="a">
641<td> TRUE </td>
642<td> MISSING </td>
643<td> MISSING </td>
644<td> TRUE </td></tr>
645<tr class="b">
646<td> FALSE </td>
647<td> FALSE </td>
648<td> FALSE </td>
649<td> FALSE </td></tr>
650<tr class="a">
651<td> FALSE </td>
652<td> NULL </td>
653<td> FALSE </td>
654<td> NULL </td></tr>
655<tr class="b">
656<td> FALSE </td>
657<td> MISSING </td>
658<td> FALSE </td>
659<td> MISSING </td></tr>
660<tr class="a">
661<td> NULL </td>
662<td> NULL </td>
663<td> NULL </td>
664<td> NULL </td></tr>
665<tr class="b">
666<td> NULL </td>
667<td> MISSING </td>
668<td> MISSING </td>
669<td> NULL </td></tr>
670<tr class="a">
671<td> MISSING </td>
672<td> MISSING </td>
673<td> MISSING </td>
674<td> MISSING </td></tr>
675</tbody>
676</table>
677<p>The following table demonstrates the results of <tt>NOT</tt> on all possible inputs.</p>
678<table border="0" class="table table-striped">
679<thead>
680
681<tr class="a">
682<th> A </th>
683<th> NOT A </th></tr>
684</thead><tbody>
685
686<tr class="b">
687<td> TRUE </td>
688<td> FALSE </td></tr>
689<tr class="a">
690<td> FALSE </td>
691<td> TRUE </td></tr>
692<tr class="b">
693<td> NULL </td>
694<td> NULL </td></tr>
695<tr class="a">
696<td> MISSING </td>
697<td> MISSING </td></tr>
698</tbody>
699</table></div></div>
700<div class="section">
701<h2><a name="Quantified_Expressions"></a><a name="Quantified_expressions" id="Quantified_expressions">Quantified Expressions</a></h2><hr />
702<div class="section">
703<h3><a name="QuantifiedExpr"></a>QuantifiedExpr</h3>
704<p><b><img src="../images/diagrams/QuantifiedExpr.png" alt="" /></b></p>
705<div class="section">
706<div class="section">
707<h5><a name="Synonym_for_SOME:_ANY"></a>Synonym for <tt>SOME</tt>: <tt>ANY</tt></h5><hr />
708<p>Quantified expressions are used for expressing existential or universal predicates involving the elements of a collection.</p>
709<p>The following pair of examples illustrates the use of a quantified expression to test that every (or some) element in the set [1, 2, 3] of integers is less than three. The first example yields <tt>FALSE</tt> and the second example yields <tt>TRUE</tt>.</p>
710<p>It is useful to note that if the set were instead the empty set, the first expression would yield <tt>TRUE</tt> (&#x201c;every&#x201d; value in an empty set satisfies the condition) while the second expression would yield <tt>FALSE</tt> (since there isn&#x2019;t &#x201c;some&#x201d; value, as there are no values in the set, that satisfies the condition).</p>
711<p>A quantified expression will return a <tt>NULL</tt> (or <tt>MISSING</tt>) if the first expression in it evaluates to <tt>NULL</tt> (or <tt>MISSING</tt>). Otherwise, a type error will be raised if the first expression in a quantified expression does not return a collection.</p></div>
712<div class="section">
713<h5><a name="Examples"></a>Examples</h5>
714
715<div>
716<div>
717<pre class="source">EVERY x IN [ 1, 2, 3 ] SATISFIES x &lt; 3 Returns FALSE
718SOME x IN [ 1, 2, 3 ] SATISFIES x &lt; 3 Returns TRUE
719</pre></div></div>
720</div></div></div></div>
721<div class="section">
722<h2><a name="Path_Expressions"></a><a name="Path_expressions" id="Path_expressions">Path Expressions</a></h2><hr />
723<div class="section">
724<h3><a name="PathExpr"></a>PathExpr</h3>
725<p><b><img src="../images/diagrams/PathExpr.png" alt="" /></b></p><hr />
726<p>Components of complex types in the data model are accessed via path expressions. Path access can be applied to the result of a query expression that yields an instance of a complex type, for example, an object or an array instance.</p>
727<p>For objects, path access is based on field names, and it accesses the field whose name was specified.<br /></p>
728<p>For arrays, path access is based on (zero-based) array-style indexing. Array indices can be used to retrieve either a single element from an array, or a whole subset of an array. Accessing a single element is achieved by providing a single index argument (zero-based element position), while obtaining a subset of an array is achieved by providing the <tt>start</tt> and <tt>end</tt> (zero-based) index positions; the returned subset is from position <tt>start</tt> to position <tt>end - 1</tt>; the <tt>end</tt> position argument is optional. If a position argument is negative then the element position is counted from the end of the array (<tt>-1</tt> addresses the last element, <tt>-2</tt> next to last, and so on).</p>
729<p>Multisets have similar behavior to arrays, except for retrieving arbitrary items as the order of items is not fixed in multisets.</p>
730<p>Attempts to access non-existent fields or out-of-bound array elements produce the special value <tt>MISSING</tt>. Type errors will be raised for inappropriate use of a path expression, such as applying a field accessor to a numeric value.</p>
731<p>The following examples illustrate field access for an object, index-based element access or subset retrieval of an array, and also a composition thereof.</p>
732<div class="section">
733<div class="section">
734<h5><a name="Examples"></a>Examples</h5>
735
736<div>
737<div>
738<pre class="source">({&quot;name&quot;: &quot;MyABCs&quot;, &quot;array&quot;: [ &quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}).array Returns [[&quot;a&quot;, &quot;b&quot;, &quot;c&quot;]]
739
740([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[2] Returns [&quot;c&quot;]
741
742([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[-1] Returns [&quot;c&quot;]
743
744({&quot;name&quot;: &quot;MyABCs&quot;, &quot;array&quot;: [ &quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}).array[2] Returns [&quot;c&quot;]
745
746([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[0:2] Returns [[&quot;a&quot;, &quot;b&quot;]]
747
748([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[0:] Returns [[&quot;a&quot;, &quot;b&quot;, &quot;c&quot;]]
749
750([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[-2:-1] Returns [[&quot;b&quot;]]
751</pre></div></div>
752</div></div></div></div>
753<div class="section">
754<h2><a name="Primary_Expressions"></a><a name="Primary_expressions" id="Primary_expressions">Primary Expressions</a></h2><hr />
755<div class="section">
756<h3><a name="PrimaryExpr"></a>PrimaryExpr</h3>
757<p><b><img src="../images/diagrams/PrimaryExpr.png" alt="" /></b></p><hr />
758<p>The most basic building block for any expression in SQL++ is PrimaryExpression. This can be a simple literal (constant) value, a reference to a query variable that is in scope, a parenthesized expression, a function call, or a newly constructed instance of the data model (such as a newly constructed object, array, or multiset of data model instances).</p></div>
759<div class="section">
760<h3><a name="Literals" id="Literals">Literals</a></h3><hr /></div>
761<div class="section">
762<h3><a name="Literal"></a>Literal</h3>
763<p><b><img src="../images/diagrams/Literal.png" alt="" /></b></p><hr />
764<p>The simplest kind of expression is a literal that directly represents a value in JSON format. Here are some examples:</p>
765
766<div>
767<div>
768<pre class="source">-42
769&quot;Hello&quot;
770true
771false
772null
773</pre></div></div>
774
775<p>Numeric literals may include a sign and an optional decimal point. They may also be written in exponential notation, like this:</p>
776
777<div>
778<div>
779<pre class="source">5e2
780-4.73E-2
781</pre></div></div>
782
783<p>String literals may be enclosed in either single quotes or double quotes. Inside a string literal, the delimiter character for that string must be &#x201c;escaped&#x201d; by a backward slash, as in these examples:</p>
784
785<div>
786<div>
787<pre class="source">&quot;I read \&quot;War and Peace\&quot; today.&quot;
788'I don\'t believe everything I read.'
789</pre></div></div>
790
791<p>The table below shows how to escape characters in SQL++.</p>
792<table border="0" class="table table-striped">
793<thead>
794
795<tr class="a">
796<th>Character Name </th>
797<th>Escape Method</th></tr>
798</thead><tbody>
799
800<tr class="b">
801<td>Single Quote</td>
802<td> <tt>\'</tt></td></tr>
803<tr class="a">
804<td>Double Quote</td>
805<td><tt>\&quot;</tt></td></tr>
806<tr class="b">
807<td>Backslash</td>
808<td><tt>\\</tt></td></tr>
809<tr class="a">
810<td>Slash</td>
811<td><tt>\/</tt></td></tr>
812<tr class="b">
813<td>Backspace</td>
814<td><tt>\b</tt></td></tr>
815<tr class="a">
816<td>Formfeed</td>
817<td><tt>\f</tt></td></tr>
818<tr class="b">
819<td>Newline</td>
820<td><tt>\n</tt></td></tr>
821<tr class="a">
822<td>Carriage Return</td>
823<td><tt>\r</tt></td></tr>
824<tr class="b">
825<td>Tab</td>
826<td><tt>\t</tt></td></tr>
827</tbody>
828</table></div>
829<div class="section">
830<h3><a name="Identifiers_and_Variable_References"></a><a name="Variable_references" id="Variable_references">Identifiers and Variable References</a></h3>
831<p>Like SQL, SQL++ makes use of a language construct called an <i>identifier</i>. An identifier starts with an alphabetic character or the underscore character _ , and contains only case-sensitive alphabetic characters, numeric digits, or the special characters _ and $. It is also possible for an identifier to include other special characters, or to be the same as a reserved word, by enclosing the identifier in back-ticks (it&#x2019;s then called a <i>delimited identifier</i>). Identifiers are used in variable names and in certain other places in SQL++ syntax, such as in path expressions, which we&#x2019;ll discuss soon. Here are some examples of identifiers:</p>
832
833<div>
834<div>
835<pre class="source">X
836customer_name
837`SELECT`
838`spaces in here`
839`@&amp;#`
840</pre></div></div>
841
842<p>A very simple kind of SQL++ expression is a variable, which is simply an identifier. As in SQL, a variable can be bound to a value, which may be an input dataset, some intermediate result during processing of a query, or the final result of a query. We&#x2019;ll learn more about variables when we discuss queries.</p>
843<p>Note that the SQL++ rules for delimiting strings and identifiers are different from the SQL rules. In SQL, strings are always enclosed in single quotes, and double quotes are used for delimited identifiers.</p></div>
844<div class="section">
845<h3><a name="Parameter_References"></a><a name="Parameter_references" id="Parameter_references">Parameter References</a></h3>
846<p>A parameter reference is an external variable. Its value is provided using the <a href="../api.html#queryservice">statement execution API</a>.</p>
847<p>Parameter references come in two forms, <i>Named Parameter References</i> and <i>Positional Parameter References.</i></p>
848<p>Named parameter references consist of the &#x201c;$&#x201d; symbol followed by an identifier or delimited identifier.</p>
849<p>Positional parameter references can be either a &#x201c;$&#x201d; symbol followed by one or more digits or a &#x201c;?&#x201d; symbol. If numbered, positional parameters start at 1. &#x201c;?&#x201d; parameters are interpreted as $1 to $N based on the order in which they appear in the statement.</p>
850<p>Parameter references may appear as shown in the below examples:</p>
851<div class="section">
852<div class="section">
853<h5><a name="Examples"></a>Examples</h5>
854
855<div>
856<div>
857<pre class="source">$id
858$1
859?
860</pre></div></div>
861
862<p>An error will be raised if the parameter is not bound at query execution time.</p></div></div></div>
863<div class="section">
864<h3><a name="Parenthesized_Expressions"></a><a name="Parenthesized_expressions" id="Parenthesized_expressions">Parenthesized Expressions</a></h3><hr /></div>
865<div class="section">
866<h3><a name="ParenthesizedExpr"></a>ParenthesizedExpr</h3>
867<p><b><img src="../images/diagrams/ParenthesizedExpr.png" alt="" /></b></p></div>
868<div class="section">
869<h3><a name="Subquery"></a>Subquery</h3>
870<p><b><img src="../images/diagrams/Subquery.png" alt="" /></b></p><hr />
871<p>An expression can be parenthesized to control the precedence order or otherwise clarify a query. A <a href="#Subqueries">subquery</a> (nested <a href="#Union_all">selection</a>) may also be enclosed in parentheses. For more on these topics please see their respective sections.</p>
872<p>The following expression evaluates to the value 2.</p>
873<div class="section">
874<div class="section">
875<h5><a name="Example"></a>Example</h5>
876
877<div>
878<div>
879<pre class="source">( 1 + 1 )
880</pre></div></div>
881</div></div></div>
882<div class="section">
883<h3><a name="Function_Calls"></a><a name="Function_call_expressions" id="Function_call_expressions">Function Calls</a></h3><hr /></div>
884<div class="section">
885<h3><a name="FunctionCall"></a>FunctionCall</h3>
886<p><b><img src="../images/diagrams/FunctionCall.png" alt="" /></b></p></div>
887<div class="section">
888<h3><a name="OrdinaryFunctionCall"></a>OrdinaryFunctionCall</h3>
889<p><b><img src="../images/diagrams/OrdinaryFunctionCall.png" alt="" /></b></p></div>
890<div class="section">
891<h3><a name="AggregateFunctionCall"></a>AggregateFunctionCall</h3>
892<p><b><img src="../images/diagrams/AggregateFunctionCall.png" alt="" /></b></p><hr />
893<p>Functions are included in SQL++, like most languages, as a way to package useful functionality or to componentize complicated or reusable computations. A function call is a legal query expression that represents the value resulting from the evaluation of its body expression with the given parameter bindings; the parameter value bindings can themselves be any expressions in SQL++.</p>
894<p>Note that Window functions, and aggregate functions used as window functions, have a more complex syntax. Window function calls are described in the section on <a href="#Over_clauses">Window Queries</a>.</p>
895<p>The following example is a function call expression whose value is 8.</p>
896<div class="section">
897<div class="section">
898<h5><a name="Example"></a>Example</h5>
899
900<div>
901<div>
902<pre class="source">length('a string')
903</pre></div></div>
904</div></div></div></div>
905<div class="section">
906<h2><a name="Case_Expressions"></a><a name="Case_expressions" id="Case_expressions">Case Expressions</a></h2><hr />
907<div class="section">
908<h3><a name="CaseExpr"></a>CaseExpr</h3>
909<p><b><img src="../images/diagrams/CaseExpr.png" alt="" /></b></p></div>
910<div class="section">
911<h3><a name="SimpleCaseExpr"></a>SimpleCaseExpr</h3>
912<p><b><img src="../images/diagrams/SimpleCaseExpr.png" alt="" /></b></p></div>
913<div class="section">
914<h3><a name="SearchedCaseExpr"></a>SearchedCaseExpr</h3>
915<p><b><img src="../images/diagrams/SearchedCaseExpr.png" alt="" /></b></p><hr />
916<p>In a simple <tt>CASE</tt> expression, the query evaluator searches for the first <tt>WHEN</tt> &#x2026; <tt>THEN</tt> pair in which the <tt>WHEN</tt> expression is equal to the expression following <tt>CASE</tt> and returns the expression following <tt>THEN</tt>. If none of the <tt>WHEN</tt> &#x2026; <tt>THEN</tt> pairs meet this condition, and an <tt>ELSE</tt> branch exists, it returns the <tt>ELSE</tt> expression. Otherwise, <tt>NULL</tt> is returned.</p>
917<p>In a searched <tt>CASE</tt> expression, the query evaluator searches from left to right until it finds a <tt>WHEN</tt> expression that is evaluated to <tt>TRUE</tt>, and then returns its corresponding <tt>THEN</tt> expression. If no condition is found to be <tt>TRUE</tt>, and an <tt>ELSE</tt> branch exists, it returns the <tt>ELSE</tt> expression. Otherwise, it returns <tt>NULL</tt>.</p>
918<p>The following example illustrates the form of a case expression.</p>
919<div class="section">
920<div class="section">
921<h5><a name="Example"></a>Example</h5>
922
923<div>
924<div>
925<pre class="source">CASE (2 &lt; 3) WHEN true THEN &quot;yes&quot; ELSE &quot;no&quot; END
926</pre></div></div>
927</div></div></div>
928<div class="section">
929<h3><a name="Constructors" id="Constructors">Constructors</a></h3><hr /></div>
930<div class="section">
931<h3><a name="Constructor"></a>Constructor</h3>
932<p><b><img src="../images/diagrams/Constructor.png" alt="" /></b></p></div>
933<div class="section">
934<h3><a name="ObjectConstructor"></a>ObjectConstructor</h3>
935<p><b><img src="../images/diagrams/ObjectConstructor.png" alt="" /></b></p></div>
936<div class="section">
937<h3><a name="ArrayConstructor"></a>ArrayConstructor</h3>
938<p><b><img src="../images/diagrams/ArrayConstructor.png" alt="" /></b></p></div>
939<div class="section">
940<h3><a name="MultisetConstructor"></a>MultisetConstructor</h3>
941<p><b><img src="../images/diagrams/MultisetConstructor.png" alt="" /></b></p><hr />
942<p>Structured JSON values can be represented by constructors, as in these examples:</p>
943
944<div>
945<div>
946<pre class="source">An object: { &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 42 }
947An array: [ 1, 2, &quot;Hello&quot;, null ]
948</pre></div></div>
949
950<p>In a constructed object, the names of the fields must be strings (either literal strings or computed strings), and an object may not contain any duplicate names. Of course, structured literals can be nested, as in this example:</p>
951
952<div>
953<div>
954<pre class="source">[ {&quot;name&quot;: &quot;Bill&quot;,
955 &quot;address&quot;:
956 {&quot;street&quot;: &quot;25 Main St.&quot;,
957 &quot;city&quot;: &quot;Cincinnati, OH&quot;
958 }
959 },
960 {&quot;name&quot;: &quot;Mary&quot;,
961 &quot;address&quot;:
962 {&quot;street&quot;: &quot;107 Market St.&quot;,
963 &quot;city&quot;: &quot;St. Louis, MO&quot;
964 }
965 }
966]
967</pre></div></div>
968
969<p>The array items in an array constructor, and the field-names and field-values in an object constructor, may be represented by expressions. For example, suppose that the variables firstname, lastname, salary, and bonus are bound to appropriate values. Then structured values might be constructed by the following expressions:</p>
970<p>An object:</p>
971
972<div>
973<div>
974<pre class="source">{
975 &quot;name&quot;: firstname || &quot; &quot; || lastname,
976 &quot;income&quot;: salary + bonus
977}
978</pre></div></div>
979
980<p>An array:</p>
981
982<div>
983<div>
984<pre class="source">[&quot;1984&quot;, lastname, salary + bonus, null]
985</pre></div></div>
986
987<p>If only one expression is specified instead of the field-name/field-value pair in an object constructor then this expression is supposed to provide the field value. The field name is then automatically generated based on the kind of the value expression as in Q2.1:</p>
988<ul>
989
990<li>If it is a variable reference expression then the generated field name is the name of that variable.</li>
991<li>If it is a field access expression then the generated field name is the last identifier in that expression.</li>
992<li>For all other cases, a compilation error will be raised.</li>
993</ul>
994<div class="section">
995<div class="section">
996<h5><a name="Example"></a>Example</h5>
997<p>(Q2.1)</p>
998
999<div>
1000<div>
1001<pre class="source">FROM customers AS c
1002WHERE c.custid = &quot;C47&quot;
1003SELECT VALUE {c.name, c.rating}
1004</pre></div></div>
1005
1006<p>This query outputs:</p>
1007
1008<div>
1009<div>
1010<pre class="source">[
1011 {
1012 &quot;name&quot;: &quot;S. Logan&quot;,
1013 &quot;rating&quot;: 625
1014 }
1015]
1016</pre></div></div>
1017<!--
1018 ! Licensed to the Apache Software Foundation (ASF) under one
1019 ! or more contributor license agreements. See the NOTICE file
1020 ! distributed with this work for additional information
1021 ! regarding copyright ownership. The ASF licenses this file
1022 ! to you under the Apache License, Version 2.0 (the
1023 ! "License"); you may not use this file except in compliance
1024 ! with the License. You may obtain a copy of the License at
1025 !
1026 ! http://www.apache.org/licenses/LICENSE-2.0
1027 !
1028 ! Unless required by applicable law or agreed to in writing,
1029 ! software distributed under the License is distributed on an
1030 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1031 ! KIND, either express or implied. See the License for the
1032 ! specific language governing permissions and limitations
1033 ! under the License.
1034 !-->
1035
1036<h1><a name="Queries" id="Queries">3. Queries</a></h1>
1037<p>A <i>query</i> can be an expression, or it can be constructed from blocks of code called <i>query blocks</i>. A query block may contain several clauses, including <tt>SELECT</tt>, <tt>FROM</tt>, <tt>LET</tt>, <tt>WHERE</tt>, <tt>GROUP BY</tt>, and <tt>HAVING</tt>.</p><hr /></div></div></div>
1038<div class="section">
1039<h3><a name="Query"></a>Query</h3>
1040<p><b><img src="../images/diagrams/Query.png" alt="" /></b></p></div>
1041<div class="section">
1042<h3><a name="Selection"></a>Selection</h3>
1043<p><b><img src="../images/diagrams/Selection.png" alt="" /></b></p></div>
1044<div class="section">
1045<h3><a name="QueryBlock"></a>QueryBlock</h3>
1046<p><b><img src="../images/diagrams/QueryBlock.png" alt="" /></b></p></div>
1047<div class="section">
1048<h3><a name="StreamGenerator"></a>StreamGenerator</h3>
1049<p><b><img src="../images/diagrams/StreamGenerator.png" alt="" /></b></p><hr />
1050<p>Note that, unlike SQL, SQL++ allows the <tt>SELECT</tt> clause to appear either at the beginning or at the end of a query block. For some queries, placing the <tt>SELECT</tt> clause at the end may make a query block easier to understand, because the <tt>SELECT</tt> clause refers to variables defined in the other clauses.</p><!--
1051 ! Licensed to the Apache Software Foundation (ASF) under one
1052 ! or more contributor license agreements. See the NOTICE file
1053 ! distributed with this work for additional information
1054 ! regarding copyright ownership. The ASF licenses this file
1055 ! to you under the Apache License, Version 2.0 (the
1056 ! "License"); you may not use this file except in compliance
1057 ! with the License. You may obtain a copy of the License at
1058 !
1059 ! http://www.apache.org/licenses/LICENSE-2.0
1060 !
1061 ! Unless required by applicable law or agreed to in writing,
1062 ! software distributed under the License is distributed on an
1063 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1064 ! KIND, either express or implied. See the License for the
1065 ! specific language governing permissions and limitations
1066 ! under the License.
1067 !-->
1068</div></div>
1069<div class="section">
1070<h2><a name="SELECT_Clause"></a><a name="Select_clauses" id="Select_clauses">SELECT Clause</a></h2><hr />
1071<div class="section">
1072<h3><a name="SelectClause"></a>SelectClause</h3>
1073<p><b><img src="../images/diagrams/SelectClause.png" alt="" /></b></p>
1074<div class="section">
1075<div class="section">
1076<h5><a name="Synonyms_for_VALUE:_ELEMENT.2C_RAW"></a>Synonyms for <tt>VALUE</tt>: <tt>ELEMENT</tt>, <tt>RAW</tt></h5><hr />
1077<p>In a query block, the <tt>FROM</tt>, <tt>WHERE</tt>, <tt>GROUP BY</tt>, and <tt>HAVING</tt> clauses (if present) are collectively called the Stream Generator. All these clauses, taken together, generate a stream of tuples of bound variables. The <tt>SELECT</tt> clause then uses these bound variables to generate the output of the query block.</p>
1078<p>For example, the clause <tt>FROM customers AS c</tt> scans over the <tt>customers</tt> collection, binding the variable <tt>c</tt> to each <tt>customer</tt> object in turn, producing a stream of bindings.</p>
1079<p>Here&#x2019;s a slightly more complex example of a stream generator:</p></div>
1080<div class="section">
1081<h5><a name="Example"></a>Example</h5>
1082
1083<div>
1084<div>
1085<pre class="source">FROM customers AS c, orders AS o
1086WHERE c.custid = o.custid
1087</pre></div></div>
1088
1089<p>In this example, the <tt>FROM</tt> clause scans over the customers and orders collections, producing a stream of variable pairs (<tt>c</tt>, <tt>o</tt>) in which <tt>c</tt> is bound to a <tt>customer</tt> object and <tt>o</tt> is bound to an <tt>order</tt> object. The <tt>WHERE</tt> clause then retains only those pairs in which the custid values of the two objects match.</p>
1090<p>The output of the query block is a collection containing one output item for each tuple produced by the stream generator. If the stream generator produces no tuples, the output of the query block is an empty collection. Depending on the <tt>SELECT</tt> clause, each output item may be an object or some other kind of value.</p>
1091<p>In addition to using the variables bound by previous clauses, the <tt>SELECT</tt> clause may create and bind some additional variables. For example, the clause <tt>SELECT salary + bonus AS pay</tt> creates the variable <tt>pay</tt> and binds it to the value of <tt>salary + bonus</tt>. This variable may then be used in a later <tt>ORDER BY</tt> clause.</p>
1092<p>In SQL++, the <tt>SELECT</tt> clause may appear either at the beginning or at the end of a query block. Since the <tt>SELECT</tt> clause depends on variables that are bound in the other clauses, the examples in this section place <tt>SELECT</tt> at the end of the query blocks.</p></div></div></div>
1093<div class="section">
1094<h3><a name="SELECT_VALUE"></a><a name="Select_element" id="Select_element">SELECT VALUE</a></h3>
1095<p>The <tt>SELECT VALUE</tt> clause returns an array or multiset that contains the results of evaluating the <tt>VALUE</tt> expression, with one evaluation being performed per &#x201c;binding tuple&#x201d; (i.e., per <tt>FROM</tt> clause item) satisfying the statement&#x2019;s selection criteria. If there is no <tt>FROM</tt> clause, the expression after <tt>VALUE</tt> is evaluated once with no binding tuples (except those inherited from an outer environment).</p>
1096<div class="section">
1097<div class="section">
1098<h5><a name="Example"></a>Example</h5>
1099<p>(Q3.1)</p>
1100
1101<div>
1102<div>
1103<pre class="source">SELECT VALUE 1;
1104</pre></div></div>
1105
1106<p>Result:</p>
1107
1108<div>
1109<div>
1110<pre class="source">[
1111 1
1112]
1113</pre></div></div>
1114</div>
1115<div class="section">
1116<h5><a name="Example"></a>Example</h5>
1117<p>(Q3.2) The following query returns the names of all customers whose rating is above 650.</p>
1118
1119<div>
1120<div>
1121<pre class="source">FROM customers AS c
1122WHERE c.rating &gt; 650
1123SELECT VALUE name;
1124</pre></div></div>
1125
1126<p>Result:</p>
1127
1128<div>
1129<div>
1130<pre class="source">
1131[
1132 &quot;T. Cody&quot;,
1133 &quot;M. Sinclair&quot;,
1134 &quot;T. Henry&quot;
1135]
1136</pre></div></div>
1137</div></div></div>
1138<div class="section">
1139<h3><a name="SQL-style_SELECT"></a><a name="SQL_select" id="SQL_select">SQL-style SELECT</a></h3>
1140<p>Traditional SQL-style <tt>SELECT</tt> syntax is also supported in SQL++; however, the result of a query is not guaranteed to preserve the order of expressions in the <tt>SELECT</tt> clause.</p>
1141<div class="section">
1142<div class="section">
1143<h5><a name="Example"></a>Example</h5>
1144<p>(Q3.3) The following query returns the names and customer IDs of any customers whose rating is 750.</p>
1145
1146<div>
1147<div>
1148<pre class="source">FROM customers AS c
1149WHERE c.rating = 750
1150SELECT c.name AS customer_name, c.custid AS customer_id;
1151</pre></div></div>
1152
1153<p>Returns:</p>
1154
1155<div>
1156<div>
1157<pre class="source">[
1158 {
1159 &quot;customer_id&quot;: &quot;C13&quot;,
1160 &quot;customer_name&quot;: &quot;T. Cody&quot;
1161 },
1162 {
1163 &quot;customer_id&quot;: &quot;C37&quot;,
1164 &quot;customer_name&quot;: &quot;T. Henry&quot;
1165 }
1166]
1167</pre></div></div>
1168</div></div></div>
1169<div class="section">
1170<h3><a name="SELECT_.2A"></a><a name="Select_star" id="Select_star">SELECT *</a></h3>
1171<p>As in SQL, the phrase <tt>SELECT *</tt> suggests, &#x201c;select everything.&#x201d;</p>
1172<p>For each binding tuple in the stream, <tt>SELECT *</tt> produces an output object. For each variable in the binding tuple, the output object contains a field: the name of the field is the name of the variable, and the value of the field is the value of the variable. Essentially, <tt>SELECT *</tt> means, &#x201c;return all the bound variables, with their names and values.&#x201d;</p>
1173<p>The effect of <tt>SELECT *</tt> can be illustrated by an example based on two small collections named <tt>ages</tt> and <tt>eyes</tt>. The contents of the two collections are as follows:</p>
1174<p><tt>ages</tt>:</p>
1175
1176<div>
1177<div>
1178<pre class="source">[
1179 { &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 },
1180 { &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32 }
1181]
1182</pre></div></div>
1183
1184<p><tt>eyes</tt>:</p>
1185
1186<div>
1187<div>
1188<pre class="source">[
1189 { &quot;name&quot;: &quot;Bill&quot;, &quot;eyecolor&quot;: &quot;brown&quot; },
1190 { &quot;name&quot;: &quot;Sue&quot;, &quot;eyecolor&quot;: &quot;blue&quot; }
1191]
1192</pre></div></div>
1193
1194<p>The following example applies <tt>SELECT *</tt> to a single collection.</p>
1195<div class="section">
1196<div class="section">
1197<h5><a name="Example"></a>Example</h5>
1198<p>(Q3.4a) Return all the information in the <tt>ages</tt> collection.</p>
1199
1200<div>
1201<div>
1202<pre class="source">FROM ages AS a
1203SELECT * ;
1204</pre></div></div>
1205
1206<p>Result:</p>
1207
1208<div>
1209<div>
1210<pre class="source">[
1211 { &quot;a&quot;: { &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 }
1212 },
1213 { &quot;a&quot;: { &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32}
1214 }
1215]
1216</pre></div></div>
1217
1218<p>Note that the variable-name <tt>a</tt> appears in the query result. If the <tt>FROM</tt> clause had been simply <tt>FROM ages</tt> (omitting <tt>AS a</tt>), the variable-name in the query result would have been <tt>ages</tt>.</p>
1219<p>The next example applies <tt>SELECT *</tt> to a join of two collections.</p></div>
1220<div class="section">
1221<h5><a name="Example"></a>Example</h5>
1222<p>(Q3.4b) Return all the information in a join of <tt>ages</tt> and <tt>eyes</tt> on matching name fields.</p>
1223
1224<div>
1225<div>
1226<pre class="source">FROM ages AS a, eyes AS e
1227WHERE a.name = e.name
1228SELECT * ;
1229</pre></div></div>
1230
1231<p>Result:</p>
1232
1233<div>
1234<div>
1235<pre class="source">[
1236 { &quot;a&quot;: { &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 },
1237 &quot;e&quot;: { &quot;name&quot;: &quot;Bill&quot;, &quot;eyecolor&quot;: &quot;brown&quot; }
1238 },
1239 { &quot;a&quot;: { &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32 },
1240 &quot;e&quot;: { &quot;name&quot;: &quot;Sue&quot;, &quot;eyecolor&quot;: &quot;blue&quot; }
1241 }
1242]
1243</pre></div></div>
1244
1245<p>Note that the result of <tt>SELECT *</tt> in SQL++ is more complex than the result of <tt>SELECT *</tt> in SQL.</p></div></div></div>
1246<div class="section">
1247<h3><a name="SELECT_variable..2A"></a><a name="Select_variable_star" id="Select_variable_star">SELECT <i>variable</i>.*</a></h3>
1248<p>SQL++ has an alternative version of <tt>SELECT *</tt> in which the star is preceded by a variable. Whereas <tt>SELECT *</tt> means, &#x201c;return all the bound variables, with their names and values,&#x201d; <tt>SELECT</tt> <i>variable</i> <tt>.*</tt> means &#x201c;return only the named variable, and return only its value, not its name.&#x201d;</p>
1249<p>The following example can be compared with (Q3.4a) to see the difference between the two versions of <tt>SELECT *</tt>:</p>
1250<div class="section">
1251<div class="section">
1252<h5><a name="Example"></a>Example</h5>
1253<p>(Q3.4c) Return all information in the <tt>ages</tt> collection.</p>
1254
1255<div>
1256<div>
1257<pre class="source">FROM ages AS a
1258SELECT a.*
1259</pre></div></div>
1260
1261<p>Result:</p>
1262
1263<div>
1264<div>
1265<pre class="source">[
1266 { &quot;name&quot;: &quot;Bill&quot;, &quot;age&quot;: 21 },
1267 { &quot;name&quot;: &quot;Sue&quot;, &quot;age&quot;: 32 }
1268]
1269</pre></div></div>
1270
1271<p>Note that, for queries over a single collection, <tt>SELECT</tt> <i>variable</i> <tt>.*</tt> returns a simpler result and therefore may be preferable to <tt>SELECT *</tt>. In fact, <tt>SELECT</tt> <i>variable</i> <tt>.*</tt>, like <tt>SELECT *</tt> in SQL, is equivalent to a <tt>SELECT</tt> clause that enumerates all the fields of the collection, as in (Q3.4d):</p></div>
1272<div class="section">
1273<h5><a name="Example"></a>Example</h5>
1274<p>(Q3.4d) Return all the information in the <tt>ages</tt> collection.</p>
1275
1276<div>
1277<div>
1278<pre class="source">FROM ages AS a
1279SELECT a.name, a.age
1280</pre></div></div>
1281
1282<p>(same result as (Q3.4c))</p>
1283<p><tt>SELECT</tt> <i>variable</i> <tt>.*</tt> has an additional application. It can be used to return all the fields of a nested object. To illustrate this use, we will use the <tt>customers</tt> dataset in the example database (see Appendix 4).</p></div>
1284<div class="section">
1285<h5><a name="Example"></a>Example</h5>
1286<p>(Q3.4e) In the <tt>customers</tt> dataset, return all the fields of the <tt>address</tt> objects that have zipcode &#x201c;02340&#x201d;.</p>
1287
1288<div>
1289<div>
1290<pre class="source">FROM customers AS c
1291WHERE c.address.zipcode = &quot;02340&quot;
1292SELECT address.* ;
1293</pre></div></div>
1294
1295<p>Result:</p>
1296
1297<div>
1298<div>
1299<pre class="source">[
1300 {
1301 &quot;street&quot;: &quot;690 River St.&quot;,
1302 &quot;city&quot;: &quot;Hanover, MA&quot;,
1303 &quot;zipcode&quot;: &quot;02340&quot;
1304 }
1305]
1306</pre></div></div>
1307</div></div></div>
1308<div class="section">
1309<h3><a name="SELECT_DISTINCT"></a><a name="Select_distinct" id="Select_distinct">SELECT DISTINCT</a></h3>
1310<p>The <tt>DISTINCT</tt> keyword is used to eliminate duplicate items from the results of a query block.</p>
1311<div class="section">
1312<div class="section">
1313<h5><a name="Example"></a>Example</h5>
1314<p>(Q3.5) Returns all of the different cities in the <tt>customers</tt> dataset.</p>
1315
1316<div>
1317<div>
1318<pre class="source">FROM customers AS c
1319SELECT DISTINCT c.address.city;
1320</pre></div></div>
1321
1322<p>Result:</p>
1323
1324<div>
1325<div>
1326<pre class="source">[
1327 {
1328 &quot;city&quot;: &quot;Boston, MA&quot;
1329 },
1330 {
1331 &quot;city&quot;: &quot;Hanover, MA&quot;
1332 },
1333 {
1334 &quot;city&quot;: &quot;St. Louis, MO&quot;
1335 },
1336 {
1337 &quot;city&quot;: &quot;Rome, Italy&quot;
1338 }
1339]
1340</pre></div></div>
1341</div></div></div>
1342<div class="section">
1343<h3><a name="Unnamed_Projections"></a><a name="Unnamed_projections" id="Unnamed_projections">Unnamed Projections</a></h3>
1344<p>Similar to standard SQL, the query language supports unnamed projections (a.k.a. unnamed <tt>SELECT</tt> clause items), for which names are generated rather than user-provided. Name generation has three cases:</p>
1345<ul>
1346
1347<li>If a projection expression is a variable reference expression, its generated name is the name of the variable.</li>
1348<li>If a projection expression is a field access expression, its generated name is the last identifier in the expression.</li>
1349<li>For all other cases, the query processor will generate a unique name.</li>
1350</ul>
1351<div class="section">
1352<div class="section">
1353<h5><a name="Example"></a>Example</h5>
1354<p>(Q3.6) Returns the last digit and the order date of all orders for the customer whose ID is &#x201c;C41&#x201d;.</p>
1355
1356<div>
1357<div>
1358<pre class="source">FROM orders AS o
1359WHERE o.custid = &quot;C41&quot;
1360SELECT o.orderno % 1000, o.order_date;
1361</pre></div></div>
1362
1363<p>Result:</p>
1364
1365<div>
1366<div>
1367<pre class="source">[
1368 {
1369 &quot;$1&quot;: 1,
1370 &quot;order_date&quot;: &quot;2020-04-29&quot;
1371 },
1372 {
1373 &quot;$1&quot;: 6,
1374 &quot;order_date&quot;: &quot;2020-09-02&quot;
1375 }
1376]
1377</pre></div></div>
1378
1379<p>In the result, <tt>$1</tt> is the generated name for <tt>o.orderno % 1000</tt>, while <tt>order_date</tt> is the generated name for <tt>o.order_date</tt>. It is good practice, however, not to rely on system-generated names such as <tt>$1</tt>, which can be confusing and uninformative. Instead, provide a meaningful and concise name that properly describes each selected item.</p></div></div></div>
1380<div class="section">
1381<h3><a name="Abbreviated_Field_Access_Expressions"></a><a name="Abbreviated_field_access_expressions" id="Abbreviated_field_access_expressions">Abbreviated Field Access Expressions</a></h3>
1382<p>As in standard SQL, field access expressions can be abbreviated when there is no ambiguity. In the next example, the variable <tt>o</tt> is the only possible variable reference for fields <tt>orderno</tt> and <tt>order_date</tt> and thus could be omitted in the query. This practice is not recommended, however, as queries may have fields (such as <tt>custid</tt>) which can be present in multiple datasets. More information on abbreviated field access can be found in the appendix section on Variable Resolution.</p>
1383<div class="section">
1384<div class="section">
1385<h5><a name="Example"></a>Example</h5>
1386<p>(Q3.7) Same as Q3.6, omitting the variable reference for the order number and date and providing custom names for <tt>SELECT</tt> clause items.</p>
1387
1388<div>
1389<div>
1390<pre class="source">FROM orders AS o
1391WHERE o.custid = &quot;C41&quot;
1392SELECT orderno % 1000 AS last_digit, order_date;
1393</pre></div></div>
1394
1395<p>Result:</p>
1396
1397<div>
1398<div>
1399<pre class="source">[
1400 {
1401 &quot;last_digit&quot;: 1,
1402 &quot;order_date&quot;: &quot;2020-04-29&quot;
1403 },
1404 {
1405 &quot;last_digit&quot;: 6,
1406 &quot;order_date&quot;: &quot;2020-09-02&quot;
1407 }
1408]
1409</pre></div></div>
1410</div></div></div></div>
1411<div class="section">
1412<h2><a name="FROM_clause"></a><a name="From_clauses" id="From_clauses">FROM clause</a></h2><hr />
1413<div class="section">
1414<h3><a name="FromClause"></a>FromClause</h3>
1415<p><b><img src="../images/diagrams/FromClause.png" alt="FromClause syntax diagram" /></b></p></div>
1416<div class="section">
1417<h3><a name="FromTerm"></a>FromTerm</h3>
1418<p><b><img src="../images/diagrams/FromTerm.png" alt="FromTerm syntax diagram" /></b></p></div>
1419<div class="section">
1420<h3><a name="NamedExpr"></a>NamedExpr</h3>
1421<p><b><img src="../images/diagrams/NamedExpr.png" alt="NamedExpr syntax diagram" /></b></p></div>
1422<div class="section">
1423<h3><a name="JoinStep"></a>JoinStep</h3>
1424<p><b><img src="../images/diagrams/JoinStep.png" alt="JoinStep syntax diagram" /></b></p>
1425<div class="section">
1426<div class="section">
1427<h5><a name="Synonyms_for_UNNEST:_CORRELATE.2C_FLATTEN"></a>Synonyms for <tt>UNNEST</tt>: <tt>CORRELATE</tt>, <tt>FLATTEN</tt></h5><hr />
1428<p>The purpose of a <tt>FROM</tt> clause is to iterate over a collection, binding a variable to each item in turn. Here&#x2019;s a query that iterates over the <tt>customers</tt> dataset, choosing certain customers and returning some of their attributes.</p></div>
1429<div class="section">
1430<h5><a name="Example"></a>Example</h5>
1431<p>(Q3.8) List the customer IDs and names of the customers in zipcode 63101, in order by their customer IDs.</p>
1432
1433<div>
1434<div>
1435<pre class="source">FROM customers
1436WHERE address.zipcode = &quot;63101&quot;
1437SELECT custid AS customer_id, name
1438ORDER BY customer_id;
1439</pre></div></div>
1440
1441<p>Result:</p>
1442
1443<div>
1444<div>
1445<pre class="source">[
1446 {
1447 &quot;customer_id&quot;: &quot;C13&quot;,
1448 &quot;name&quot;: &quot;T. Cody&quot;
1449 },
1450 {
1451 &quot;customer_id&quot;: &quot;C31&quot;,
1452 &quot;name&quot;: &quot;B. Pruitt&quot;
1453 },
1454 {
1455 &quot;customer_id&quot;: &quot;C41&quot;,
1456 &quot;name&quot;: &quot;R. Dodge&quot;
1457 }
1458]
1459</pre></div></div>
1460
1461<p>Let&#x2019;s take a closer look at what this <tt>FROM</tt> clause is doing. A <tt>FROM</tt> clause always produces a stream of bindings, in which an iteration variable is bound in turn to each item in a collection. In Q3.8, since no explicit iteration variable is provided, the <tt>FROM</tt> clause defines an implicit variable named <tt>customers</tt>, the same name as the dataset that is being iterated over. The implicit iteration variable serves as the object-name for all field-names in the query block that do not have explicit object-names. Thus, <tt>address.zipcode</tt> really means <tt>customers.address.zipcode</tt>, <tt>custid</tt> really means <tt>customers.custid</tt>, and <tt>name</tt> really means <tt>customers.name</tt>.</p>
1462<p>You may also provide an explicit iteration variable, as in this version of the same query:</p></div>
1463<div class="section">
1464<h5><a name="Example"></a>Example</h5>
1465<p>(Q3.9) Alternative version of Q3.8 (same result).</p>
1466
1467<div>
1468<div>
1469<pre class="source">FROM customers AS c
1470WHERE c.address.zipcode = &quot;63101&quot;
1471SELECT c.custid AS customer_id, c.name
1472ORDER BY customer_id;
1473</pre></div></div>
1474
1475<p>In Q3.9, the variable <tt>c</tt> is bound to each <tt>customer</tt> object in turn as the query iterates over the <tt>customers</tt> dataset. An explicit iteration variable can be used to identify the fields of the referenced object, as in <tt>c.name</tt> in the <tt>SELECT</tt> clause of Q3.9. When referencing a field of an object, the iteration variable can be omitted when there is no ambiguity. For example, <tt>c.name</tt> could be replaced by <tt>name</tt> in the <tt>SELECT</tt> clause of Q3.9. That&#x2019;s why field-names like <tt>name</tt> and <tt>custid</tt> could stand by themselves in the Q3.8 version of this query.</p>
1476<p>In the examples above, the <tt>FROM</tt> clause iterates over the objects in a dataset. But in general, a <tt>FROM</tt> clause can iterate over any collection. For example, the objects in the <tt>orders</tt> dataset each contain a field called <tt>items</tt>, which is an array of nested objects. In some cases, you will write a <tt>FROM</tt> clause that iterates over a nested array like <tt>items</tt>.</p>
1477<p>The stream of objects (more accurately, variable bindings) that is produced by the <tt>FROM</tt> clause does not have any particular order. The system will choose the most efficient order for the iteration. If you want your query result to have a specific order, you must use an <tt>ORDER BY</tt> clause.</p>
1478<p>It&#x2019;s good practice to specify an explicit iteration variable for each collection in the <tt>FROM</tt> clause, and to use these variables to qualify the field-names in other clauses. Here are some reasons for this convention:</p>
1479<ul>
1480
1481<li>
1482
1483<p>It&#x2019;s nice to have different names for the collection as a whole and an object in the collection. For example, in the clause <tt>FROM customers AS c</tt>, the name <tt>customers</tt> represents the dataset and the name <tt>c</tt> represents one object in the dataset.</p>
1484</li>
1485<li>
1486
1487<p>In some cases, iteration variables are required. For example, when joining a dataset to itself, distinct iteration variables are required to distinguish the left side of the join from the right side.</p>
1488</li>
1489<li>
1490
1491<p>In a subquery it&#x2019;s sometimes necessary to refer to an object in an outer query block (this is called a <i>correlated subquery</i>). To avoid confusion in correlated subqueries, it&#x2019;s best to use explicit variables.</p>
1492</li>
1493</ul></div></div></div>
1494<div class="section">
1495<h3><a name="Joins"></a><a name="Left_outer_unnests" id="Left_outer_unnests">Joins</a></h3>
1496<p>A <tt>FROM</tt> clause gets more interesting when there is more than one collection involved. The following query iterates over two collections: <tt>customers</tt> and <tt>orders</tt>. The <tt>FROM</tt> clause produces a stream of binding tuples, each containing two variables, <tt>c</tt> and <tt>o</tt>. In each binding tuple, <tt>c</tt> is bound to an object from <tt>customers</tt>, and <tt>o</tt> is bound to an object from <tt>orders</tt>. Conceptually, at this point, the binding tuple stream contains all possible pairs of a customer and an order (this is called the <i>Cartesian product</i> of <tt>customers</tt> and <tt>orders</tt>). Of course, we are interested only in pairs where the <tt>custid</tt> fields match, and that condition is expressed in the <tt>WHERE</tt> clause, along with the restriction that the order number must be 1001.</p>
1497<div class="section">
1498<div class="section">
1499<h5><a name="Example"></a>Example</h5>
1500<p>(Q3.10) Create a packing list for order number 1001, showing the customer name and address and all the items in the order.</p>
1501
1502<div>
1503<div>
1504<pre class="source">FROM customers AS c, orders AS o
1505WHERE c.custid = o.custid
1506AND o.orderno = 1001
1507SELECT o.orderno,
1508 c.name AS customer_name,
1509 c.address,
1510 o.items AS items_ordered;
1511</pre></div></div>
1512
1513<p>Result:</p>
1514
1515<div>
1516<div>
1517<pre class="source">[
1518 {
1519 &quot;orderno&quot;: 1001,
1520 &quot;customer_name&quot;: &quot;R. Dodge&quot;,
1521 &quot;address&quot;: {
1522 &quot;street&quot;: &quot;150 Market St.&quot;,
1523 &quot;city&quot;: &quot;St. Louis, MO&quot;,
1524 &quot;zipcode&quot;: &quot;63101&quot;
1525 },
1526 &quot;items_ordered&quot;: [
1527 {
1528 &quot;itemno&quot;: 347,
1529 &quot;qty&quot;: 5,
1530 &quot;price&quot;: 19.99
1531 },
1532 {
1533 &quot;itemno&quot;: 193,
1534 &quot;qty&quot;: 2,
1535 &quot;price&quot;: 28.89
1536 }
1537 ]
1538 }
1539]
1540</pre></div></div>
1541
1542<p>Q3.10 is called a <i>join query</i> because it joins the <tt>customers</tt> collection and the <tt>orders</tt> collection, using the join condition <tt>c.custid = o.custid</tt>. In SQL++, as in SQL, you can express this query more explicitly by a <tt>JOIN</tt> clause that includes the join condition, as follows:</p></div>
1543<div class="section">
1544<h5><a name="Example"></a>Example</h5>
1545<p>(Q3.11) Alternative statement of Q3.10 (same result).</p>
1546
1547<div>
1548<div>
1549<pre class="source">FROM customers AS c JOIN orders AS o
1550 ON c.custid = o.custid
1551WHERE o.orderno = 1001
1552SELECT o.orderno,
1553 c.name AS customer_name,
1554 c.address,
1555 o.items AS items_ordered;
1556</pre></div></div>
1557
1558<p>Whether you express the join condition in a <tt>JOIN</tt> clause or in a <tt>WHERE</tt> clause is a matter of taste; the result is the same. This manual will generally use a comma-separated list of collection-names in the <tt>FROM</tt> clause, leaving the join condition to be expressed elsewhere. As we&#x2019;ll soon see, in some query blocks the join condition can be omitted entirely.</p>
1559<p>There is, however, one case in which an explicit <tt>JOIN</tt> clause is necessary. That is when you need to join collection A to collection B, and you want to make sure that every item in collection A is present in the query result, even if it doesn&#x2019;t match any item in collection B. This kind of query is called a <i>left outer join</i>, and it is illustrated by the following example.</p></div>
1560<div class="section">
1561<h5><a name="Example"></a>Example</h5>
1562<p>(Q3.12) List the customer ID and name, together with the order numbers and dates of their orders (if any) of customers T. Cody and M. Sinclair.</p>
1563
1564<div>
1565<div>
1566<pre class="source">FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
1567WHERE c.name = &quot;T. Cody&quot;
1568 OR c.name = &quot;M. Sinclair&quot;
1569SELECT c.custid, c.name, o.orderno, o.order_date
1570ORDER BY c.custid, o.order_date;
1571</pre></div></div>
1572
1573<p>Result:</p>
1574
1575<div>
1576<div>
1577<pre class="source">[
1578 {
1579 &quot;custid&quot;: &quot;C13&quot;,
1580 &quot;orderno&quot;: 1002,
1581 &quot;name&quot;: &quot;T. Cody&quot;,
1582 &quot;order_date&quot;: &quot;2020-05-01&quot;
1583 },
1584 {
1585 &quot;custid&quot;: &quot;C13&quot;,
1586 &quot;orderno&quot;: 1007,
1587 &quot;name&quot;: &quot;T. Cody&quot;,
1588 &quot;order_date&quot;: &quot;2020-09-13&quot;
1589 },
1590 {
1591 &quot;custid&quot;: &quot;C13&quot;,
1592 &quot;orderno&quot;: 1008,
1593 &quot;name&quot;: &quot;T. Cody&quot;,
1594 &quot;order_date&quot;: &quot;2020-10-13&quot;
1595 },
1596 {
1597 &quot;custid&quot;: &quot;C13&quot;,
1598 &quot;orderno&quot;: 1009,
1599 &quot;name&quot;: &quot;T. Cody&quot;,
1600 &quot;order_date&quot;: &quot;2020-10-13&quot;
1601 },
1602 {
1603 &quot;custid&quot;: &quot;C25&quot;,
1604 &quot;name&quot;: &quot;M. Sinclair&quot;
1605 }
1606]
1607</pre></div></div>
1608
1609<p>As you can see from the result of this left outer join, our data includes four orders from customer T. Cody, but no orders from customer M. Sinclair. The behavior of left outer join in SQL++ is different from that of SQL. SQL would have provided M. Sinclair with an order in which all the fields were <tt>null</tt>. SQL++, on the other hand, deals with schemaless data, which permits it to simply omit the order fields from the outer join.</p>
1610<p>Now we&#x2019;re ready to look at a new kind of join that was not provided (or needed) in original SQL. Consider this query:</p></div>
1611<div class="section">
1612<h5><a name="Example"></a>Example</h5>
1613<p>(Q3.13) For every case in which an item is ordered in a quantity greater than 100, show the order number, date, item number, and quantity.</p>
1614
1615<div>
1616<div>
1617<pre class="source">FROM orders AS o, o.items AS i
1618WHERE i.qty &gt; 100
1619SELECT o.orderno, o.order_date, i.itemno AS item_number,
1620 i.qty AS quantity
1621ORDER BY o.orderno, item_number;
1622</pre></div></div>
1623
1624<p>Result:</p>
1625
1626<div>
1627<div>
1628<pre class="source">[
1629 {
1630 &quot;orderno&quot;: 1002,
1631 &quot;order_date&quot;: &quot;2020-05-01&quot;,
1632 &quot;item_number&quot;: 680,
1633 &quot;quantity&quot;: 150
1634 },
1635 {
1636 &quot;orderno&quot;: 1005,
1637 &quot;order_date&quot;: &quot;2020-08-30&quot;,
1638 &quot;item_number&quot;: 347,
1639 &quot;quantity&quot;: 120
1640 },
1641 {
1642 &quot;orderno&quot;: 1006,
1643 &quot;order_date&quot;: &quot;2020-09-02&quot;,
1644 &quot;item_number&quot;: 460,
1645 &quot;quantity&quot;: 120
1646 }
1647]
1648</pre></div></div>
1649
1650<p>Q3.13 illustrates a feature called <i>left-correlation</i> in the <tt>FROM</tt> clause. Notice that we are joining <tt>orders</tt>, which is a dataset, to <tt>items</tt>, which is an array nested inside each order. In effect, for each order, we are unnesting the <tt>items</tt> array and joining it to the <tt>order</tt> as though it were a separate collection. For this reason, this kind of query is sometimes called an <i>unnesting query</i>. The keyword <tt>UNNEST</tt> may be used whenever left-correlation is used in a <tt>FROM</tt> clause, as shown in this example:</p></div>
1651<div class="section">
1652<h5><a name="Example"></a>Example</h5>
1653<p>(Q3.14) Alternative statement of Q3.13 (same result).</p>
1654
1655<div>
1656<div>
1657<pre class="source">FROM orders AS o UNNEST o.items AS i
1658WHERE i.qty &gt; 100
1659SELECT o.orderno, o.order_date, i.itemno AS item_number,
1660 i.qty AS quantity
1661ORDER BY o.orderno, item_number;
1662</pre></div></div>
1663
1664<p>The results of Q3.13 and Q3.14 are exactly the same. <tt>UNNEST</tt> serves as a reminder that left-correlation is being used to join an object with its nested items. The join condition in Q3.14 is expressed by the left-correlation: each order <tt>o</tt> is joined to its own items, referenced as <tt>o.items</tt>. The result of the <tt>FROM</tt> clause is a stream of binding tuples, each containing two variables, <tt>o</tt> and <tt>i</tt>. The variable <tt>o</tt> is bound to an order and the variable <tt>i</tt> is bound to one item inside that order.</p>
1665<p>Like <tt>JOIN</tt>, <tt>UNNEST</tt> has a <tt>LEFT OUTER</tt> option. Q3.14 could have specified:</p>
1666
1667<div>
1668<div>
1669<pre class="source">FROM orders AS o LEFT OUTER UNNEST o.items AS i
1670</pre></div></div>
1671
1672<p>In this case, orders that have no nested items would appear in the query result.</p></div></div></div></div>
1673<div class="section">
1674<h2><a name="LET_Clause"></a><a name="Let_clauses" id="Let_clauses">LET Clause</a></h2><hr />
1675<div class="section">
1676<h3><a name="LetClause"></a>LetClause</h3>
1677<p><b><img src="../images/diagrams/LetClause.png" alt="LetClause syntax diagram" /></b></p>
1678<div class="section">
1679<div class="section">
1680<h5><a name="Synonyms_for_LET:_LETTING"></a>Synonyms for <tt>LET</tt>: <tt>LETTING</tt></h5><hr />
1681<p><tt>LET</tt> clauses can be useful when a (complex) expression is used several times within a query, allowing it to be written once to make the query more concise. The word <tt>LETTING</tt> can also be used, although this is not as common. The next query shows an example.</p></div>
1682<div class="section">
1683<h5><a name="Example"></a>Example</h5>
1684<p>(Q3.15) For each item in an order, the revenue is defined as the quantity times the price of that item. Find individual items for which the revenue is greater than 5000. For each of these, list the order number, item number, and revenue, in descending order by revenue.</p>
1685
1686<div>
1687<div>
1688<pre class="source">FROM orders AS o, o.items AS i
1689LET revenue = i.qty * i.price
1690WHERE revenue &gt; 5000
1691SELECT o.orderno, i.itemno, revenue
1692ORDER by revenue desc;
1693</pre></div></div>
1694
1695<p>Result:</p>
1696
1697<div>
1698<div>
1699<pre class="source">[
1700 {
1701 &quot;orderno&quot;: 1006,
1702 &quot;itemno&quot;: 460,
1703 &quot;revenue&quot;: 11997.6
1704 },
1705 {
1706 &quot;orderno&quot;: 1002,
1707 &quot;itemno&quot;: 460,
1708 &quot;revenue&quot;: 9594.05
1709 },
1710 {
1711 &quot;orderno&quot;: 1006,
1712 &quot;itemno&quot;: 120,
1713 &quot;revenue&quot;: 5525
1714 }
1715]
1716</pre></div></div>
1717
1718<p>The expression for computing revenue is defined once in the <tt>LET</tt> clause and then used three times in the remainder of the query. Avoiding repetition of the revenue expression makes the query shorter and less prone to errors.</p></div></div></div></div>
1719<div class="section">
1720<h2><a name="WHERE_Clause"></a><a name="Where_having_clauses" id="Where_having_clauses">WHERE Clause</a></h2><hr />
1721<div class="section">
1722<h3><a name="WhereClause"></a>WhereClause</h3>
1723<p><b><img src="../images/diagrams/WhereClause.png" alt="WhereClause syntax diagram" /></b></p><hr />
1724<p>The purpose of a <tt>WHERE</tt> clause is to operate on the stream of binding tuples generated by the <tt>FROM</tt> clause, filtering out the tuples that do not satisfy a certain condition. The condition is specified by an expression based on the variable names in the binding tuples. If the expression evaluates to true, the tuple remains in the stream; if it evaluates to anything else, including <tt>null</tt> or <tt>missing</tt>, it is filtered out. The surviving tuples are then passed along to the next clause to be processed (usually either <tt>GROUP BY</tt> or <tt>SELECT</tt>).</p>
1725<p>Often, the expression in a <tt>WHERE</tt> clause is some kind of comparison like <tt>quantity &gt; 100</tt>. However, any kind of expression is allowed in a <tt>WHERE</tt> clause. The only thing that matters is whether the expression returns <tt>true</tt> or not.</p></div></div>
1726<div class="section">
1727<h2><a name="Grouping"></a><a name="Group_By_clauses" id="Group_By_clauses">Grouping</a></h2>
1728<p>Grouping is especially important when manipulating hierarchies like the ones that are often found in JSON data. Often you will want to generate output data that includes both summary data and line items within the summaries. For this purpose, SQL++ supports several important extensions to the traditional grouping features of SQL. The familiar <tt>GROUP BY</tt> and <tt>HAVING</tt> clauses are still there, and they are joined by a new clause called <tt>GROUP AS</tt>. We&#x2019;ll illustrate these clauses by a series of examples.</p>
1729<div class="section">
1730<h3><a name="GROUP_BY_Clause"></a>GROUP BY Clause</h3><hr /></div>
1731<div class="section">
1732<h3><a name="GroupByClause"></a>GroupByClause</h3>
1733<p><b><img src="../images/diagrams/GroupByClause.png" alt="GroupByClause syntax diagram" /></b></p><hr />
1734<p>We&#x2019;ll begin our discussion of grouping with an example from ordinary SQL.</p>
1735<div class="section">
1736<div class="section">
1737<h5><a name="Example"></a>Example</h5>
1738<p>(Q3.16) List the number of orders placed by each customer who has placed an order.</p>
1739
1740<div>
1741<div>
1742<pre class="source">SELECT o.custid, COUNT(o.orderno) AS `order count`
1743FROM orders AS o
1744GROUP BY o.custid
1745ORDER BY o.custid;
1746</pre></div></div>
1747
1748<p>Result:</p>
1749
1750<div>
1751<div>
1752<pre class="source">[
1753 {
1754 &quot;order count&quot;: 4,
1755 &quot;custid&quot;: &quot;C13&quot;
1756 },
1757 {
1758 &quot;order count&quot;: 1,
1759 &quot;custid&quot;: &quot;C31&quot;
1760 },
1761 {
1762 &quot;order count&quot;: 1,
1763 &quot;custid&quot;: &quot;C35&quot;
1764 },
1765 {
1766 &quot;order count&quot;: 1,
1767 &quot;custid&quot;: &quot;C37&quot;
1768 },
1769 {
1770 &quot;order count&quot;: 2,
1771 &quot;custid&quot;: &quot;C41&quot;
1772 }
1773]
1774</pre></div></div>
1775
1776<p>The input to a <tt>GROUP BY</tt> clause is the stream of binding tuples generated by the <tt>FROM</tt> and <tt>WHERE</tt> clauses. In this query, before grouping, the variable <tt>o</tt> is bound to each object in the <tt>orders</tt> collection in turn.</p>
1777<p>SQL++ evaluates the expression in the <tt>GROUP BY</tt> clause, called the grouping expression, once for each of the binding tuples. It then organizes the results into groups in which the grouping expression has a common value (as defined by the <tt>=</tt> operator). In this example, the grouping expression is <tt>o.custid</tt>, and each of the resulting groups is a set of <tt>orders</tt> that have the same <tt>custid</tt>. If necessary, a group is formed for <tt>orders</tt> in which <tt>custid</tt> is <tt>null</tt>, and another group is formed for <tt>orders</tt> that have no <tt>custid</tt>. This query uses the aggregating function <tt>COUNT(o.orderno)</tt>, which counts how many order numbers are in each group. If we are sure that each order object has a distinct <tt>orderno</tt>, we could also simply count the order objects in each group by using <tt>COUNT(*)</tt> in place of <tt>COUNT(o.orderno)</tt>.</p>
1778<p>In the <tt>GROUP BY</tt> clause, you may optionally define an alias for the grouping expression. For example, in Q3.16, you could have written <tt>GROUP BY o.custid AS cid</tt>. The alias <tt>cid</tt> could then be used in place of the grouping expression in later clauses. In cases where the grouping expression contains an operator, it is especially helpful to define an alias (for example, <tt>GROUP BY salary + bonus AS pay</tt>).</p>
1779<p>Q3.16 had a single grouping expression, <tt>o.custid</tt>. If a query has multiple grouping expressions, the combination of grouping expressions is evaluated for every binding tuple, and the stream of binding tuples is partitioned into groups that have values in common for all of the grouping expressions. We&#x2019;ll see an example of such a query in Q3.18.</p>
1780<p>After grouping, the number of binding tuples is reduced: instead of a binding tuple for each of the input objects, there is a binding tuple for each group. The grouping expressions (identified by their aliases, if any) are bound to the results of their evaluations. However, all the non-grouping fields (that is, fields that were not named in the grouping expressions), are accessible only in a special way: as an argument of one of the special aggregation pseudo-functions such as: <tt>SUM</tt>, <tt>AVG</tt>, <tt>MAX</tt>, <tt>MIN</tt>, <tt>STDEV</tt> and <tt>COUNT</tt>. The clauses that come after grouping can access only properties of groups, including the grouping expressions and aggregate properties of the groups such as <tt>COUNT(o.orderno)</tt> or <tt>COUNT(*)</tt>. (We&#x2019;ll see an exception when we discuss the new <tt>GROUP AS</tt> clause.)</p>
1781<p>You may notice that the results of Q3.16 do not include customers who have no <tt>orders</tt>. If we want to include these <tt>customers</tt>, we need to use an outer join between the <tt>customers</tt> and <tt>orders</tt> collections. This is illustrated by the following example, which also includes the name of each customer.</p></div>
1782<div class="section">
1783<h5><a name="Example"></a>Example</h5>
1784<p>(Q3.17) List the number of orders placed by each customer including those customers who have placed no orders.</p>
1785
1786<div>
1787<div>
1788<pre class="source">SELECT c.custid, c.name, COUNT(o.orderno) AS `order count`
1789FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
1790GROUP BY c.custid, c.name
1791ORDER BY c.custid;
1792</pre></div></div>
1793
1794<p>Result:</p>
1795
1796<div>
1797<div>
1798<pre class="source">[
1799 {
1800 &quot;custid&quot;: &quot;C13&quot;,
1801 &quot;order count&quot;: 4,
1802 &quot;name&quot;: &quot;T. Cody&quot;
1803 },
1804 {
1805 &quot;custid&quot;: &quot;C25&quot;,
1806 &quot;order count&quot;: 0,
1807 &quot;name&quot;: &quot;M. Sinclair&quot;
1808 },
1809 {
1810 &quot;custid&quot;: &quot;C31&quot;,
1811 &quot;order count&quot;: 1,
1812 &quot;name&quot;: &quot;B. Pruitt&quot;
1813 },
1814 {
1815 &quot;custid&quot;: &quot;C35&quot;,
1816 &quot;order count&quot;: 1,
1817 &quot;name&quot;: &quot;J. Roberts&quot;
1818 },
1819 {
1820 &quot;custid&quot;: &quot;C37&quot;,
1821 &quot;order count&quot;: 1,
1822 &quot;name&quot;: &quot;T. Henry&quot;
1823 },
1824 {
1825 &quot;custid&quot;: &quot;C41&quot;,
1826 &quot;order count&quot;: 2,
1827 &quot;name&quot;: &quot;R. Dodge&quot;
1828 },
1829 {
1830 &quot;custid&quot;: &quot;C47&quot;,
1831 &quot;order count&quot;: 0,
1832 &quot;name&quot;: &quot;S. Logan&quot;
1833 }
1834]
1835</pre></div></div>
1836
1837<p>Notice in Q3.17 what happens when the special aggregation function <tt>COUNT</tt> is applied to a collection that does not exist, such as the orders of M. Sinclair: it returns zero. This behavior is unlike that of the other special aggregation functions <tt>SUM</tt>, <tt>AVG</tt>, <tt>MAX</tt>, and <tt>MIN</tt>, which return <tt>null</tt> if their operand does not exist. This should make you cautious about the <tt>COUNT</tt> function: If it returns zero, that may mean that the collection you are counting has zero members, or that it does not exist, or that you have misspelled the collection&#x2019;s name.</p>
1838<p>Q3.17 also shows how a query block can have more than one grouping expression. In general, the <tt>GROUP BY</tt> clause produces a binding tuple for each different combination of values for the grouping expressions. In Q3.17, the <tt>c.custid</tt> field uniquely identifies a customer, so adding <tt>c.name</tt> as a grouping expression does not result in any more groups. Nevertheless, <tt>c.name</tt> must be included as a grouping expression if it is to be referenced outside (after) the <tt>GROUP BY</tt> clause. If <tt>c.name</tt> were not included in the <tt>GROUP BY</tt> clause, it would not be a group property and could not be used in the <tt>SELECT</tt> clause.</p>
1839<p>Of course, a grouping expression need not be a simple field-name. In Q3.18, orders are grouped by month, using a temporal function to extract the month component of the order dates. In cases like this, it is helpful to define an alias for the grouping expression so that it can be referenced elsewhere in the query, e.g., in the <tt>SELECT</tt> clause.</p></div>
1840<div class="section">
1841<h5><a name="Example"></a>Example</h5>
1842<p>(Q3.18) Find the months in 2020 that had the largest numbers of orders; list the months and their numbers of orders. (Return the top three.)</p>
1843
1844<div>
1845<div>
1846<pre class="source">FROM orders AS o
1847WHERE get_year(date(o.order_date)) = 2020
1848GROUP BY get_month(date(o.order_date)) AS month
1849SELECT month, COUNT(*) AS order_count
1850ORDER BY order_count desc
1851LIMIT 3;
1852</pre></div></div>
1853
1854<p>Result:</p>
1855
1856<div>
1857<div>
1858<pre class="source">[
1859 {
1860 &quot;month&quot;: 10,
1861 &quot;order_count&quot;: 2
1862 },
1863 {
1864 &quot;month&quot;: 9,
1865 &quot;order_count&quot;: 2
1866 },
1867 {
1868 &quot;month&quot;: 8,
1869 &quot;order_count&quot;: 1
1870 }
1871]
1872</pre></div></div>
1873
1874<p>Groups are commonly formed from named collections like <tt>customers</tt> and <tt>orders</tt>. But in some queries you need to form groups from a collection that is nested inside another collection, such as <tt>items</tt> inside <tt>orders</tt>. In SQL++ you can do this by using left-correlation in the <tt>FROM</tt> clause to unnest the inner collection, joining the inner collection with the outer collection, and then performing the grouping on the join, as illustrated in Q3.19.</p>
1875<p>Q3.19 also shows how a <tt>LET</tt> clause can be used after a <tt>GROUP BY</tt> clause to define an expression that is referenced multiple times in later clauses.</p></div>
1876<div class="section">
1877<h5><a name="Example"></a>Example</h5>
1878<p>(Q3.19) For each order, define the total revenue of the order as the sum of quantity times price for all the items in that order. List the total revenue for all the orders placed by the customer with id &#x201c;C13&#x201d;, in descending order by total revenue.</p>
1879
1880<div>
1881<div>
1882<pre class="source">FROM orders as o, o.items as i
1883WHERE o.custid = &quot;C13&quot;
1884GROUP BY o.orderno
1885LET total_revenue = sum(i.qty * i.price)
1886SELECT o.orderno, total_revenue
1887ORDER BY total_revenue desc;
1888</pre></div></div>
1889
1890<p>Result:</p>
1891
1892<div>
1893<div>
1894<pre class="source">[
1895 {
1896 &quot;orderno&quot;: 1002,
1897 &quot;total_revenue&quot;: 10906.55
1898 },
1899 {
1900 &quot;orderno&quot;: 1008,
1901 &quot;total_revenue&quot;: 1999.8
1902 },
1903 {
1904 &quot;orderno&quot;: 1007,
1905 &quot;total_revenue&quot;: 130.45
1906 }
1907]
1908</pre></div></div>
1909</div></div></div>
1910<div class="section">
1911<h3><a name="HAVING_Clause"></a><a name="Having_clauses" id="Having_clauses">HAVING Clause</a></h3><hr /></div>
1912<div class="section">
1913<h3><a name="HavingClause"></a>HavingClause</h3>
1914<p><b><img src="../images/diagrams/HavingClause.png" alt="" /></b></p><hr />
1915<p>The <tt>HAVING</tt> clause is very similar to the <tt>WHERE</tt> clause, except that it comes after <tt>GROUP BY</tt> and applies a filter to groups rather than to individual objects. Here&#x2019;s an example of a <tt>HAVING</tt> clause that filters orders by applying a condition to their nested arrays of <tt>items</tt>.</p>
1916<p>By adding a <tt>HAVING</tt> clause to Q3.19, we can filter the results to include only those orders whose total revenue is greater than 5000, as shown in Q3.20.</p>
1917<div class="section">
1918<div class="section">
1919<h5><a name="Example"></a>Example</h5>
1920<p>(Q3.20) Modify Q3.19 to include only orders whose total revenue is greater than 5000.</p>
1921
1922<div>
1923<div>
1924<pre class="source">FROM orders AS o, o.items as i
1925WHERE o.custid = &quot;C13&quot;
1926GROUP BY o.orderno
1927LET total_revenue = sum(i.qty * i.price)
1928HAVING total_revenue &gt; 5000
1929SELECT o.orderno, total_revenue
1930ORDER BY total_revenue desc;
1931</pre></div></div>
1932
1933<p>Result:</p>
1934
1935<div>
1936<div>
1937<pre class="source">[
1938 {
1939 &quot;orderno&quot;: 1002,
1940 &quot;total_revenue&quot;: 10906.55
1941 }
1942]
1943</pre></div></div>
1944</div></div></div>
1945<div class="section">
1946<h3><a name="Aggregation_Pseudo-Functions"></a><a name="Aggregation_PseudoFunctions" id="Aggregation_PseudoFunctions">Aggregation Pseudo-Functions</a></h3>
1947<p>SQL provides several special functions for performing aggregations on groups including: <tt>SUM</tt>, <tt>AVG</tt>, <tt>MAX</tt>, <tt>MIN</tt>, and <tt>COUNT</tt> (some implementations provide more). These same functions are supported in SQL++. However, it&#x2019;s worth spending some time on these special functions because they don&#x2019;t behave like ordinary functions. They are called &#x201c;pseudo-functions&#x201d; here because they don&#x2019;t evaluate their operands in the same way as ordinary functions. To see the difference, consider these two examples, which are syntactically similar:</p>
1948<div class="section">
1949<div class="section">
1950<h5><a name="Example_1:"></a>Example 1:</h5>
1951
1952<div>
1953<div>
1954<pre class="source">SELECT LENGTH(name) FROM customers
1955</pre></div></div>
1956
1957<p>In Example 1, <tt>LENGTH</tt> is an ordinary function. It simply evaluates its operand (name) and then returns a result computed from the operand.</p></div>
1958<div class="section">
1959<h5><a name="Example_2:"></a>Example 2:</h5>
1960
1961<div>
1962<div>
1963<pre class="source">SELECT AVG(rating) FROM customers
1964</pre></div></div>
1965
1966<p>The effect of <tt>AVG</tt> in Example 2 is quite different. Rather than performing a computation on an individual rating value, <tt>AVG</tt> has a global effect: it effectively restructures the query. As a pseudo-function, <tt>AVG</tt> requires its operand to be a group; therefore, it automatically collects all the rating values from the query block and forms them into a group.</p>
1967<p>The aggregation pseudo-functions always require their operand to be a group. In some queries, the group is explicitly generated by a <tt>GROUP BY</tt> clause, as in Q3.21:</p></div>
1968<div class="section">
1969<h5><a name="Example"></a>Example</h5>
1970<p>(Q3.21) List the average credit rating of customers by zipcode.</p>
1971
1972<div>
1973<div>
1974<pre class="source">FROM customers AS c
1975GROUP BY c.address.zipcode AS zip
1976SELECT zip, AVG(c.rating) AS `avg credit rating`
1977ORDER BY zip;
1978</pre></div></div>
1979
1980<p>Result:</p>
1981
1982<div>
1983<div>
1984<pre class="source">[
1985 {
1986 &quot;avg credit rating&quot;: 625
1987 },
1988 {
1989 &quot;avg credit rating&quot;: 657.5,
1990 &quot;zip&quot;: &quot;02115&quot;
1991 },
1992 {
1993 &quot;avg credit rating&quot;: 690,
1994 &quot;zip&quot;: &quot;02340&quot;
1995 },
1996 {
1997 &quot;avg credit rating&quot;: 695,
1998 &quot;zip&quot;: &quot;63101&quot;
1999 }
2000]
2001</pre></div></div>
2002
2003<p>Note in the result of Q3.21 that one or more customers had no zipcode. These customers were formed into a group for which the value of the grouping key is missing. When the query results were returned in JSON format, the <tt>missing</tt> key simply does not appear. Also note that the group whose key is <tt>missing</tt> appears first because <tt>missing</tt> is considered to be smaller than any other value. If some customers had had <tt>null</tt> as a zipcode, they would have been included in another group, appearing after the <tt>missing</tt> group but before the other groups.</p>
2004<p>When an aggregation pseudo-function is used without an explicit <tt>GROUP BY</tt> clause, it implicitly forms the entire query block into a single group, as in Q3.22:</p></div>
2005<div class="section">
2006<h5><a name="Example"></a>Example</h5>
2007<p>(Q3.22) Find the average credit rating among all customers.</p>
2008
2009<div>
2010<div>
2011<pre class="source">FROM customers AS c
2012SELECT AVG(c.rating) AS `avg credit rating`;
2013</pre></div></div>
2014
2015<p>Result:</p>
2016
2017<div>
2018<div>
2019<pre class="source">[
2020 {
2021 &quot;avg credit rating&quot;: 670
2022 }
2023]
2024</pre></div></div>
2025
2026<p>The aggregation pseudo-function <tt>COUNT</tt> has a special form in which its operand is <tt>*</tt> instead of an expression. For example, <tt>SELECT COUNT(*) FROM customers</tt> simply returns the total number of customers, whereas <tt>SELECT COUNT(rating) FROM customers</tt> returns the number of customers who have known ratings (that is, their ratings are not <tt>null</tt> or <tt>missing</tt>).</p>
2027<p>Because the aggregation pseudo-functions sometimes restructure their operands, they can be used only in query blocks where (explicit or implicit) grouping is being done. Therefore the pseudo-functions cannot operate directly on arrays or multisets. For operating directly on JSON collections, SQL++ provides a set of ordinary functions for computing aggregations. Each ordinary aggregation function (except the ones corresponding to <tt>COUNT</tt> and <tt>ARRAY_AGG</tt>) has two versions: one that ignores <tt>null</tt> and <tt>missing</tt> values and one that returns <tt>null</tt> if a <tt>null</tt> or <tt>missing</tt> value is encountered anywhere in the collection. The names of the aggregation functions are as follows:</p>
2028<table border="0" class="table table-striped">
2029<thead>
2030
2031<tr class="a">
2032<th> Aggregation pseudo-function; operates on groups only </th>
2033<th> ordinary functions: Ignores NULL or MISSING values </th>
2034<th> ordinary functions: Returns NULL if NULL or MISSING are encountered</th></tr>
2035</thead><tbody>
2036
2037<tr class="b">
2038<td>SUM</td>
2039<td> ARRAY_SUM</td>
2040<td> STRICT_SUM </td></tr>
2041<tr class="a">
2042<td> AVG </td>
2043<td> ARRAY_AVG</td>
2044<td> STRICT_AVG </td></tr>
2045<tr class="b">
2046<td> MAX </td>
2047<td> ARRAY_MAX</td>
2048<td> STRICT_MAX </td></tr>
2049<tr class="a">
2050<td> MIN </td>
2051<td> ARRAY_MIN</td>
2052<td> STRICT_MIN </td></tr>
2053<tr class="b">
2054<td> COUNT </td>
2055<td>ARRAY_COUNT</td>
2056<td>STRICT_COUNT (see exception below) </td></tr>
2057<tr class="a">
2058<td>STDDEV_SAMP</td>
2059<td>ARRAY_STDDEV_SAMP</td>
2060<td> STRICT_STDDEV_SAMP </td></tr>
2061<tr class="b">
2062<td>STDDEV_POP</td>
2063<td>ARRAY_STDDEV_POP</td>
2064<td> STRICT_STDDEV_POP </td></tr>
2065<tr class="a">
2066<td>VAR_SAMP</td>
2067<td>ARRAY_VAR_SAMP</td>
2068<td> STRICT_VAR_SAMP </td></tr>
2069<tr class="b">
2070<td>VAR_POP</td>
2071<td>ARRAY_VAR_POP</td>
2072<td> STRICT_VAR_POP </td></tr>
2073<tr class="a">
2074<td>SKEWNESS</td>
2075<td>ARRAY_SKEWNESS</td>
2076<td> STRICT_SKEWNESS </td></tr>
2077<tr class="b">
2078<td>KURTOSIS</td>
2079<td>ARRAY_KURTOSIS</td>
2080<td> STRICT_KURTOSIS </td></tr>
2081<tr class="a">
2082<td></td>
2083<td colspan="2">ARRAY_AGG</td></tr>
2084</tbody>
2085</table></div>
2086<div class="section">
2087<h5><a name="Exception:_the_ordinary_aggregation_function_STRICT_COUNT_operates_on_any_collection.2C_and_returns_a_count_of_its_items.2C_including_null_values_in_the_count._In_this_respect.2C_STRICT_COUNT_is_more_similar_to_COUNT.28.2A.29_than_to_COUNT.28expression.29."></a>Exception: the ordinary aggregation function STRICT_COUNT operates on any collection, and returns a count of its items, including null values in the count. In this respect, STRICT_COUNT is more similar to COUNT(*) than to COUNT(expression).</h5>
2088<p>Note that the ordinary aggregation functions that ignore <tt>null</tt> have names beginning with &#x201c;ARRAY.&#x201d; This naming convention has historical roots. Despite their names, the functions operate on both arrays and multisets.</p>
2089<p>Because of the special properties of the aggregation pseudo-functions, SQL (and therefore SQL++) is not a pure functional language. But every query that uses a pseudo-function can be expressed as an equivalent query that uses an ordinary function. Q3.23 is an example of how queries can be expressed without pseudo-functions. A more detailed explanation of all of the functions is also available in the <a href="builtins.html#AggregateFunctions">aggregate functions reference</a>.</p></div>
2090<div class="section">
2091<h5><a name="Example"></a>Example</h5>
2092<p>(Q3.23) Alternative form of Q3.22, using the ordinary function <tt>ARRAY_AVG</tt> rather than the aggregating pseudo-function <tt>AVG</tt>.</p>
2093
2094<div>
2095<div>
2096<pre class="source">SELECT ARRAY_AVG(
2097 (SELECT VALUE c.rating
2098 FROM customers AS c) ) AS `avg credit rating`;
2099</pre></div></div>
2100
2101<p>Result (same as Q3.22):</p>
2102
2103<div>
2104<div>
2105<pre class="source">[
2106 {
2107 &quot;avg credit rating&quot;: 670
2108 }
2109]
2110</pre></div></div>
2111
2112<p>If the function <tt>STRICT_AVG</tt> had been used in Q3.23 in place of <tt>ARRAY_AVG</tt>, the average credit rating returned by the query would have been <tt>null</tt>, because at least one customer has no credit rating.</p></div></div></div>
2113<div class="section">
2114<h3><a name="GROUP_AS_Clause"></a><a name="Group_As_clauses" id="Group_As_clauses">GROUP AS Clause</a></h3><hr /></div>
2115<div class="section">
2116<h3><a name="GroupAsClause"></a>GroupAsClause</h3>
2117<p><b><img src="../images/diagrams/GroupAsClause.png" alt="" /></b></p><hr />
2118<p>JSON is a hierarchical format, and a fully featured JSON query language needs to be able to produce hierarchies of its own, with computed data at every level of the hierarchy. The key feature of SQL++ that makes this possible is the <tt>GROUP AS</tt> clause.</p>
2119<p>A query may have a <tt>GROUP AS</tt> clause only if it has a <tt>GROUP BY</tt> clause. The <tt>GROUP BY</tt> clause &#x201c;hides&#x201d; the original objects in each group, exposing only the grouping expressions and special aggregation functions on the non-grouping fields. The purpose of the <tt>GROUP AS</tt> clause is to make the original objects in the group visible to subsequent clauses. Thus the query can generate output data both for the group as a whole and for the individual objects inside the group.</p>
2120<p>For each group, the <tt>GROUP AS</tt> clause preserves all the objects in the group, just as they were before grouping, and gives a name to this preserved group. The group name can then be used in the <tt>FROM</tt> clause of a subquery to process and return the individual objects in the group.</p>
2121<p>To see how this works, we&#x2019;ll write some queries that investigate the customers in each zipcode and their credit ratings. This would be a good time to review the sample database in Appendix 4. A part of the data is summarized below.</p>
2122
2123<div>
2124<div>
2125<pre class="source">Customers in zipcode 02115:
2126 C35, J. Roberts, rating 565
2127 C37, T. Henry, rating 750
2128
2129Customers in zipcode 02340:
2130 C25, M. Sinclair, rating 690
2131
2132Customers in zipcode 63101:
2133 C13, T. Cody, rating 750
2134 C31, B. Pruitt, (no rating)
2135 C41, R. Dodge, rating 640
2136
2137Customers with no zipcode:
2138 C47, S. Logan, rating 625
2139</pre></div></div>
2140
2141<p>Now let&#x2019;s consider the effect of the following clauses:</p>
2142
2143<div>
2144<div>
2145<pre class="source">FROM customers AS c
2146GROUP BY c.address.zipcode
2147GROUP AS g
2148</pre></div></div>
2149
2150<p>This query fragment iterates over the <tt>customers</tt> objects, using the iteration variable <tt>c</tt>. The <tt>GROUP BY</tt> clause forms the objects into groups, each with a common zipcode (including one group for customers with no zipcode). After the <tt>GROUP BY</tt> clause, we can see the grouping expression, <tt>c.address.zipcode</tt>, but other fields such as <tt>c.custid</tt> and <tt>c.name</tt> are visible only to special aggregation functions.</p>
2151<p>The clause <tt>GROUP AS g</tt> now makes the original objects visible again. For each group in turn, the variable <tt>g</tt> is bound to a multiset of objects, each of which has a field named <tt>c</tt>, which in turn contains one of the original objects. Thus after <tt>GROUP AS g</tt>, for the group with zipcode 02115, <tt>g</tt> is bound to the following multiset:</p>
2152
2153<div>
2154<div>
2155<pre class="source">[
2156 { &quot;c&quot;:
2157 { &quot;custid&quot;: &quot;C35&quot;,
2158 &quot;name&quot;: &quot;J. Roberts&quot;,
2159 &quot;address&quot;:
2160 { &quot;street&quot;: &quot;420 Green St.&quot;,
2161 &quot;city&quot;: &quot;Boston, MA&quot;,
2162 &quot;zipcode&quot;: &quot;02115&quot;
2163 },
2164 &quot;rating&quot;: 565
2165 }
2166 },
2167 { &quot;c&quot;:
2168 { &quot;custid&quot;: &quot;C37&quot;,
2169 &quot;name&quot;: &quot;T. Henry&quot;,
2170 &quot;address&quot;:
2171 { &quot;street&quot;: &quot;120 Harbor Blvd.&quot;,
2172 &quot;city&quot;: &quot;Boston, MA&quot;,
2173 &quot;zipcode&quot;: &quot;02115&quot;
2174 },
2175 &quot;rating&quot;: 750
2176 }
2177 }
2178]
2179</pre></div></div>
2180
2181<p>Thus, the clauses following <tt>GROUP AS</tt> can see the original objects by writing subqueries that iterate over the multiset <tt>g</tt>.</p>
2182<p>The extra level named <tt>c</tt> was introduced into this multiset because the groups might have been formed from a join of two or more collections. Suppose that the <tt>FROM</tt> clause looked like <tt>FROM customers AS c, orders AS o</tt>. Then each item in the group would contain both a <tt>customers</tt> object and an <tt>orders</tt> object, and these two objects might both have a field with the same name. To avoid ambiguity, each of the original objects is wrapped in an &#x201c;outer&#x201d; object that gives it the name of its iteration variable in the <tt>FROM</tt> clause. Consider this fragment:</p>
2183
2184<div>
2185<div>
2186<pre class="source">FROM customers AS c, orders AS o
2187WHERE c.custid = o.custid
2188GROUP BY c.address.zipcode
2189GROUP AS g
2190</pre></div></div>
2191
2192<p>In this case, following <tt>GROUP AS g</tt>, the variable <tt>g</tt> would be bound to the following collection:</p>
2193
2194<div>
2195<div>
2196<pre class="source">[
2197 { &quot;c&quot;: { an original customers object },
2198 &quot;o&quot;: { an original orders object }
2199 },
2200 { &quot;c&quot;: { another customers object },
2201 &quot;o&quot;: { another orders object }
2202 },
2203 ...
2204]
2205</pre></div></div>
2206
2207<p>After using <tt>GROUP AS</tt> to make the content of a group accessible, you will probably want to write a subquery to access that content. A subquery for this purpose is written in exactly the same way as any other subquery. The name specified in the <tt>GROUP AS</tt> clause (<tt>g</tt> in the above example) is the name of a collection of objects. You can write a <tt>FROM</tt> clause to iterate over the objects in the collection, and you can specify an iteration variable to represent each object in turn. For <tt>GROUP AS</tt> queries in this manual, we&#x2019;ll use <tt>g</tt> as the name of the reconstituted group, and <tt>gi</tt> as an iteration variable representing one object inside the group. Of course, you can use any names you like for these purposes.</p>
2208<p>Now we are ready to take a look at how <tt>GROUP AS</tt> might be used in a query. Suppose that we want to group customers by zipcode, and for each group we want to see the average credit rating and a list of the individual customers in the group. Here&#x2019;s a query that does that:</p>
2209<div class="section">
2210<div class="section">
2211<h5><a name="Example"></a>Example</h5>
2212<p>(Q3.24) For each zipcode, list the average credit rating in that zipcode, followed by the customer numbers and names in numeric order.</p>
2213
2214<div>
2215<div>
2216<pre class="source">FROM customers AS c
2217GROUP BY c.address.zipcode AS zip
2218GROUP AS g
2219SELECT zip, AVG(c.rating) AS `avg credit rating`,
2220 (FROM g AS gi
2221 SELECT gi.c.custid, gi.c.name
2222 ORDER BY gi.c.custid) AS `local customers`
2223ORDER BY zip;
2224</pre></div></div>
2225
2226<p>Result:</p>
2227
2228<div>
2229<div>
2230<pre class="source">[
2231 {
2232 &quot;avg credit rating&quot;: 625,
2233 &quot;local customers&quot;: [
2234 {
2235 &quot;custid&quot;: &quot;C47&quot;,
2236 &quot;name&quot;: &quot;S. Logan&quot;
2237 }
2238 ]
2239 },
2240 {
2241 &quot;avg credit rating&quot;: 657.5,
2242 &quot;local customers&quot;: [
2243 {
2244 &quot;custid&quot;: &quot;C35&quot;,
2245 &quot;name&quot;: &quot;J. Roberts&quot;
2246 },
2247 {
2248 &quot;custid&quot;: &quot;C37&quot;,
2249 &quot;name&quot;: &quot;T. Henry&quot;
2250 }
2251 ],
2252 &quot;zip&quot;: &quot;02115&quot;
2253 },
2254 {
2255 &quot;avg credit rating&quot;: 690,
2256 &quot;local customers&quot;: [
2257 {
2258 &quot;custid&quot;: &quot;C25&quot;,
2259 &quot;name&quot;: &quot;M. Sinclair&quot;
2260 }
2261 ],
2262 &quot;zip&quot;: &quot;02340&quot;
2263 },
2264 {
2265 &quot;avg credit rating&quot;: 695,
2266 &quot;local customers&quot;: [
2267 {
2268 &quot;custid&quot;: &quot;C13&quot;,
2269 &quot;name&quot;: &quot;T. Cody&quot;
2270 },
2271 {
2272 &quot;custid&quot;: &quot;C31&quot;,
2273 &quot;name&quot;: &quot;B. Pruitt&quot;
2274 },
2275 {
2276 &quot;custid&quot;: &quot;C41&quot;,
2277 &quot;name&quot;: &quot;R. Dodge&quot;
2278 }
2279 ],
2280 &quot;zip&quot;: &quot;63101&quot;
2281 }
2282]
2283</pre></div></div>
2284
2285<p>Note that this query contains two <tt>ORDER BY</tt> clauses: one in the outer query and one in the subquery. These two clauses govern the ordering of the outer-level list of zipcodes and the inner-level lists of customers, respectively. Also note that the group of customers with no zipcode comes first in the output list. For additional reading on SQL++ and more examples using <tt>GROUP AS</tt> as well as other clauses discussed in this manual see the <a class="externalLink" href="https://asterixdb.apache.org/files/SQL_Book.pdf">SQL++ Tutorial</a>.</p></div></div></div></div>
2286<div class="section">
2287<h2><a name="Selection_and_UNION_ALL"></a><a name="Union_all" id="Union_all">Selection and UNION ALL</a></h2><hr />
2288<div class="section">
2289<h3><a name="Selection"></a>Selection</h3>
2290<p><b><img src="../images/diagrams/Selection.png" alt="" /></b></p></div>
2291<div class="section">
2292<h3><a name="UnionOption"></a>UnionOption</h3>
2293<p><b><img src="../images/diagrams/UnionOption.png" alt="" /></b></p><hr />
2294<p>In a SQL++ query, two or more query blocks can be connected by the operator <tt>UNION ALL</tt>. The result of a <tt>UNION ALL</tt> between two query blocks contains all the items returned by the first query block, and all the items returned by the second query block. Duplicate items are not eliminated from the query result.</p>
2295<p>As in SQL, there is no ordering guarantee on the contents of the output stream. However, unlike SQL, the query language does not constrain what the data looks like on the input streams; in particular, it allows heterogeneity on the input and output streams. A type error will be raised if one of the inputs is not a collection.</p>
2296<p>When two or more query blocks are connected by <tt>UNION ALL</tt>, they can be followed by <tt>ORDER BY</tt>, <tt>LIMIT</tt>, and <tt>OFFSET</tt> clauses that apply to the <tt>UNION</tt> query as a whole. For these clauses to be meaningful, the field-names returned by the two query blocks should match. The following example shows a <tt>UNION ALL</tt> of two query blocks, with an ordering specified for the result.</p>
2297<p>In this example, a customer might be selected because he has ordered more than two different items (first query block) or because he has a high credit rating (second query block). By adding an explanatory string to each query block, the query writer can cause the output objects to be labeled to distinguish these two cases.</p>
2298<div class="section">
2299<div class="section">
2300<h5><a name="Example"></a>Example</h5>
2301<p>(Q3.25a) Find customer ids for customers who have placed orders for more than two different items or who have a credit rating greater than 700, with labels to distinguish these cases.</p>
2302
2303<div>
2304<div>
2305<pre class="source">FROM orders AS o, o.items AS i
2306GROUP BY o.orderno, o.custid
2307HAVING COUNT(*) &gt; 2
2308SELECT DISTINCT o.custid AS customer_id, &quot;Big order&quot; AS reason
2309
2310UNION ALL
2311
2312FROM customers AS c
2313WHERE rating &gt; 700
2314SELECT c.custid AS customer_id, &quot;High rating&quot; AS reason
2315ORDER BY customer_id;
2316</pre></div></div>
2317
2318<p>Result:</p>
2319
2320<div>
2321<div>
2322<pre class="source">[
2323 {
2324 &quot;reason&quot;: &quot;High rating&quot;,
2325 &quot;customer_id&quot;: &quot;C13&quot;
2326 },
2327 {
2328 &quot;reason&quot;: &quot;Big order&quot;,
2329 &quot;customer_id&quot;: &quot;C37&quot;
2330 },
2331 {
2332 &quot;reason&quot;: &quot;High rating&quot;,
2333 &quot;customer_id&quot;: &quot;C37&quot;
2334 },
2335 {
2336 &quot;reason&quot;: &quot;Big order&quot;,
2337 &quot;customer_id&quot;: &quot;C41&quot;
2338 }
2339]
2340</pre></div></div>
2341
2342<p>If, on the other hand, you simply want a list of the customer ids and you don&#x2019;t care to preserve the reasons, you can simplify your output by using <tt>SELECT VALUE</tt>, as follows:</p>
2343<p>(Q3.25b) Simplify Q3.25a to return a simple list of unlabeled customer ids.</p>
2344
2345<div>
2346<div>
2347<pre class="source">FROM orders AS o, o.items AS i
2348GROUP BY o.orderno, o.custid
2349HAVING COUNT(*) &gt; 2
2350SELECT VALUE o.custid
2351
2352UNION ALL
2353
2354FROM customers AS c
2355WHERE rating &gt; 700
2356SELECT VALUE c.custid;
2357</pre></div></div>
2358
2359<p>Result:</p>
2360
2361<div>
2362<div>
2363<pre class="source">[
2364 &quot;C37&quot;,
2365 &quot;C41&quot;,
2366 &quot;C13&quot;,
2367 &quot;C37&quot;
2368]
2369</pre></div></div>
2370</div></div></div></div>
2371<div class="section">
2372<h2><a name="WITH_Clause"></a><a name="With_clauses" id="With_clauses">WITH Clause</a></h2><hr />
2373<div class="section">
2374<h3><a name="WithClause"></a>WithClause</h3>
2375<p><b><img src="../images/diagrams/WithClause.png" alt="" /></b></p><hr />
2376<p>As in standard SQL, a <tt>WITH</tt> clause can be used to improve the modularity of a query. A <tt>WITH</tt> clause often contains a subquery that is needed to compute some result that is used later in the main query. In cases like this, you can think of the <tt>WITH</tt> clause as computing a &#x201c;temporary view&#x201d; of the input data. The next example uses a <tt>WITH</tt> clause to compute the total revenue of each order in 2020; then the main part of the query finds the minimum, maximum, and average revenue for orders in that year.</p>
2377<div class="section">
2378<div class="section">
2379<h5><a name="Example"></a>Example</h5>
2380<p>(Q3.26) Find the minimum, maximum, and average revenue among all orders in 2020.</p>
2381
2382<div>
2383<div>
2384<pre class="source">WITH order_revenue AS
2385 (FROM orders AS o, o.items AS i
2386 WHERE get_year(date(o.order_date)) = 2020
2387 GROUP BY o.orderno
2388 SELECT o.orderno, SUM(i.qty * i.price) AS revenue
2389 )
2390FROM order_revenue
2391SELECT AVG(revenue) AS average,
2392 MIN(revenue) AS minimum,
2393 MAX(revenue) AS maximum;
2394</pre></div></div>
2395
2396<p>Result:</p>
2397
2398<div>
2399<div>
2400<pre class="source">[
2401 {
2402 &quot;average&quot;: 4669.99,
2403 &quot;minimum&quot;: 130.45,
2404 &quot;maximum&quot;: 18847.58
2405 }
2406]
2407</pre></div></div>
2408
2409<p><tt>WITH</tt> can be particularly useful when a value needs to be used several times in a query.</p></div></div></div></div>
2410<div class="section">
2411<h2><a name="ORDER_BY.2C_LIMIT.2C_and_OFFSET_Clauses"></a><a name="Order_By_clauses" id="Order_By_clauses">ORDER BY, LIMIT, and OFFSET Clauses</a></h2><hr />
2412<div class="section">
2413<h3><a name="OrderbyClause"></a>OrderbyClause</h3>
2414<p><b><img src="../images/diagrams/OrderbyClause.png" alt="" /></b></p></div>
2415<div class="section">
2416<h3><a name="LimitClause"></a>LimitClause</h3>
2417<p><b><img src="../images/diagrams/LimitClause.png" alt="" /></b></p></div>
2418<div class="section">
2419<h3><a name="OffsetClause"></a>OffsetClause</h3></div></div>
2420<div class="section">
2421<p><b><img src="../images/diagrams/OffsetClause.png" alt="" /></b></p><hr />
2422<p>The last three (optional) clauses to be processed in a query are <tt>ORDER BY</tt>, <tt>LIMIT</tt>, and <tt>OFFSET</tt>.</p>
2423<p>The <tt>ORDER BY</tt> clause is used to globally sort data in either ascending order (i.e., <tt>ASC</tt>) or descending order (i.e., <tt>DESC</tt>). During ordering, <tt>MISSING</tt> and <tt>NULL</tt> are treated as being smaller than any other value if they are encountered in the ordering key(s). <tt>MISSING</tt> is treated as smaller than <tt>NULL</tt> if both occur in the data being sorted. The ordering of values of a given type is consistent with its type&#x2019;s <tt>&lt;=</tt> ordering; the ordering of values across types is implementation-defined but stable.</p>
2424<p>The <tt>LIMIT</tt> clause is used to limit the result set to a specified maximum size. The optional <tt>OFFSET</tt> clause is used to specify a number of items in the output stream to be discarded before the query result begins. The <tt>OFFSET</tt> can also be used as a standalone clause, without the <tt>LIMIT</tt>.</p>
2425<p>The following example illustrates use of the <tt>ORDER BY</tt> and <tt>LIMIT</tt> clauses.</p>
2426<div class="section">
2427<div class="section">
2428<div class="section">
2429<h5><a name="Example"></a>Example</h5>
2430<p>(Q3.27) Return the top three customers by rating.</p>
2431
2432<div>
2433<div>
2434<pre class="source">FROM customers AS c
2435SELECT c.custid, c.name, c.rating
2436ORDER BY c.rating DESC
2437LIMIT 3;
2438</pre></div></div>
2439
2440<p>Result:</p>
2441
2442<div>
2443<div>
2444<pre class="source">[
2445 {
2446 &quot;custid&quot;: &quot;C13&quot;,
2447 &quot;name&quot;: &quot;T. Cody&quot;,
2448 &quot;rating&quot;: 750
2449 },
2450 {
2451 &quot;custid&quot;: &quot;C37&quot;,
2452 &quot;name&quot;: &quot;T. Henry&quot;,
2453 &quot;rating&quot;: 750
2454 },
2455 {
2456 &quot;custid&quot;: &quot;C25&quot;,
2457 &quot;name&quot;: &quot;M. Sinclair&quot;,
2458 &quot;rating&quot;: 690
2459 }
2460]
2461</pre></div></div>
2462
2463<p>The following example illustrates the use of <tt>OFFSET</tt>:</p></div>
2464<div class="section">
2465<h5><a name="Example"></a>Example</h5>
2466<p>(Q3.28) Find the customer with the third-highest credit rating.</p>
2467
2468<div>
2469<div>
2470<pre class="source">FROM customers AS c
2471SELECT c.custid, c.name, c.rating
2472ORDER BY c.rating DESC
2473LIMIT 1 OFFSET 2;
2474</pre></div></div>
2475
2476<p>Result:</p>
2477
2478<div>
2479<div>
2480<pre class="source">[
2481 {
2482 &quot;custid&quot;: &quot;C25&quot;,
2483 &quot;name&quot;: &quot;M. Sinclair&quot;,
2484 &quot;rating&quot;: 690
2485 }
2486]
2487</pre></div></div>
2488</div></div></div></div>
2489<div class="section">
2490<h2><a name="Subqueries" id="Subqueries">Subqueries</a></h2><hr />
2491<div class="section">
2492<h3><a name="Subquery"></a>Subquery</h3>
2493<p><b><img src="../images/diagrams/Subquery.png" alt="" /></b></p><hr />
2494<p>A subquery is simply a query surrounded by parentheses. In SQL++, a subquery can appear anywhere that an expression can appear. Like any query, a subquery always returns a collection, even if the collection contains only a single value or is empty. If the subquery has a SELECT clause, it returns a collection of objects. If the subquery has a SELECT VALUE clause, it returns a collection of scalar values. If a single scalar value is expected, the indexing operator [0] can be used to extract the single scalar value from the collection.</p>
2495<div class="section">
2496<div class="section">
2497<h5><a name="Example"></a>Example</h5>
2498<p>(Q3.29) (Subquery in SELECT clause) For every order that includes item no. 120, find the order number, customer id, and customer name.</p>
2499<p>Here, the subquery is used to find a customer name, given a customer id. Since the outer query expects a scalar result, the subquery uses SELECT VALUE and is followed by the indexing operator [0].</p>
2500
2501<div>
2502<div>
2503<pre class="source">FROM orders AS o, o.items AS i
2504WHERE i.itemno = 120
2505SELECT o.orderno, o.custid,
2506 (FROM customers AS c
2507 WHERE c.custid = o.custid
2508 SELECT VALUE c.name)[0] AS name;
2509</pre></div></div>
2510
2511<p>Result:</p>
2512
2513<div>
2514<div>
2515<pre class="source">[
2516 {
2517 &quot;orderno&quot;: 1003,
2518 &quot;custid&quot;: &quot;C31&quot;,
2519 &quot;name&quot;: &quot;B. Pruitt&quot;
2520 },
2521 {
2522 &quot;orderno&quot;: 1006,
2523 &quot;custid&quot;: &quot;C41&quot;,
2524 &quot;name&quot;: &quot;R. Dodge&quot;
2525 }
2526]
2527</pre></div></div>
2528</div>
2529<div class="section">
2530<h5><a name="Example"></a>Example</h5>
2531<p>(Q3.30) (Subquery in WHERE clause) Find the customer number, name, and rating of all customers whose rating is greater than the average rating.</p>
2532<p>Here, the subquery is used to find the average rating among all customers. Once again, SELECT VALUE and indexing [0] have been used to get a single scalar value.</p>
2533
2534<div>
2535<div>
2536<pre class="source">FROM customers AS c1
2537WHERE c1.rating &gt;
2538 (FROM customers AS c2
2539 SELECT VALUE AVG(c2.rating))[0]
2540SELECT c1.custid, c1.name, c1.rating;
2541</pre></div></div>
2542
2543<p>Result:</p>
2544
2545<div>
2546<div>
2547<pre class="source">[
2548 {
2549 &quot;custid&quot;: &quot;C13&quot;,
2550 &quot;name&quot;: &quot;T. Cody&quot;,
2551 &quot;rating&quot;: 750
2552 },
2553 {
2554 &quot;custid&quot;: &quot;C25&quot;,
2555 &quot;name&quot;: &quot;M. Sinclair&quot;,
2556 &quot;rating&quot;: 690
2557 },
2558 {
2559 &quot;custid&quot;: &quot;C37&quot;,
2560 &quot;name&quot;: &quot;T. Henry&quot;,
2561 &quot;rating&quot;: 750
2562 }
2563]
2564</pre></div></div>
2565</div>
2566<div class="section">
2567<h5><a name="Example"></a>Example</h5>
2568<p>(Q3.31) (Subquery in FROM clause) Compute the total revenue (sum over items of quantity times price) for each order, then find the average, maximum, and minimum total revenue over all orders.</p>
2569<p>Here, the FROM clause expects to iterate over a collection of objects, so the subquery uses an ordinary SELECT and does not need to be indexed. You might think of a FROM clause as a &#x201c;natural home&#x201d; for a subquery.</p>
2570
2571<div>
2572<div>
2573<pre class="source">FROM
2574 (FROM orders AS o, o.items AS i
2575 GROUP BY o.orderno
2576 SELECT o.orderno, SUM(i.qty * i.price) AS revenue
2577 ) AS r
2578SELECT AVG(r.revenue) AS average,
2579 MIN(r.revenue) AS minimum,
2580 MAX(r.revenue) AS maximum;
2581</pre></div></div>
2582
2583<p>Result:</p>
2584
2585<div>
2586<div>
2587<pre class="source">[
2588 {
2589 &quot;average&quot;: 4669.99,
2590 &quot;minimum&quot;: 130.45,
2591 &quot;maximum&quot;: 18847.58
2592 }
2593]
2594</pre></div></div>
2595
2596<p>Note the similarity between Q3.26 and Q3.31. This illustrates how a subquery can often be moved into a <tt>WITH</tt> clause to improve the modularity and readability of a query.</p><!--
2597 ! Licensed to the Apache Software Foundation (ASF) under one
2598 ! or more contributor license agreements. See the NOTICE file
2599 ! distributed with this work for additional information
2600 ! regarding copyright ownership. The ASF licenses this file
2601 ! to you under the Apache License, Version 2.0 (the
2602 ! "License"); you may not use this file except in compliance
2603 ! with the License. You may obtain a copy of the License at
2604 !
2605 ! http://www.apache.org/licenses/LICENSE-2.0
2606 !
2607 ! Unless required by applicable law or agreed to in writing,
2608 ! software distributed under the License is distributed on an
2609 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
2610 ! KIND, either express or implied. See the License for the
2611 ! specific language governing permissions and limitations
2612 ! under the License.
2613 !-->
2614
2615<h1><a name="Over_clauses" id="Over_clauses">4. Window Functions</a></h1><!--
2616 ! Licensed to the Apache Software Foundation (ASF) under one
2617 ! or more contributor license agreements. See the NOTICE file
2618 ! distributed with this work for additional information
2619 ! regarding copyright ownership. The ASF licenses this file
2620 ! to you under the Apache License, Version 2.0 (the
2621 ! "License"); you may not use this file except in compliance
2622 ! with the License. You may obtain a copy of the License at
2623 !
2624 ! http://www.apache.org/licenses/LICENSE-2.0
2625 !
2626 ! Unless required by applicable law or agreed to in writing,
2627 ! software distributed under the License is distributed on an
2628 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
2629 ! KIND, either express or implied. See the License for the
2630 ! specific language governing permissions and limitations
2631 ! under the License.
2632 !-->
2633
2634<p>Window functions are special functions that compute aggregate values over a &#x201c;window&#x201d; of input data. Like an ordinary function, a window function returns a value for every item in the input dataset. But in the case of a window function, the value returned by the function can depend not only on the argument of the function, but also on other items in the same collection. For example, a window function applied to a set of employees might return the rank of each employee in the set, as measured by salary. As another example, a window function applied to a set of items, ordered by purchase date, might return the running total of the cost of the items.</p>
2635<p>A window function call is identified by an <tt>OVER</tt> clause, which can specify three things: partitioning, ordering, and framing. The partitioning specification is like a <tt>GROUP BY</tt>: it splits the input data into partitions. For example, a set of employees might be partitioned by department. The window function, when applied to a given object, is influenced only by other objects in the same partition. The ordering specification is like an <tt>ORDER BY</tt>: it determines the ordering of the objects in each partition. The framing specification defines a &#x201c;frame&#x201d; that moves through the partition, defining how the result for each object depends on nearby objects. For example, the frame for a current object might consist of the two objects before and after the current one; or it might consist of all the objects before the current one in the same partition. A window function call may also specify some options that control (for example) how nulls are handled by the function.</p>
2636<p>Here is an example of a window function call:</p>
2637
2638<div>
2639<div>
2640<pre class="source">SELECT deptno, purchase_date, item, cost,
2641 SUM(cost) OVER (
2642 PARTITION BY deptno
2643 ORDER BY purchase_date
2644 ROWS UNBOUNDED PRECEDING) AS running_total_cost
2645FROM purchases
2646ORDER BY deptno, purchase_date
2647</pre></div></div>
2648
2649<p>This example partitions the <tt>purchases</tt> dataset by department number. Within each department, it orders the <tt>purchases</tt> by date and computes a running total cost for each item, using the frame specification <tt>ROWS UNBOUNDED PRECEDING</tt>. Note that the <tt>ORDER BY</tt> clause in the window function is separate and independent from the <tt>ORDER BY</tt> clause of the query as a whole.</p>
2650<p>The general syntax of a window function call is specified in this section. SQL++ has a dedicated set of built-in window functions, which are listed and explained in the <a href="builtins.html#WindowFunctions">Window Functions</a> section of the built-in functions page. In addition, standard SQL aggregate functions such as <tt>SUM</tt> and <tt>AVG</tt> can be used as window functions when they are used with an <tt>OVER</tt> clause.</p></div></div></div></div>
2651<div class="section">
2652<h2><a name="Window_Function_Call"></a><a name="Window_function_call" id="Window_function_call">Window Function Call</a></h2><hr />
2653<div class="section">
2654<h3><a name="WindowFunctionCall"></a>WindowFunctionCall</h3>
2655<p><b><img src="../images/diagrams/WindowFunctionCall.png" alt="" /></b></p></div>
2656<div class="section">
2657<h3><a name="WindowFunctionType"></a>WindowFunctionType</h3>
2658<p><b><img src="../images/diagrams/WindowFunctionType.png" alt="" /></b></p><hr />
2659<p>Refer to the <a href="builtins.html#AggregateFunctions">Aggregate Functions</a> section for a list of aggregate functions.</p>
2660<p>Refer to the <a href="builtins.html#WindowFunctions">Window Functions</a> section for a list of window functions.</p></div>
2661<div class="section">
2662<h3><a name="Window_Function_Arguments"></a><a name="Window_function_arguments" id="Window_function_arguments">Window Function Arguments</a></h3><hr /></div>
2663<div class="section">
2664<h3><a name="WindowFunctionArguments"></a>WindowFunctionArguments</h3>
2665<p><b><img src="../images/diagrams/WindowFunctionArguments.png" alt="" /></b></p><hr />
2666<p>Refer to the <a href="builtins.html#AggregateFunctions">Aggregate Functions</a> section or the <a href="builtins.html#WindowFunctions">Window Functions</a> section for details of the arguments for individual functions.</p></div>
2667<div class="section">
2668<h3><a name="Window_Function_Options"></a><a name="Window_function_options" id="Window_function_options">Window Function Options</a></h3><hr /></div>
2669<div class="section">
2670<h3><a name="WindowFunctionOptions"></a>WindowFunctionOptions</h3>
2671<p><b><img src="../images/diagrams/WindowFunctionOptions.png" alt="" /></b></p><hr />
2672<p>Window function options cannot be used with <a href="builtins.html#AggregateFunctions">aggregate functions</a>.</p>
2673<p>Window function options can only be used with some <a href="builtins.html#WindowFunctions">window functions</a>, as described below.</p>
2674<p>The <i>FROM modifier</i> determines whether the computation begins at the first or last tuple in the window. It is optional and can only be used with the <tt>nth_value()</tt> function. If it is omitted, the default setting is <tt>FROM FIRST</tt>.</p>
2675<p>The <i>NULLS modifier</i> determines whether NULL values are included in the computation, or ignored. MISSING values are treated the same way as NULL values. It is also optional and can only be used with the <tt>first_value()</tt>, <tt>last_value()</tt>, <tt>nth_value()</tt>, <tt>lag()</tt>, and <tt>lead()</tt> functions. If omitted, the default setting is <tt>RESPECT NULLS</tt>.</p></div>
2676<div class="section">
2677<h3><a name="Window_Frame_Variable"></a><a name="Window_frame_variable" id="Window_frame_variable">Window Frame Variable</a></h3>
2678<p>The <tt>AS</tt> keyword enables you to specify an alias for the window frame contents. It introduces a variable which will be bound to the contents of the frame. When using a built-in <a href="builtins.html#AggregateFunctions">aggregate function</a> as a window function, the function&#x2019;s argument must be a subquery which refers to this alias, for example:</p>
2679
2680<div>
2681<div>
2682<pre class="source">SELECT ARRAY_COUNT(DISTINCT (FROM alias SELECT VALUE alias.src.field))
2683OVER alias AS (PARTITION BY &#x2026; ORDER BY &#x2026;)
2684FROM source AS src
2685</pre></div></div>
2686
2687<p>The alias is not necessary when using a <a href="builtins.html#WindowFunctions">window function</a>, or when using a standard SQL aggregate function with the <tt>OVER</tt> clause.</p></div>
2688<div class="section">
2689<h3><a name="Window_Definition"></a><a name="Window_definition" id="Window_definition">Window Definition</a></h3><hr /></div>
2690<div class="section">
2691<h3><a name="WindowDefinition"></a>WindowDefinition</h3>
2692<p><b><img src="../images/diagrams/WindowDefinition.png" alt="" /></b></p><hr />
2693<p>The <i>window definition</i> specifies the partitioning, ordering, and framing for window functions.</p>
2694<div class="section">
2695<h4><a name="Window_Partition_Clause"></a><a name="Window_partition_clause" id="Window_partition_clause">Window Partition Clause</a></h4><hr /></div></div>
2696<div class="section">
2697<h3><a name="WindowPartitionClause"></a>WindowPartitionClause</h3>
2698<p><b><img src="../images/diagrams/WindowPartitionClause.png" alt="" /></b></p><hr />
2699<p>The <i>window partition clause</i> divides the tuples into logical partitions using one or more expressions.</p>
2700<p>This clause may be used with any <a href="builtins.html#WindowFunctions">window function</a>, or any <a href="builtins.html#AggregateFunctions">aggregate function</a> used as a window function.</p>
2701<p>This clause is optional. If omitted, all tuples are united in a single partition.</p>
2702<div class="section">
2703<h4><a name="Window_Order_Clause"></a><a name="Window_order_clause" id="Window_order_clause">Window Order Clause</a></h4><hr /></div></div>
2704<div class="section">
2705<h3><a name="WindowOrderClause"></a>WindowOrderClause</h3>
2706<p><b><img src="../images/diagrams/WindowOrderClause.png" alt="" /></b></p><hr />
2707<p>The <i>window order clause</i> determines how tuples are ordered within each partition. The window function works on tuples in the order specified by this clause.</p>
2708<p>This clause may be used with any <a href="builtins.html#WindowFunctions">window function</a>, or any <a href="builtins.html#AggregateFunctions">aggregate function</a> used as a window function.</p>
2709<p>This clause is optional. If omitted, all tuples are considered peers, i.e. their order is tied. When tuples in the window partition are tied, each window function behaves differently.</p>
2710<ul>
2711
2712<li>
2713
2714<p>The <tt>row_number()</tt> function returns a distinct number for each tuple. If tuples are tied, the results may be unpredictable.</p>
2715</li>
2716<li>
2717
2718<p>The <tt>rank()</tt>, <tt>dense_rank()</tt>, <tt>percent_rank()</tt>, and <tt>cume_dist()</tt> functions return the same result for each tuple.</p>
2719</li>
2720<li>
2721
2722<p>For other functions, if the <a href="#Window_frame_clause">window frame</a> is defined by <tt>ROWS</tt>, the results may be unpredictable. If the window frame is defined by <tt>RANGE</tt> or <tt>GROUPS</tt>, the results are the same for each tuple.</p>
2723</li>
2724</ul>
2725<div class="section">
2726<div class="section">
2727<h5><a name="Note"></a>Note</h5>
2728<p>This clause does not guarantee the overall order of the query results. To guarantee the order of the final results, use the query <tt>ORDER BY</tt> clause.</p></div></div>
2729<div class="section">
2730<h4><a name="Window_Frame_Clause"></a><a name="Window_frame_clause" id="Window_frame_clause">Window Frame Clause</a></h4></div></div>
2731<div class="section">
2732<h3><a name="WindowFrameClause"></a>WindowFrameClause</h3>
2733<p><b><img src="../images/diagrams/WindowFrameClause.png" alt="" /></b></p>
2734<p>The <i>window frame clause</i> defines the window frame. It can be used with all <a href="builtins.html#AggregateFunctions">aggregate functions</a> and some <a href="builtins.html#WindowFunctions">window functions</a> - refer to the descriptions of individual functions for more details. It is optional and allowed only when the <a href="#Window_order_clause">window order clause</a> is present.</p>
2735<ul>
2736
2737<li>
2738
2739<p>If this clause is omitted and there is no <a href="#Window_order_clause">window order clause</a>, the window frame is the entire partition.</p>
2740</li>
2741<li>
2742
2743<p>If this clause is omitted but there is a <a href="#Window_order_clause">window order clause</a>, the window frame becomes all tuples in the partition preceding the current tuple and its peers - the same as <tt>RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW</tt>.</p>
2744</li>
2745</ul>
2746<p>The window frame can be defined in the following ways:</p>
2747<ul>
2748
2749<li>
2750
2751<p><tt>ROWS</tt>: Counts the exact number of tuples within the frame. If window ordering doesn&#x2019;t result in unique ordering, the function may produce unpredictable results. You can add a unique expression or more window ordering expressions to produce unique ordering.</p>
2752</li>
2753<li>
2754
2755<p><tt>RANGE</tt>: Looks for a value offset within the frame. The function produces deterministic results.</p>
2756</li>
2757<li>
2758
2759<p><tt>GROUPS</tt>: Counts all groups of tied rows within the frame. The function produces deterministic results.</p>
2760</li>
2761</ul>
2762<div class="section">
2763<div class="section">
2764<h5><a name="Note"></a>Note</h5>
2765<p>If this clause uses <tt>RANGE</tt> with either <i>Expr</i> <tt>PRECEDING</tt> or <i>Expr</i> <tt>FOLLOWING</tt>, the <a href="#Window_order_clause">window order clause</a> must have only a single ordering term.</p>
2766<p>The ordering term expression must evaluate to a number.</p>
2767<p>If these conditions are not met, the window frame will be empty, which means the window function will return its default value: in most cases this is <tt>null</tt>, except for <tt>strict_count()</tt> or <tt>array_count()</tt>, whose default value is 0. This restriction does not apply when the window frame uses <tt>ROWS</tt> or <tt>GROUPS</tt>.</p></div>
2768<div class="section">
2769<h5><a name="Tip"></a>Tip</h5>
2770<p>The <tt>RANGE</tt> window frame is commonly used to define window frames based on date or time.</p>
2771<p>If you want to use <tt>RANGE</tt> with either <i>Expr</i> <tt>PRECEDING</tt> or <i>Expr</i> <tt>FOLLOWING</tt>, and you want to use an ordering expression based on date or time, the expression in <i>Expr</i> <tt>PRECEDING</tt> or <i>Expr</i> <tt>FOLLOWING</tt> must use a data type that can be added to the ordering expression.</p></div></div>
2772<div class="section">
2773<h4><a name="Window_Frame_Extent"></a><a name="Window_frame_extent" id="Window_frame_extent">Window Frame Extent</a></h4><hr /></div></div>
2774<div class="section">
2775<h3><a name="WindowFrameExtent"></a>WindowFrameExtent</h3>
2776<p><b><img src="../images/diagrams/WindowFrameExtent.png" alt="" /></b></p><hr />
2777<p>The <i>window frame extent clause</i> specifies the start point and end point of the window frame. The expression before <tt>AND</tt> is the start point and the expression after <tt>AND</tt> is the end point. If <tt>BETWEEN</tt> is omitted, you can only specify the start point; the end point becomes <tt>CURRENT ROW</tt>.</p>
2778<p>The window frame end point can&#x2019;t be before the start point. If this clause violates this restriction explicitly, an error will result. If it violates this restriction implicitly, the window frame will be empty, which means the window function will return its default value: in most cases this is <tt>null</tt>, except for <tt>strict_count()</tt> or <tt>array_count()</tt>, whose default value is 0.</p>
2779<p>Window frame extents that result in an explicit violation are:</p>
2780<ul>
2781
2782<li>
2783
2784<p><tt>BETWEEN CURRENT ROW AND</tt> <i>Expr</i> <tt>PRECEDING</tt></p>
2785</li>
2786<li>
2787
2788<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND</tt> <i>Expr</i> <tt>PRECEDING</tt></p>
2789</li>
2790<li>
2791
2792<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND CURRENT ROW</tt></p>
2793</li>
2794</ul>
2795<p>Window frame extents that result in an implicit violation are:</p>
2796<ul>
2797
2798<li>
2799
2800<p><tt>BETWEEN UNBOUNDED PRECEDING AND</tt> <i>Expr</i> <tt>PRECEDING</tt> - if <i>Expr</i> is too high, some tuples may generate an empty window frame.</p>
2801</li>
2802<li>
2803
2804<p><tt>BETWEEN</tt> <i>Expr</i> <tt>PRECEDING AND</tt> <i>Expr</i> <tt>PRECEDING</tt> - if the second <i>Expr</i> is greater than or equal to the first <i>Expr</i>, all result sets will generate an empty window frame.</p>
2805</li>
2806<li>
2807
2808<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND</tt> <i>Expr</i> <tt>FOLLOWING</tt> - if the first <i>Expr</i> is greater than or equal to the second <i>Expr</i>, all result sets will generate an empty window frame.</p>
2809</li>
2810<li>
2811
2812<p><tt>BETWEEN</tt> <i>Expr</i> <tt>FOLLOWING AND UNBOUNDED FOLLOWING</tt> - if <i>Expr</i> is too high, some tuples may generate an empty window frame.</p>
2813</li>
2814<li>
2815
2816<p>If the <a href="#Window_frame_exclusion">window frame exclusion clause</a> is present, any window frame specification may result in an empty window frame.</p>
2817</li>
2818</ul>
2819<p>The <i>Expr</i> must be a positive constant or an expression that evaluates as a positive number. For <tt>ROWS</tt> or <tt>GROUPS</tt>, the <i>Expr</i> must be an integer.</p>
2820<div class="section">
2821<h4><a name="Window_Frame_Exclusion"></a><a name="Window_frame_exclusion" id="Window_frame_exclusion">Window Frame Exclusion</a></h4><hr /></div></div>
2822<div class="section">
2823<h3><a name="WindowFrameExclusion"></a>WindowFrameExclusion</h3>
2824<p><b><img src="../images/diagrams/WindowFrameExclusion.png" alt="" /></b></p><hr />
2825<p>The <i>window frame exclusion clause</i> enables you to exclude specified tuples from the window frame.</p>
2826<p>This clause can be used with all <a href="builtins.html#AggregateFunctions">aggregate functions</a> and some <a href="builtins.html#WindowFunctions">window functions</a> - refer to the descriptions of individual functions for more details.</p>
2827<p>This clause is allowed only when the <a href="#Window_frame_clause">window frame clause</a> is present.</p>
2828<p>This clause is optional. If this clause is omitted, the default is no exclusion - the same as <tt>EXCLUDE NO OTHERS</tt>.</p>
2829<ul>
2830
2831<li>
2832
2833<p><tt>EXCLUDE CURRENT ROW</tt>: If the current tuple is still part of the window frame, it is removed from the window frame.</p>
2834</li>
2835<li>
2836
2837<p><tt>EXCLUDE GROUP</tt>: The current tuple and any peers of the current tuple are removed from the window frame.</p>
2838</li>
2839<li>
2840
2841<p><tt>EXCLUDE TIES</tt>: Any peers of the current tuple, but not the current tuple itself, are removed from the window frame.</p>
2842</li>
2843<li>
2844
2845<p><tt>EXCLUDE NO OTHERS</tt>: No additional tuples are removed from the window frame.</p>
2846</li>
2847</ul>
2848<p>If the current tuple is already removed from the window frame, then it remains removed from the window frame.</p><!--
2849 ! Licensed to the Apache Software Foundation (ASF) under one
2850 ! or more contributor license agreements. See the NOTICE file
2851 ! distributed with this work for additional information
2852 ! regarding copyright ownership. The ASF licenses this file
2853 ! to you under the Apache License, Version 2.0 (the
2854 ! "License"); you may not use this file except in compliance
2855 ! with the License. You may obtain a copy of the License at
2856 !
2857 ! http://www.apache.org/licenses/LICENSE-2.0
2858 !
2859 ! Unless required by applicable law or agreed to in writing,
2860 ! software distributed under the License is distributed on an
2861 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
2862 ! KIND, either express or implied. See the License for the
2863 ! specific language governing permissions and limitations
2864 ! under the License.
2865 !-->
2866
2867<h1><a name="Errors" id="Errors">5. Errors</a></h1><!--
2868 ! Licensed to the Apache Software Foundation (ASF) under one
2869 ! or more contributor license agreements. See the NOTICE file
2870 ! distributed with this work for additional information
2871 ! regarding copyright ownership. The ASF licenses this file
2872 ! to you under the Apache License, Version 2.0 (the
2873 ! "License"); you may not use this file except in compliance
2874 ! with the License. You may obtain a copy of the License at
2875 !
2876 ! http://www.apache.org/licenses/LICENSE-2.0
2877 !
2878 ! Unless required by applicable law or agreed to in writing,
2879 ! software distributed under the License is distributed on an
2880 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
2881 ! KIND, either express or implied. See the License for the
2882 ! specific language governing permissions and limitations
2883 ! under the License.
2884 !-->
2885
2886<p>A query can potentially result in one of the following errors:</p>
2887<ul>
2888
2889<li>syntax error,</li>
2890<li>identifier resolution error,</li>
2891<li>type error,</li>
2892<li>resource error.</li>
2893</ul>
2894<p>If the query processor runs into any error, it will terminate the ongoing processing of the query and immediately return an error message to the client.</p></div></div>
2895<div class="section">
2896<h2><a name="Syntax_Errors"></a><a name="Syntax_errors" id="Syntax_errors">Syntax Errors</a></h2>
2897<p>A valid query must satisfy the grammar rules of the query language. Otherwise, a syntax error will be raised.</p>
2898<div class="section">
2899<div class="section">
2900<div class="section">
2901<h5><a name="Example"></a>Example</h5>
2902<p>(Q4.1)</p>
2903
2904<div>
2905<div>
2906<pre class="source">customers AS c
2907SELECT *
2908</pre></div></div>
2909
2910<p>Since the query has no <tt>FROM</tt> keyword before the dataset <tt>customers</tt>, we will get a syntax error as follows:</p>
2911
2912<div>
2913<div>
2914<pre class="source">ERROR: Code: 1 &quot;ASX1001: Syntax error: In line 2 &gt;&gt;customers AS c&lt;&lt; Encountered \&quot;AS\&quot; at column 11. &quot;
2915</pre></div></div>
2916</div>
2917<div class="section">
2918<h5><a name="Example"></a>Example</h5>
2919<p>(Q4.2)</p>
2920
2921<div>
2922<div>
2923<pre class="source"> FROM customers AS c
2924 WHERE type=&quot;advertiser&quot;
2925 SELECT *;
2926</pre></div></div>
2927
2928<p>Since &#x201c;type&#x201d; is a reserved keyword in the query parser, we will get a syntax error as follows:</p>
2929
2930<div>
2931<div>
2932<pre class="source">ERROR: Code: 1 &quot;ASX1001: Syntax error: In line 3 &gt;&gt; WHERE type=\&quot;advertiser\&quot;&lt;&lt; Encountered \&quot;type\&quot; at column 8. &quot;;
2933</pre></div></div>
2934</div></div></div></div>
2935<div class="section">
2936<h2><a name="Identifier_Resolution_Errors"></a><a name="Identifier_resolution_errors" id="Identifier_resolution_errors">Identifier Resolution Errors</a></h2>
2937<p>Referring to an undefined identifier can cause an error if the identifier cannot be successfully resolved as a valid field access.</p>
2938<div class="section">
2939<div class="section">
2940<div class="section">
2941<h5><a name="Example"></a>Example</h5>
2942<p>(Q4.3)</p>
2943
2944<div>
2945<div>
2946<pre class="source"> FROM customer AS c
2947 SELECT *
2948</pre></div></div>
2949
2950<p>If, as above, we mistype &#x201c;customers&#x201d; and omit the dataset name&#x2019;s final &#x201c;s&#x201d;, we will get an identifier resolution error as follows:</p>
2951
2952<div>
2953<div>
2954<pre class="source">ERROR: Code: 1 &quot;ASX1077: Cannot find dataset customer in dataverse Commerce nor an alias with name customer! (in line 2, at column 7)&quot;
2955</pre></div></div>
2956</div>
2957<div class="section">
2958<h5><a name="Example"></a>Example</h5>
2959<p>(Q4.4)</p>
2960
2961<div>
2962<div>
2963<pre class="source"> FROM customers AS c JOIN orders AS o ON c.custid = o.custid
2964 SELECT name, orderno;
2965</pre></div></div>
2966
2967<p>If the compiler cannot figure out how to resolve an unqualified field name, which will occur if there is more than one variable in scope (e.g., <tt>customers AS c</tt> and <tt>orders AS o</tt> as above), we will get an identifier resolution error as follows:</p>
2968
2969<div>
2970<div>
2971<pre class="source">ERROR: Code: 1 &quot;ASX1074: Cannot resolve ambiguous alias reference for identifier name (in line 3, at column 9)&quot;
2972</pre></div></div>
2973
2974<p>The same can happen when failing to properly identify the <tt>GROUP BY</tt> expression.</p>
2975<p>(Q4.5)</p>
2976
2977<div>
2978<div>
2979<pre class="source">SELECT o.custid, COUNT(o.orderno) AS `order count`
2980FROM orders AS o
2981GROUP BY custid;
2982</pre></div></div>
2983
2984<p>Result:</p>
2985
2986<div>
2987<div>
2988<pre class="source">ERROR: Code: 1 &quot;ASX1073: Cannot resolve alias reference for undefined identifier o (in line 2, at column 8)&quot;
2989</pre></div></div>
2990</div></div></div></div>
2991<div class="section">
2992<h2><a name="Type_Errors"></a><a name="Type_errors" id="Type_errors">Type Errors</a></h2>
2993<p>The query compiler does type checks based on its available type information. In addition, the query runtime also reports type errors if a data model instance it processes does not satisfy the type requirement.</p>
2994<div class="section">
2995<div class="section">
2996<div class="section">
2997<h5><a name="Example"></a>Example</h5>
2998<p>(Q4.6)</p>
2999
3000<div>
3001<div>
3002<pre class="source">get_day(10/11/2020);
3003</pre></div></div>
3004
3005<p>Since function <tt>get_day</tt> can only process duration, daytimeduration, date, or datetime input values, we will get a type error as follows:</p>
3006
3007<div>
3008<div>
3009<pre class="source">ERROR: Code: 1 &quot;ASX0002: Type mismatch: function get-day expects its 1st input parameter to be of type duration, daytimeduration, date or datetime, but the actual input type is double (in line 2, at column 1)&quot;
3010</pre></div></div>
3011</div></div></div></div>
3012<div class="section">
3013<h2><a name="Resource_Errors"></a><a name="Resource_errors" id="Resource_errors">Resource Errors</a></h2>
3014<p>A query can potentially exhaust system resources, such as the number of open files and disk space. For instance, the following two resource errors could potentially be seen when running the system:</p>
3015
3016<div>
3017<div>
3018<pre class="source">Error: no space left on device
3019Error: too many open files
3020</pre></div></div>
3021
3022<p>The &#x201c;no space left on device&#x201d; issue usually can be fixed by cleaning up disk space and reserving more disk space for the system. The &#x201c;too many open files&#x201d; issue usually can be fixed by a system administrator, following the instructions <a class="externalLink" href="https://easyengine.io/tutorials/linux/increase-open-files-limit/">here</a>.</p><!--
3023 ! Licensed to the Apache Software Foundation (ASF) under one
3024 ! or more contributor license agreements. See the NOTICE file
3025 ! distributed with this work for additional information
3026 ! regarding copyright ownership. The ASF licenses this file
3027 ! to you under the Apache License, Version 2.0 (the
3028 ! "License"); you may not use this file except in compliance
3029 ! with the License. You may obtain a copy of the License at
3030 !
3031 ! http://www.apache.org/licenses/LICENSE-2.0
3032 !
3033 ! Unless required by applicable law or agreed to in writing,
3034 ! software distributed under the License is distributed on an
3035 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3036 ! KIND, either express or implied. See the License for the
3037 ! specific language governing permissions and limitations
3038 ! under the License.
3039 !-->
3040
3041<h1><a name="Vs_SQL-92" id="Vs_SQL-92">6. Differences from SQL-92</a></h1><!--
3042 ! Licensed to the Apache Software Foundation (ASF) under one
3043 ! or more contributor license agreements. See the NOTICE file
3044 ! distributed with this work for additional information
3045 ! regarding copyright ownership. The ASF licenses this file
3046 ! to you under the Apache License, Version 2.0 (the
3047 ! "License"); you may not use this file except in compliance
3048 ! with the License. You may obtain a copy of the License at
3049 !
3050 ! http://www.apache.org/licenses/LICENSE-2.0
3051 !
3052 ! Unless required by applicable law or agreed to in writing,
3053 ! software distributed under the License is distributed on an
3054 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3055 ! KIND, either express or implied. See the License for the
3056 ! specific language governing permissions and limitations
3057 ! under the License.
3058 !-->
3059
3060<p>SQL++ offers the following additional features beyond SQL-92:</p>
3061<ul>
3062
3063<li>Fully composable and functional: A subquery can iterate over any intermediate collection and can appear anywhere in a query.</li>
3064<li>Schema-free: The query language does not assume the existence of a static schema for any data that it processes.</li>
3065<li>Correlated <tt>FROM</tt> terms: A right-side <tt>FROM</tt> term expression can refer to variables defined by <tt>FROM</tt> terms on its left.</li>
3066<li>Powerful <tt>GROUP BY</tt>: In addition to a set of aggregate functions as in standard SQL, the groups created by the <tt>GROUP BY</tt> clause are directly usable in nested queries and/or to obtain nested results.</li>
3067<li>Generalized <tt>SELECT</tt> clause: A <tt>SELECT</tt> clause can return any type of collection, while in SQL-92, a <tt>SELECT</tt> clause has to return a (homogeneous) collection of objects.</li>
3068</ul>
3069<p>The following matrix is a quick &#x201c;SQL-92 compatibility cheat sheet&#x201d; for SQL++.</p>
3070<table border="0" class="table table-striped">
3071<thead>
3072
3073<tr class="a">
3074<th> Feature </th>
3075<th> SQL++ </th>
3076<th> SQL-92 </th>
3077<th> Why different? </th></tr>
3078</thead><tbody>
3079
3080<tr class="b">
3081<td> SELECT * </td>
3082<td> Returns nested objects </td>
3083<td> Returns flattened concatenated objects </td>
3084<td> Nested collections are 1st class citizens </td></tr>
3085<tr class="a">
3086<td> SELECT list </td>
3087<td> order not preserved </td>
3088<td> order preserved </td>
3089<td> Fields in a JSON object are not ordered </td></tr>
3090<tr class="b">
3091<td> Subquery </td>
3092<td> Returns a collection </td>
3093<td> The returned collection is cast into a scalar value if the subquery appears in a SELECT list or on one side of a comparison or as input to a function </td>
3094<td> Nested collections are 1st class citizens </td></tr>
3095<tr class="a">
3096<td> LEFT OUTER JOIN </td>
3097<td> Fills in <tt>MISSING</tt>(s) for non-matches </td>
3098<td> Fills in <tt>NULL</tt>(s) for non-matches </td>
3099<td> &#x201c;Absence&#x201d; is more appropriate than &#x201c;unknown&#x201d; here </td></tr>
3100<tr class="b">
3101<td> UNION ALL </td>
3102<td> Allows heterogeneous inputs and output </td>
3103<td> Input streams must be UNION-compatible and output field names are drawn from the first input stream </td>
3104<td> Heterogeneity and nested collections are common </td></tr>
3105<tr class="a">
3106<td> IN constant_expr </td>
3107<td> The constant expression has to be an array or multiset, i.e., [..,..,&#x2026;] </td>
3108<td> The constant collection can be represented as comma-separated items in a paren pair </td>
3109<td> Nested collections are 1st class citizens </td></tr>
3110<tr class="b">
3111<td> String literal </td>
3112<td> Double quotes or single quotes </td>
3113<td> Single quotes only </td>
3114<td> Double quoted strings are pervasive in JSON</td></tr>
3115<tr class="a">
3116<td> Delimited identifiers </td>
3117<td> Backticks </td>
3118<td> Double quotes </td>
3119<td> Double quoted strings are pervasive in JSON </td></tr>
3120</tbody>
3121</table>
3122<p>The following SQL-92 features are not implemented yet. However, SQL++ does not conflict with these features:</p>
3123<ul>
3124
3125<li>CROSS JOIN, NATURAL JOIN, UNION JOIN</li>
3126<li>RIGHT and FULL OUTER JOIN</li>
3127<li>INTERSECT, EXCEPT, UNION with set semantics</li>
3128<li>CAST expression</li>
3129<li>COALESCE expression</li>
3130<li>ALL and SOME predicates for linking to subqueries</li>
3131<li>UNIQUE predicate (tests a collection for duplicates)</li>
3132<li>MATCH predicate (tests for referential integrity)</li>
3133<li>Row and Table constructors</li>
3134<li>Preserved order for expressions in a SELECT list</li>
3135</ul><!--
3136 ! Licensed to the Apache Software Foundation (ASF) under one
3137 ! or more contributor license agreements. See the NOTICE file
3138 ! distributed with this work for additional information
3139 ! regarding copyright ownership. The ASF licenses this file
3140 ! to you under the Apache License, Version 2.0 (the
3141 ! "License"); you may not use this file except in compliance
3142 ! with the License. You may obtain a copy of the License at
3143 !
3144 ! http://www.apache.org/licenses/LICENSE-2.0
3145 !
3146 ! Unless required by applicable law or agreed to in writing,
3147 ! software distributed under the License is distributed on an
3148 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3149 ! KIND, either express or implied. See the License for the
3150 ! specific language governing permissions and limitations
3151 ! under the License.
3152 !-->
3153
3154<h1><a name="DDL_and_DML_statements" id="DDL_and_DML_statements">7. DDL and DML statements</a></h1><hr />
3155<div class="section">
3156<h3><a name="Stmnt"></a>Stmnt</h3>
3157<p><b><img src="../images/diagrams/Stmnt.png" alt="" /></b></p></div>
3158<div class="section">
3159<h3><a name="SingleStmnt"></a>SingleStmnt</h3>
3160<p><b><img src="../images/diagrams/SingleStmnt.png" alt="" /></b></p><hr />
3161<p>In addition to queries, an implementation of SQL++ needs to support statements for data definition and manipulation purposes as well as controlling the context to be used in evaluating query expressions. This section details the DDL and DML statements supported in SQL++ as realized today in Apache AsterixDB.</p><!--
3162 ! Licensed to the Apache Software Foundation (ASF) under one
3163 ! or more contributor license agreements. See the NOTICE file
3164 ! distributed with this work for additional information
3165 ! regarding copyright ownership. The ASF licenses this file
3166 ! to you under the Apache License, Version 2.0 (the
3167 ! "License"); you may not use this file except in compliance
3168 ! with the License. You may obtain a copy of the License at
3169 !
3170 ! http://www.apache.org/licenses/LICENSE-2.0
3171 !
3172 ! Unless required by applicable law or agreed to in writing,
3173 ! software distributed under the License is distributed on an
3174 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3175 ! KIND, either express or implied. See the License for the
3176 ! specific language governing permissions and limitations
3177 ! under the License.
3178 !-->
3179</div></div>
3180<div class="section">
3181<h2><a name="Lifecycle_Management_Statements"></a><a name="Lifecycle_management_statements" id="Lifecycle_management_statements">Lifecycle Management Statements</a></h2>
3182<div class="section">
3183<h3><a name="Use_Statement"></a><a name="Use" id="Use">Use Statement</a></h3><hr /></div>
3184<div class="section">
3185<h3><a name="UseStmnt"></a>UseStmnt</h3>
3186<p><b><img src="../images/diagrams/UseStmnt.png" alt="" /></b></p><hr />
3187<p>At the uppermost level, the world of data is organized into data namespaces called <b>dataverses</b>. To set the default dataverse for statements, the <tt>USE</tt> statement is provided.</p>
3188<p>As an example, the following statement sets the default dataverse to be <tt>Commerce</tt>.</p>
3189
3190<div>
3191<div>
3192<pre class="source">USE Commerce;
3193</pre></div></div>
3194</div>
3195<div class="section">
3196<h3><a name="Set_Statement"></a><a name="Sets" id="Sets"> Set Statement</a></h3>
3197<p>The <tt>SET</tt> statement can be used to override certain configuration parameters. More information about <tt>SET</tt> can be found in <a href="#Performance_tuning">Appendix 2</a>.</p></div>
3198<div class="section">
3199<h3><a name="Function_Declaration"></a><a name="Functions" id="Functions"> Function Declaration</a></h3>
3200<p>When writing a complex query, it can sometimes be helpful to define one or more auxiliary functions that each address a sub-piece of the overall query.</p>
3201<p>The <tt>DECLARE FUNCTION</tt> statement supports the creation of such helper functions. In general, the function body (expression) can be any legal query expression.</p>
3202<p>The function named in the <tt>DECLARE FUNCTION</tt> statement is accessible only in the current query. To create a persistent function for use in multiple queries, use the <tt>CREATE FUNCTION</tt> statement.</p><hr /></div>
3203<div class="section">
3204<h3><a name="FunctionDeclaration"></a>FunctionDeclaration</h3>
3205<p><b><img src="../images/diagrams/FunctionDeclaration.png" alt="" /></b></p></div>
3206<div class="section">
3207<h3><a name="ParameterList"></a>ParameterList</h3>
3208<p><b><img src="../images/diagrams/ParameterList.png" alt="" /></b></p><hr />
3209<p>The following is a simple example of a temporary function definition and its use.</p>
3210<div class="section">
3211<div class="section">
3212<h5><a name="Example"></a>Example</h5>
3213
3214<div>
3215<div>
3216<pre class="source">DECLARE FUNCTION nameSearch(customerId){
3217 (SELECT c.custid, c.name
3218 FROM customers AS c
3219 WHERE c.custid = customerId)[0]
3220 };
3221
3222
3223SELECT VALUE nameSearch(&quot;C25&quot;);
3224</pre></div></div>
3225
3226<p>For our sample data set, this returns:</p>
3227
3228<div>
3229<div>
3230<pre class="source">[
3231 { &quot;custid&quot;: &quot;C25&quot;, &quot;name&quot;: &quot;M. Sinclair&quot; }
3232]
3233</pre></div></div>
3234</div></div></div>
3235<div class="section">
3236<h3><a name="Create_Statement"></a><a name="Create" id="Create"> Create Statement</a></h3><hr /></div>
3237<div class="section">
3238<h3><a name="CreateStmnt"></a>CreateStmnt</h3>
3239<p><b><img src="../images/diagrams/CreateStmnt.png" alt="" /></b></p></div>
3240<div class="section">
3241<h3><a name="QualifiedName"></a>QualifiedName</h3>
3242<p><b><img src="../images/diagrams/QualifiedName.png" alt="" /></b></p></div>
3243<div class="section">
3244<h3><a name="DoubleQualifiedName"></a>DoubleQualifiedName</h3>
3245<p><b><img src="../images/diagrams/DoubleQualifiedName.png" alt="" /></b></p><hr />
3246<p>The <tt>CREATE</tt> statement is used for creating dataverses as well as other persistent artifacts in a dataverse. It can be used to create new dataverses, datatypes, datasets, indexes, and user-defined query functions.</p>
3247<div class="section">
3248<h4><a name="Create_Dataverse"></a><a name="Dataverses" id="Dataverses"> Create Dataverse</a></h4><hr /></div></div>
3249<div class="section">
3250<h3><a name="CreateDataverse"></a>CreateDataverse</h3>
3251<p><b><img src="../images/diagrams/CreateDataverse.png" alt="" /></b></p><hr />
3252<p>The <tt>CREATE DATAVERSE</tt> statement is used to create new dataverses. To ease the authoring of reusable query scripts, an optional <tt>IF NOT EXISTS</tt> clause is included to allow creation to be requested either unconditionally or only if the dataverse does not already exist. If this clause is absent, an error is returned if a dataverse with the indicated name already exists.</p>
3253<p>The following example creates a new dataverse named <tt>Commerce</tt> if one does not already exist.</p>
3254<div class="section">
3255<div class="section">
3256<h5><a name="Example"></a>Example</h5>
3257
3258<div>
3259<div>
3260<pre class="source">CREATE DATAVERSE Commerce IF NOT EXISTS;
3261</pre></div></div>
3262</div></div>
3263<div class="section">
3264<h4><a name="Create_Type"></a><a name="Types" id="Types"> Create Type </a></h4><hr /></div></div>
3265<div class="section">
3266<h3><a name="CreateType"></a>CreateType</h3>
3267<p><b><img src="../images/diagrams/CreateType.png" alt="" /></b></p></div>
3268<div class="section">
3269<h3><a name="ObjectTypeDef"></a>ObjectTypeDef</h3>
3270<p><b><img src="../images/diagrams/ObjectTypeDef.png" alt="" /></b></p></div>
3271<div class="section">
3272<h3><a name="ObjectField"></a>ObjectField</h3>
3273<p><b><img src="../images/diagrams/ObjectField.png" alt="" /></b></p></div>
3274<div class="section">
3275<h3><a name="TypeExpr"></a>TypeExpr</h3>
3276<p><b><img src="../images/diagrams/TypeExpr.png" alt="" /></b></p></div>
3277<div class="section">
3278<h3><a name="ArrayTypeDef"></a>ArrayTypeDef</h3>
3279<p><b><img src="../images/diagrams/ArrayTypeDef.png" alt="" /></b></p></div>
3280<div class="section">
3281<h3><a name="MultisetTypeDef"></a>MultisetTypeDef</h3>
3282<p><b><img src="../images/diagrams/MultisetTypeDef.png" alt="" /></b></p></div>
3283<div class="section">
3284<h3><a name="TypeRef"></a>TypeRef</h3>
3285<p><b><img src="../images/diagrams/TypeRef.png" alt="" /></b></p><hr />
3286<p>The <tt>CREATE TYPE</tt> statement is used to create a new named datatype. This type can then be used to create stored collections or utilized when defining one or more other datatypes. Much more information about the data model is available in the <a href="../datamodel.html">data model reference guide</a>. A new type can be an object type, a renaming of another type, an array type, or a multiset type. An object type can be defined as being either open or closed. Instances of a closed object type are not permitted to contain fields other than those specified in the create type statement. Instances of an open object type may carry additional fields, and open is the default for new types if neither option is specified.</p>
3287<p>The following example creates three new object types called <tt>addressType</tt>, <tt>customerType</tt> and <tt>itemType</tt>. Their fields are essentially traditional typed name/value pairs (much like SQL fields). Since they are defined as (defaulting to) being open types, instances will be permitted to contain more than what is specified in the type definitions. Indeed many of the customer objects contain a rating as well, however this is not necessary for the customer object to be created. As can be seen in the sample data, customers can exist without ratings or with part (or all) of the address missing.</p>
3288<div class="section">
3289<div class="section">
3290<h5><a name="Example"></a>Example</h5>
3291
3292<div>
3293<div>
3294<pre class="source">CREATE TYPE addressType AS {
3295 street: string,
3296 city: string,
3297 zipcode: string?
3298};
3299
3300CREATE TYPE customerType AS {
3301 custid: string,
3302 name: string,
3303 address: addressType?
3304};
3305
3306CREATE TYPE itemType AS {
3307 itemno: int,
3308 qty: int,
3309 price: int
3310};
3311</pre></div></div>
3312
3313<p>Optionally, you may wish to create a type that has an automatically generated primary key field. The example below shows an alternate form of <tt>itemType</tt> which achieves this by setting its primary key, <tt>itemno</tt>, to UUID. (Refer to the Datasets section later for more details on such fields.)</p></div>
3314<div class="section">
3315<h5><a name="Example"></a>Example</h5>
3316
3317<div>
3318<div>
3319<pre class="source">CREATE TYPE itemType AS {
3320 itemno: uuid,
3321 qty: int,
3322 price: int
3323};
3324</pre></div></div>
3325
3326<p>Note that the type of the <tt>itemno</tt> in this example is UUID. This field type can be used if you want to have an autogenerated-PK field. (Refer to the Datasets section later for more details on such fields.)</p>
3327<p>The next example creates a new object type, closed this time, called <tt>orderType</tt>. Instances of this closed type will not be permitted to have extra fields, although the <tt>ship_date</tt> field is marked as optional and may thus be <tt>NULL</tt> or <tt>MISSING</tt> in legal instances of the type. The items field is an array of instances of another object type, <tt>itemType</tt>.</p></div>
3328<div class="section">
3329<h5><a name="Example"></a>Example</h5>
3330
3331<div>
3332<div>
3333<pre class="source">CREATE TYPE orderType AS CLOSED {
3334 orderno: int,
3335 custid: string,
3336 order_date: string,
3337 ship_date: string?,
3338 items: [ itemType ]
3339};
3340</pre></div></div>
3341</div></div>
3342<div class="section">
3343<h4><a name="Create_Dataset"></a><a name="Datasets" id="Datasets"> Create Dataset</a></h4><hr /></div></div>
3344<div class="section">
3345<h3><a name="CreateDataset"></a>CreateDataset</h3>
3346<p><b><img src="../images/diagrams/CreateDataset.png" alt="" /></b></p></div>
3347<div class="section">
3348<h3><a name="CreateInternalDataset"></a>CreateInternalDataset</h3>
3349<p><b><img src="../images/diagrams/CreateInternalDataset.png" alt="" /></b></p></div>
3350<div class="section">
3351<h3><a name="CreateExternalDataset"></a>CreateExternalDataset</h3>
3352<p><b><img src="../images/diagrams/CreateExternalDataset.png" alt="" /></b></p></div>
3353<div class="section">
3354<h3><a name="AdapterName"></a>AdapterName</h3>
3355<p><b><img src="../images/diagrams/AdapterName.png" alt="" /></b></p></div>
3356<div class="section">
3357<h3><a name="Configuration"></a>Configuration</h3>
3358<p><b><img src="../images/diagrams/Configuration.png" alt="" /></b></p></div>
3359<div class="section">
3360<h3><a name="KeyValuePair"></a>KeyValuePair</h3>
3361<p><b><img src="../images/diagrams/KeyValuePair.png" alt="" /></b></p></div>
3362<div class="section">
3363<h3><a name="Properties"></a>Properties</h3>
3364<p><b><img src="../images/diagrams/Properties.png" alt="" /></b></p></div>
3365<div class="section">
3366<h3><a name="PrimaryKey"></a>PrimaryKey</h3>
3367<p><b><img src="../images/diagrams/PrimaryKey.png" alt="" /></b></p></div>
3368<div class="section">
3369<h3><a name="NestedField"></a>NestedField</h3>
3370<p><b><img src="../images/diagrams/NestedField.png" alt="" /></b></p></div>
3371<div class="section">
3372<h3><a name="CompactionPolicy"></a>CompactionPolicy</h3>
3373<p><b><img src="../images/diagrams/CompactionPolicy.png" alt="" /></b></p><hr />
3374<p>The <tt>CREATE DATASET</tt> statement is used to create a new dataset. Datasets are named multisets of object type instances; they are where data lives persistently and are the usual targets for queries. Datasets are typed, and the system ensures that their contents conform to their type definitions. An Internal dataset (the default kind) is a dataset whose content lives within and is managed by the system. It is required to have a specified unique primary key field which uniquely identifies the contained objects. (The primary key is also used in secondary indexes to identify the indexed primary data objects.)</p>
3375<p>Internal datasets contain several advanced options that can be specified when appropriate. One such option is that random primary key (UUID) values can be auto-generated by declaring the field to be UUID and putting <tt>AUTOGENERATED</tt> after the <tt>PRIMARY KEY</tt> identifier. In this case, unlike other non-optional fields, a value for the auto-generated PK field should not be provided at insertion time by the user since each object&#x2019;s primary key field value will be auto-generated by the system.</p>
3376<p>Another advanced option, when creating an Internal dataset, is to specify the merge policy to control which of the underlying LSM storage components are to be merged. (The system supports Log-Structured Merge tree based physical storage for Internal datasets.) Currently the system supports four different component merging policies that can be chosen per dataset: no-merge, constant, prefix, and correlated-prefix. The no-merge policy simply never merges disk components. The constant policy merges disk components when the number of components reaches a constant number k that can be configured by the user. The prefix policy relies on both component sizes and the number of components to decide which components to merge. It works by first trying to identify the smallest ordered (oldest to newest) sequence of components such that the sequence does not contain a single component that exceeds some threshold size M and that either the sum of the components&#x2019; sizes exceeds M or the number of components in the sequence exceeds another threshold C. If such a sequence exists, the components in the sequence are merged together to form a single component. Finally, the correlated-prefix policy is similar to the prefix policy, but it delegates the decision of merging the disk components of all the indexes in a dataset to the primary index. When the correlated-prefix policy decides that the primary index needs to be merged (using the same decision criteria as for the prefix policy), then it will issue successive merge requests on behalf of all other indexes associated with the same dataset. The system&#x2019;s default policy is the prefix policy except when there is a filter on a dataset, where the preferred policy for filters is the correlated-prefix.</p>
3377<p>Another advanced option shown in the syntax above, related to performance and mentioned above, is that a <b>filter</b> can optionally be created on a field to further optimize range queries with predicates on the filter&#x2019;s field. Filters allow some range queries to avoid searching all LSM components when the query conditions match the filter. (Refer to <a href="../sqlpp/filters.html">Filter-Based LSM Index Acceleration</a> for more information about filters.)</p>
3378<p>An External dataset, in contrast to an Internal dataset, has data stored outside of the system&#x2019;s control. Files living in HDFS or in the local filesystem(s) of a cluster&#x2019;s nodes are currently supported. External dataset support allows queries to treat foreign data as though it were stored in the system, making it possible to query &#x201c;legacy&#x201d; file data (for example, Hive data) without having to physically import it. When defining an External dataset, an appropriate adapter type must be selected for the desired external data. (See the <a href="../aql/externaldata.html">Guide to External Data</a> for more information on the available adapters.)</p>
3379<p>The following example creates an Internal dataset for storing <tt>customerType</tt> objects. It specifies that their <tt>custid</tt> field is their primary key.</p>
3380<div class="section">
3381<h4><a name="Example"></a>Example</h4>
3382
3383<div>
3384<div>
3385<pre class="source">CREATE INTERNAL DATASET customers(customerType) PRIMARY KEY custid;
3386</pre></div></div>
3387
3388<p>The next example creates an Internal dataset (the default kind when no dataset kind is specified) for storing <tt>itemType</tt> objects. It specifies that the <tt>itemno</tt> field should be used as the primary key for the dataset. It also specifies that the <tt>itemno</tt> field is an auto-generated field, meaning that a randomly generated UUID value should be assigned to each incoming object by the system. (A user should therefore not attempt to provide a value for this field.)</p>
3389<p>Note that the <tt>itemno</tt> field&#x2019;s declared type must be UUID in this case.</p></div>
3390<div class="section">
3391<h4><a name="Example"></a>Example</h4>
3392
3393<div>
3394<div>
3395<pre class="source">CREATE DATASET MyItems(itemType) PRIMARY KEY itemno AUTOGENERATED;
3396</pre></div></div>
3397
3398<p>The next example creates an External dataset for querying LineItemType objects. The choice of the <tt>hdfs</tt> adapter means that this dataset&#x2019;s data actually resides in HDFS. The example <tt>CREATE</tt> statement also provides parameters used by the hdfs adapter: the URL and path needed to locate the data in HDFS and a description of the data format.</p></div>
3399<div class="section">
3400<h4><a name="Example"></a>Example</h4>
3401
3402<div>
3403<div>
3404<pre class="source">CREATE EXTERNAL DATASET LineItem(LineItemType) USING hdfs (
3405 (&quot;hdfs&quot;=&quot;hdfs://HOST:PORT&quot;),
3406 (&quot;path&quot;=&quot;HDFS_PATH&quot;),
3407 (&quot;input-format&quot;=&quot;text-input-format&quot;),
3408 (&quot;format&quot;=&quot;delimited-text&quot;),
3409 (&quot;delimiter&quot;=&quot;|&quot;));
3410</pre></div></div>
3411</div>
3412<div class="section">
3413<h4><a name="Create_Index"></a><a name="Indices" id="Indices">Create Index</a></h4><hr /></div></div>
3414<div class="section">
3415<h3><a name="CreateIndex"></a>CreateIndex</h3>
3416<p><b><img src="../images/diagrams/CreateIndex.png" alt="" /></b></p></div>
3417<div class="section">
3418<h3><a name="CreateSecondaryIndex"></a>CreateSecondaryIndex</h3>
3419<p><b><img src="../images/diagrams/CreateSecondaryIndex.png" alt="" /></b></p></div>
3420<div class="section">
3421<h3><a name="CreatePrimaryKeyIndex"></a>CreatePrimaryKeyIndex</h3>
3422<p><b><img src="../images/diagrams/CreatePrimaryKeyIndex.png" alt="" /></b></p></div>
3423<div class="section">
3424<h3><a name="IndexField"></a>IndexField</h3>
3425<p><b><img src="../images/diagrams/IndexField.png" alt="" /></b></p></div>
3426<div class="section">
3427<h3><a name="NestedField"></a>NestedField</h3>
3428<p><b><img src="../images/diagrams/NestedField.png" alt="" /></b></p></div>
3429<div class="section">
3430<h3><a name="IndexType"></a>IndexType</h3>
3431<p><b><img src="../images/diagrams/IndexType.png" alt="" /></b></p><hr />
3432<p>The <tt>CREATE INDEX</tt> statement creates a secondary index on one or more fields of a specified dataset. Supported index types include <tt>BTREE</tt> for totally ordered datatypes, <tt>RTREE</tt> for spatial data, and <tt>KEYWORD</tt> and <tt>NGRAM</tt> for textual (string) data. An index can be created on a nested field (or fields) by providing a valid path expression as an index field identifier.</p>
3433<p>An indexed field is not required to be part of the datatype associated with a dataset if the dataset&#x2019;s datatype is declared as open <b>and</b> if the field&#x2019;s type is provided along with its name and if the <tt>ENFORCED</tt> keyword is specified at the end of the index definition. <tt>ENFORCING</tt> an open field introduces a check that makes sure that the actual type of the indexed field (if the optional field exists in the object) always matches this specified (open) field type.</p>
3434<p>The following example creates a btree index called <tt>cCustIdx</tt> on the <tt>custid</tt> field of the orders dataset. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the <tt>custid</tt> field.</p>
3435<div class="section">
3436<h4><a name="Example"></a>Example</h4>
3437
3438<div>
3439<div>
3440<pre class="source">CREATE INDEX cCustIdx ON orders(custid) TYPE BTREE;
3441</pre></div></div>
3442
3443<p>The following example creates an open btree index called <tt>oCreatedTimeIdx</tt> on the (non-declared) <tt>createdTime</tt> field of the <tt>orders</tt> dataset having <tt>datetime</tt> type. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the <tt>createdTime</tt> field. The index is enforced so that records that do not have the <tt>createdTime</tt> field or have a mismatched type on the field cannot be inserted into the dataset.</p></div>
3444<div class="section">
3445<h4><a name="Example"></a>Example</h4>
3446
3447<div>
3448<div>
3449<pre class="source">CREATE INDEX oCreatedTimeIdx ON orders(createdTime: datetime?) TYPE BTREE ENFORCED;
3450</pre></div></div>
3451
3452<p>The following example creates an open btree index called <tt>cAddedTimeIdx</tt> on the (non-declared) <tt>addedTime</tt> field of the <tt>customers</tt> dataset having datetime type. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the <tt>addedTime</tt> field. The index is not enforced so that records that do not have the <tt>addedTime</tt> field or have a mismatched type on the field can still be inserted into the dataset.</p></div>
3453<div class="section">
3454<h4><a name="Example"></a>Example</h4>
3455
3456<div>
3457<div>
3458<pre class="source">CREATE INDEX cAddedTimeIdx ON customers(addedTime: datetime?);
3459</pre></div></div>
3460
3461<p>The following example creates a btree index called <tt>oOrderUserNameIdx</tt> on <tt>orderUserName</tt>, a nested field residing within an object-valued <tt>order</tt> field in the <tt>orders</tt> dataset. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the nested <tt>orderUserName</tt> field. Such nested fields must be singular, i.e., one cannot index through (or on) an array-valued field.</p></div>
3462<div class="section">
3463<h4><a name="Example"></a>Example</h4>
3464
3465<div>
3466<div>
3467<pre class="source">CREATE INDEX oOrderUserNameIdx ON orders(order.orderUserName) TYPE BTREE;
3468</pre></div></div>
3469
3470<p>The following example creates an open rtree index called <tt>oOrderLocIdx</tt> on the order-location field of the <tt>orders</tt> dataset. This index can be useful for accelerating queries that use the <a href="builtins.html#spatial_intersect"><tt>spatial-intersect</tt> function</a> in a predicate involving the order-location field.</p></div>
3471<div class="section">
3472<h4><a name="Example"></a>Example</h4>
3473
3474<div>
3475<div>
3476<pre class="source">CREATE INDEX oOrderLocIdx ON orders(`order-location` : point?) TYPE RTREE ENFORCED;
3477</pre></div></div>
3478
3479<p>The following example creates a 3-gram index called <tt>cUserIdx</tt> on the name field of the <tt>customers</tt> dataset. This index can be used to accelerate some similarity or substring matching queries on the name field. For details refer to the document on <a href="similarity.html#NGram_Index">similarity queries</a>.</p></div>
3480<div class="section">
3481<h4><a name="Example"></a>Example</h4>
3482
3483<div>
3484<div>
3485<pre class="source">CREATE INDEX cUserIdx ON customers(name) TYPE NGRAM(3);
3486</pre></div></div>
3487
3488<p>The following example creates a keyword index called <tt>oCityIdx</tt> on the <tt>city</tt> within the <tt>address</tt> field of the <tt>customers</tt> dataset. This keyword index can be used to optimize queries with token-based similarity predicates on the <tt>address</tt> field. For details refer to the document on <a href="similarity.html#Keyword_Index">similarity queries</a>.</p></div>
3489<div class="section">
3490<h4><a name="Example"></a>Example</h4>
3491
3492<div>
3493<div>
3494<pre class="source">CREATE INDEX oCityIdx ON customers(address.city) TYPE KEYWORD;
3495</pre></div></div>
3496
3497<p>The following example creates a special secondary index which holds only the primary keys. This index is useful for speeding up aggregation queries which involve only primary keys. The name of the index is optional. If the name is not specified, the system will generate one. When the user would like to drop this index, the metadata can be queried to find the system-generated name.</p></div>
3498<div class="section">
3499<h4><a name="Example"></a>Example</h4>
3500
3501<div>
3502<div>
3503<pre class="source">CREATE PRIMARY INDEX cus_pk_idx ON customers;
3504</pre></div></div>
3505
3506<p>An example query that can be accelerated using the primary-key index:</p>
3507
3508<div>
3509<div>
3510<pre class="source">SELECT COUNT(*) FROM customers;
3511</pre></div></div>
3512
3513<p>To look up the above primary-key index, issue the following query:</p>
3514
3515<div>
3516<div>
3517<pre class="source">SELECT VALUE i
3518FROM Metadata.`Index` i
3519WHERE i.DataverseName = &quot;Commerce&quot; AND i.DatasetName = &quot;customers&quot;;
3520</pre></div></div>
3521
3522<p>The query returns:</p>
3523
3524<div>
3525<div>
3526<pre class="source">[
3527 {
3528 &quot;DataverseName&quot;: &quot;Commerce&quot;,
3529 &quot;DatasetName&quot;: &quot;customers&quot;,
3530 &quot;IndexName&quot;: &quot;cus_pk_idx&quot;,
3531 &quot;IndexStructure&quot;: &quot;BTREE&quot;,
3532 &quot;SearchKey&quot;: [],
3533 &quot;IsPrimary&quot;: false,
3534 &quot;Timestamp&quot;: &quot;Fri Sep 18 14:15:51 PDT 2020&quot;,
3535 &quot;PendingOp&quot;: 0
3536 },
3537 {
3538 &quot;DataverseName&quot;: &quot;Commerce&quot;,
3539 &quot;DatasetName&quot;: &quot;customers&quot;,
3540 &quot;IndexName&quot;: &quot;customers&quot;,
3541 &quot;IndexStructure&quot;: &quot;BTREE&quot;,
3542 &quot;SearchKey&quot;: [
3543 [
3544 &quot;custid&quot;
3545 ]
3546 ],
3547 &quot;IsPrimary&quot;: true,
3548 &quot;Timestamp&quot;: &quot;Thu Jul 16 13:07:37 PDT 2020&quot;,
3549 &quot;PendingOp&quot;: 0
3550 }
3551]
3552</pre></div></div>
3553
3554<p>Remember that <tt>CREATE PRIMARY INDEX</tt> creates a secondary index. That is the reason the <tt>IsPrimary</tt> field is false. The primary-key index can be identified by the fact that the <tt>SearchKey</tt> field is empty since it only contains primary key fields.</p></div>
3555<div class="section">
3556<h4><a name="Create_Synonym"></a><a name="Synonyms" id="Synonyms"> Create Synonym</a></h4><hr /></div></div>
3557<div class="section">
3558<h3><a name="CreateSynonym"></a>CreateSynonym</h3>
3559<p><b><img src="../images/diagrams/CreateSynonym.png" alt="" /></b></p><hr />
3560<p>The <tt>CREATE SYNONYM</tt> statement creates a synonym for a given dataset. This synonym may be used instead of the dataset name in <tt>SELECT</tt>, <tt>INSERT</tt>, <tt>UPSERT</tt>, <tt>DELETE</tt>, and <tt>LOAD</tt> statements. The target dataset does not need to exist when the synonym is created.</p>
3561<div class="section">
3562<div class="section">
3563<h5><a name="Example"></a>Example</h5>
3564
3565<div>
3566<div>
3567<pre class="source">CREATE DATASET customers(customersType) PRIMARY KEY custid;
3568
3569CREATE SYNONYM customersSynonym FOR customers;
3570
3571SELECT * FROM customersSynonym;
3572</pre></div></div>
3573
3574<p>More information on how synonyms are resolved can be found in the appendix section on Variable Resolution.</p></div></div>
3575<div class="section">
3576<h4><a name="Create_Function"></a><a name="Create_function" id="Create_function">Create Function</a></h4>
3577<p>The <tt>CREATE FUNCTION</tt> statement creates a <b>named</b> function that can then be used and reused in queries. The body of a function can be any query expression involving the function&#x2019;s parameters.</p><hr /></div></div>
3578<div class="section">
3579<h3><a name="CreateFunction"></a>CreateFunction</h3>
3580<p><b><img src="../images/diagrams/CreateFunction.png" alt="" /></b></p></div>
3581<div class="section">
3582<h3><a name="FunctionParameters"></a>FunctionParameters</h3>
3583<p><b><img src="../images/diagrams/FunctionParameters.png" alt="" /></b></p><hr />
3584<p>The following is an example of a <tt>CREATE FUNCTION</tt> statement which is similar to our earlier <tt>DECLARE FUNCTION</tt> example.</p>
3585<p>It differs from that example in that it results in a function that is persistently registered by name in the specified dataverse (the current dataverse being used, if not otherwise specified).</p>
3586<div class="section">
3587<div class="section">
3588<h5><a name="Example"></a>Example</h5>
3589
3590<div>
3591<div>
3592<pre class="source">CREATE FUNCTION nameSearch(customerId) {
3593 (SELECT c.custid, c.name
3594 FROM customers AS c
3595 WHERE c.custid = customerId)[0]
3596 };
3597</pre></div></div>
3598
3599<p>The following is an example of a <tt>CREATE FUNCTION</tt> statement that replaces an existing function.</p></div>
3600<div class="section">
3601<h5><a name="Example"></a>Example</h5>
3602
3603<div>
3604<div>
3605<pre class="source">CREATE OR REPLACE FUNCTION friendInfo(userId) {
3606 (SELECT u.id, u.name
3607 FROM GleambookUsers u
3608 WHERE u.id = userId)[0]
3609 };
3610</pre></div></div>
3611
3612<p>External functions can also be loaded into Libraries via the <a href="../udf.html">UDF API</a>. Given an already loaded library <tt>pylib</tt>, a function <tt>sentiment</tt> mapping to a Python method <tt>sent_model.sentiment</tt> in <tt>sentiment_mod</tt> would be created as follows:</p></div>
3613<div class="section">
3614<h5><a name="Example"></a>Example</h5>
3615
3616<div>
3617<div>
3618<pre class="source">CREATE FUNCTION sentiment(a)
3619 AS &quot;sentiment_mod&quot;, &quot;sent_model.sentiment&quot; AT pylib;
3620</pre></div></div>
3621</div></div></div>
3622<div class="section">
3623<h3><a name="Drop_Statement"></a><a name="Removal" id="Removal">Drop Statement</a></h3><hr /></div>
3624<div class="section">
3625<h3><a name="DropStmnt"></a>DropStmnt</h3>
3626<p><b><img src="../images/diagrams/DropStmnt.png" alt="" /></b></p></div>
3627<div class="section">
3628<h3><a name="FunctionSignature"></a>FunctionSignature</h3>
3629<p><b><img src="../images/diagrams/FunctionSignature.png" alt="" /></b></p><hr />
3630<p>The <tt>DROP</tt> statement is the inverse of the <tt>CREATE</tt> statement. It can be used to drop dataverses, datatypes, datasets, indexes, functions, and synonyms.</p>
3631<p>The following examples illustrate some uses of the <tt>DROP</tt> statement.</p>
3632<div class="section">
3633<div class="section">
3634<h5><a name="Example"></a>Example</h5>
3635
3636<div>
3637<div>
3638<pre class="source">DROP DATASET customers IF EXISTS;
3639
3640DROP INDEX orders.orderidIndex;
3641
3642DROP TYPE customers2.customersType;
3643
3644DROP FUNCTION nameSearch@1;
3645
3646DROP SYNONYM customersSynonym;
3647
3648DROP DATAVERSE CommerceData;
3649</pre></div></div>
3650
3651<p>When an artifact is dropped, it will be dropped from the current dataverse if none is specified (see the <tt>DROP DATASET</tt> example above) or from the specified dataverse (see the <tt>DROP TYPE</tt> example above) if one is specified by fully qualifying the artifact name in the <tt>DROP</tt> statement. When specifying an index to drop, the index name must be qualified by the dataset that it indexes. When specifying a function to drop, since the query language allows functions to be overloaded by their number of arguments, the identifying name of the function to be dropped must explicitly include that information. (<tt>nameSearch@1</tt> above denotes the 1-argument function named nameSearch in the current dataverse.)</p></div></div></div>
3652<div class="section">
3653<h3><a name="Load_Statement"></a><a name="Load_statement" id="Load_statement">Load Statement</a></h3><hr /></div>
3654<div class="section">
3655<h3><a name="LoadStmnt"></a>LoadStmnt</h3>
3656<p><b><img src="../images/diagrams/LoadStmnt.png" alt="" /></b></p></div>
3657<div class="section">
3658<h3><a name="Configuration"></a>Configuration</h3>
3659<p><b><img src="../images/diagrams/Configuration.png" alt="" /></b></p></div>
3660<div class="section">
3661<h3><a name="KeyValuePair"></a>KeyValuePair</h3>
3662<p><b><img src="../images/diagrams/KeyValuePair.png" alt="" /></b></p><hr />
3663<p>The <tt>LOAD</tt> statement is used to initially populate a dataset via bulk loading of data from an external file. An appropriate adapter must be selected to handle the nature of the desired external data. The <tt>LOAD</tt> statement accepts the same adapters and the same parameters as discussed earlier for External datasets. (See the <a href="../aql/externaldata.html">guide to external data</a> for more information on the available adapters.) If a dataset has an auto-generated primary key field, the file to be imported should not include that field in it.</p>
3664<p>The target dataset name may be a synonym introduced by a <tt>CREATE SYNONYM</tt> statement.</p>
3665<p>The following example shows how to bulk load the <tt>customers</tt> dataset from an external file containing data that has been prepared in ADM (Asterix Data Model) format.</p>
3666<div class="section">
3667<div class="section">
3668<h5><a name="Example"></a>Example</h5>
3669
3670<div>
3671<div>
3672<pre class="source"> LOAD DATASET customers USING localfs
3673 ((&quot;path&quot;=&quot;127.0.0.1:///Users/bignosqlfan/commercenew/gbu.adm&quot;),(&quot;format&quot;=&quot;adm&quot;));
3674</pre></div></div>
3675</div></div></div></div>
3676<div class="section">
3677<h2><a name="Modification_statements" id="Modification_statements">Modification statements</a></h2>
3678<div class="section">
3679<h3><a name="Insert_Statement"></a><a name="Inserts" id="Inserts">Insert Statement</a></h3><hr /></div>
3680<div class="section">
3681<h3><a name="InsertStmnt"></a>InsertStmnt</h3>
3682<p><b><img src="../images/diagrams/InsertStmnt.png" alt="" /></b></p><hr />
3683<p>The <tt>INSERT</tt> statement is used to insert new data into a dataset. The data to be inserted comes from a query expression. This expression can be as simple as a constant expression, or in general it can be any legal query. In case the dataset has an auto-generated primary key, when performing an <tt>INSERT</tt> operation, the system allows the user to manually add the auto-generated key field in the <tt>INSERT</tt> statement, or skip that field and the system will automatically generate it and add it. However, it is important to note that if a record already exists in the dataset with the auto-generated key provided by the user, then that operation is going to fail. As a general rule, insertion will fail if the dataset already has data with the primary key value(s) being inserted.</p>
3684<p>Inserts are processed transactionally by the system. The transactional scope of each insert transaction is the insertion of a single object plus its affiliated secondary index entries (if any). If the query part of an insert returns a single object, then the <tt>INSERT</tt> statement will be a single, atomic transaction. If the query part returns multiple objects, each object being inserted will be treated as a separate transaction.</p>
3685<p>The target dataset name may be a synonym introduced by a <tt>CREATE SYNONYM</tt> statement.</p>
3686<p>The following example illustrates a query-based insertion.</p>
3687<div class="section">
3688<div class="section">
3689<h5><a name="Example"></a>Example</h5>
3690
3691<div>
3692<div>
3693<pre class="source">INSERT INTO custCopy (SELECT VALUE c FROM customers c)
3694</pre></div></div>
3695</div></div></div>
3696<div class="section">
3697<h3><a name="Upsert_Statement"></a><a name="Upserts" id="Upserts">Upsert Statement</a></h3><hr /></div>
3698<div class="section">
3699<h3><a name="UpsertStmnt"></a>UpsertStmnt</h3>
3700<p><b><img src="../images/diagrams/UpsertStmnt.png" alt="" /></b></p><hr />
3701<p>The <tt>UPSERT</tt> statement syntactically mirrors the <tt>INSERT</tt> statement discussed above. The difference lies in its semantics, which for <tt>UPSERT</tt> are &#x201c;add or replace&#x201d; instead of the <tt>INSERT</tt> &#x201c;add if not present, else error&#x201d; semantics. Whereas an <tt>INSERT</tt> can fail if another object already exists with the specified key, the analogous <tt>UPSERT</tt> will replace the previous object&#x2019;s value with that of the new object in such cases. Like the <tt>INSERT</tt> statement, the system allows the user to manually provide the auto-generated key for datasets with an auto-generated key as its primary key. This operation will insert the record if no record with that key already exists, but if a record with the key already exists, then the operation will be converted to a replace/update operation.</p>
3702<p>The target dataset name may be a synonym introduced by a <tt>CREATE SYNONYM</tt> statement.</p>
3703<p>The following example illustrates a query-based upsert operation.</p>
3704<div class="section">
3705<div class="section">
3706<h5><a name="Example"></a>Example</h5>
3707
3708<div>
3709<div>
3710<pre class="source">UPSERT INTO custCopy (SELECT VALUE c FROM customers c)
3711</pre></div></div>
3712</div></div></div>
3713<div class="section">
3714<h3><a name="Delete_Statement"></a><a name="Deletes" id="Deletes">Delete Statement</a></h3><hr /></div>
3715<div class="section">
3716<h3><a name="DeleteStmnt"></a>DeleteStmnt</h3>
3717<p><b><img src="../images/diagrams/DeleteStmnt.png" alt="" /></b></p><hr />
3718<p>The <tt>DELETE</tt> statement is used to delete data from a target dataset. The data to be deleted is identified by a boolean expression involving the variable bound to the target dataset in the <tt>DELETE</tt> statement.</p>
3719<p>Deletes are processed transactionally by the system. The transactional scope of each delete transaction is the deletion of a single object plus its affiliated secondary index entries (if any). If the boolean expression for a delete identifies a single object, then the <tt>DELETE</tt> statement itself will be a single, atomic transaction. If the expression identifies multiple objects, then each object deleted will be handled as a separate transaction.</p>
3720<p>The target dataset name may be a synonym introduced by a <tt>CREATE SYNONYM</tt> statement.</p>
3721<p>The following examples illustrate single-object deletions.</p>
3722<div class="section">
3723<div class="section">
3724<h5><a name="Example"></a>Example</h5>
3725
3726<div>
3727<div>
3728<pre class="source">DELETE FROM customers c WHERE c.custid = &quot;C41&quot;;
3729</pre></div></div>
3730</div>
3731<div class="section">
3732<h5><a name="Example"></a>Example</h5>
3733
3734<div>
3735<div>
3736<pre class="source">DELETE FROM customers WHERE custid = &quot;C47&quot;;
3737</pre></div></div>
3738<!--
3739 ! Licensed to the Apache Software Foundation (ASF) under one
3740 ! or more contributor license agreements. See the NOTICE file
3741 ! distributed with this work for additional information
3742 ! regarding copyright ownership. The ASF licenses this file
3743 ! to you under the Apache License, Version 2.0 (the
3744 ! "License"); you may not use this file except in compliance
3745 ! with the License. You may obtain a copy of the License at
3746 !
3747 ! http://www.apache.org/licenses/LICENSE-2.0
3748 !
3749 ! Unless required by applicable law or agreed to in writing,
3750 ! software distributed under the License is distributed on an
3751 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3752 ! KIND, either express or implied. See the License for the
3753 ! specific language governing permissions and limitations
3754 ! under the License.
3755 !-->
3756
3757<h2><a name="Reserved_keywords" id="Reserved_keywords">Appendix 1. Reserved keywords</a></h2><!--
3758 ! Licensed to the Apache Software Foundation (ASF) under one
3759 ! or more contributor license agreements. See the NOTICE file
3760 ! distributed with this work for additional information
3761 ! regarding copyright ownership. The ASF licenses this file
3762 ! to you under the Apache License, Version 2.0 (the
3763 ! "License"); you may not use this file except in compliance
3764 ! with the License. You may obtain a copy of the License at
3765 !
3766 ! http://www.apache.org/licenses/LICENSE-2.0
3767 !
3768 ! Unless required by applicable law or agreed to in writing,
3769 ! software distributed under the License is distributed on an
3770 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3771 ! KIND, either express or implied. See the License for the
3772 ! specific language governing permissions and limitations
3773 ! under the License.
3774 !-->
3775
3776<p>All reserved keywords are listed in the following table:</p>
3777<table border="0" class="table table-striped">
3778<thead>
3779
3780<tr class="a">
3781<th> </th>
3782<th> </th>
3783<th> </th>
3784<th> </th>
3785<th> </th>
3786<th> </th></tr>
3787</thead><tbody>
3788
3789<tr class="b">
3790<td> AND </td>
3791<td> ANY </td>
3792<td> APPLY </td>
3793<td> AS </td>
3794<td> ASC </td>
3795<td> AT </td></tr>
3796<tr class="a">
3797<td> AUTOGENERATED </td>
3798<td> BETWEEN </td>
3799<td> BTREE </td>
3800<td> BY </td>
3801<td> CASE </td>
3802<td> CLOSED </td></tr>
3803<tr class="b">
3804<td> CREATE </td>
3805<td> COMPACTION </td>
3806<td> COMPACT </td>
3807<td> CONNECT </td>
3808<td> CORRELATE </td>
3809<td> DATASET </td></tr>
3810<tr class="a">
3811<td> COLLECTION </td>
3812<td> DATAVERSE </td>
3813<td> DECLARE </td>
3814<td> DEFINITION </td>
3815<td> DECLARE </td>
3816<td> DEFINITION </td></tr>
3817<tr class="b">
3818<td> DELETE </td>
3819<td> DESC </td>
3820<td> DISCONNECT </td>
3821<td> DISTINCT </td>
3822<td> DROP </td>
3823<td> ELEMENT </td></tr>
3824<tr class="a">
3825<td> ELEMENT </td>
3826<td> EXPLAIN </td>
3827<td> ELSE </td>
3828<td> ENFORCED </td>
3829<td> END </td>
3830<td> EVERY </td></tr>
3831<tr class="b">
3832<td> EXCEPT </td>
3833<td> EXIST </td>
3834<td> EXTERNAL </td>
3835<td> FEED </td>
3836<td> FILTER </td>
3837<td> FLATTEN </td></tr>
3838<tr class="a">
3839<td> FOR </td>
3840<td> FROM </td>
3841<td> FULL </td>
3842<td> FUNCTION </td>
3843<td> GROUP </td>
3844<td> HAVING </td></tr>
3845<tr class="b">
3846<td> HINTS </td>
3847<td> IF </td>
3848<td> INTO </td>
3849<td> IN </td>
3850<td> INDEX </td>
3851<td> INGESTION </td></tr>
3852<tr class="a">
3853<td> INNER </td>
3854<td> INSERT </td>
3855<td> INTERNAL </td>
3856<td> INTERSECT </td>
3857<td> IS </td>
3858<td> JOIN </td></tr>
3859<tr class="b">
3860<td> KEYWORD </td>
3861<td> LEFT </td>
3862<td> LETTING </td>
3863<td> LET </td>
3864<td> LIKE </td>
3865<td> LIMIT </td></tr>
3866<tr class="a">
3867<td> LOAD </td>
3868<td> NODEGROUP </td>
3869<td> NGRAM </td>
3870<td> NOT </td>
3871<td> OFFSET </td>
3872<td> ON </td></tr>
3873<tr class="b">
3874<td> OPEN </td>
3875<td> OR </td>
3876<td> ORDER </td>
3877<td> OUTER </td>
3878<td> OUTPUT </td>
3879<td> OVER </td></tr>
3880<tr class="a">
3881<td> PATH </td>
3882<td> POLICY </td>
3883<td> PRE-SORTED </td>
3884<td> PRIMARY </td>
3885<td> RAW </td>
3886<td> REFRESH </td></tr>
3887<tr class="b">
3888<td> RETURN </td>
3889<td> RTREE </td>
3890<td> RUN </td>
3891<td> SATISFIES </td>
3892<td> SECONDARY </td>
3893<td> SELECT </td></tr>
3894<tr class="a">
3895<td> SET </td>
3896<td> SOME </td>
3897<td> TEMPORARY </td>
3898<td> THEN </td>
3899<td> TYPE </td>
3900<td> UNKNOWN </td></tr>
3901<tr class="b">
3902<td> UNNEST </td>
3903<td> UPDATE </td>
3904<td> USE </td>
3905<td> USING </td>
3906<td> VALUE </td>
3907<td> WHEN </td></tr>
3908<tr class="a">
3909<td> WHERE </td>
3910<td> WITH </td>
3911<td> WRITE </td>
3912<td> </td>
3913<td> </td>
3914<td> </td></tr>
3915</tbody>
3916</table><!--
3917 ! Licensed to the Apache Software Foundation (ASF) under one
3918 ! or more contributor license agreements. See the NOTICE file
3919 ! distributed with this work for additional information
3920 ! regarding copyright ownership. The ASF licenses this file
3921 ! to you under the Apache License, Version 2.0 (the
3922 ! "License"); you may not use this file except in compliance
3923 ! with the License. You may obtain a copy of the License at
3924 !
3925 ! http://www.apache.org/licenses/LICENSE-2.0
3926 !
3927 ! Unless required by applicable law or agreed to in writing,
3928 ! software distributed under the License is distributed on an
3929 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3930 ! KIND, either express or implied. See the License for the
3931 ! specific language governing permissions and limitations
3932 ! under the License.
3933 !-->
3934</div></div></div></div>
3935<div class="section">
3936<h2><a name="Appendix_2._Performance_Tuning"></a><a name="Performance_tuning" id="Performance_tuning">Appendix 2. Performance Tuning</a></h2><!--
3937 ! Licensed to the Apache Software Foundation (ASF) under one
3938 ! or more contributor license agreements. See the NOTICE file
3939 ! distributed with this work for additional information
3940 ! regarding copyright ownership. The ASF licenses this file
3941 ! to you under the Apache License, Version 2.0 (the
3942 ! "License"); you may not use this file except in compliance
3943 ! with the License. You may obtain a copy of the License at
3944 !
3945 ! http://www.apache.org/licenses/LICENSE-2.0
3946 !
3947 ! Unless required by applicable law or agreed to in writing,
3948 ! software distributed under the License is distributed on an
3949 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3950 ! KIND, either express or implied. See the License for the
3951 ! specific language governing permissions and limitations
3952 ! under the License.
3953 !-->
3954
3955<p>The <tt>SET</tt> statement can be used to override some cluster-wide configuration parameters for a specific request:</p>
3956<div class="section">
3957<h3><a name="SetStmnt"></a>SetStmnt</h3>
3958<p><b><img src="../images/diagrams/SetStmnt.png" alt="" /></b></p>
3959<p>As parameter identifiers are qualified names (containing a &#x2018;.&#x2019;) they have to be escaped using backticks (``). Note that changing query parameters will not affect query correctness but only impact performance characteristics, such as response time and throughput.</p></div></div>
3960<div class="section">
3961<h2><a name="Parallelism_Parameter"></a><a name="Parallelism_parameter" id="Parallelism_parameter">Parallelism Parameter</a></h2>
3962<p>The system can execute each request using multiple cores on multiple machines (a.k.a., partitioned parallelism) in a cluster. A user can manually specify the maximum execution parallelism for a request to scale it up and down using the following parameter:</p>
3963<ul>
3964
3965<li><b>compiler.parallelism</b>: the maximum number of CPU cores that can be used to process a query. There are three cases of the value <i>p</i> for compiler.parallelism:
3966<ul>
3967
3968<li>
3969
3970<p><i>p</i> &lt; 0 or <i>p</i> &gt; the total number of cores in a cluster: the system will use all available cores in the cluster;</p>
3971</li>
3972<li>
3973
3974<p><i>p</i> = 0 (the default): the system will use the storage parallelism (the number of partitions of stored datasets) as the maximum parallelism for query processing;</p>
3975</li>
3976<li>
3977
3978<p>all other cases: the system will use the user-specified number as the maximum number of CPU cores to use for executing the query.</p>
3979</li>
3980</ul>
3981</li>
3982</ul>
3983<div class="section">
3984<div class="section">
3985<div class="section">
3986<h5><a name="Example"></a>Example</h5>
3987
3988<div>
3989<div>
3990<pre class="source">SET `compiler.parallelism` &quot;16&quot;;
3991
3992SELECT c.name AS cname, o.orderno AS orderno
3993FROM customers c JOIN orders o ON c.custid = o.custid;
3994</pre></div></div>
3995</div></div></div></div>
3996<div class="section">
3997<h2><a name="Memory_Parameters"></a><a name="Memory_parameters" id="Memory_parameters">Memory Parameters</a></h2>
3998<p>In the system, each blocking runtime operator such as join, group-by and order-by works within a fixed memory budget, and can gracefully spill to disk if the memory budget is smaller than the amount of data it has to hold. A user can manually configure the memory budget of those operators within a query. The supported configurable memory parameters are:</p>
3999<ul>
4000
4001<li>
4002
4003<p><b>compiler.groupmemory</b>: the memory budget that each parallel group-by operator instance can use; 32MB is the default budget.</p>
4004</li>
4005<li>
4006
4007<p><b>compiler.sortmemory</b>: the memory budget that each parallel sort operator instance can use; 32MB is the default budget.</p>
4008</li>
4009<li>
4010
4011<p><b>compiler.joinmemory</b>: the memory budget that each parallel hash join operator instance can use; 32MB is the default budget.</p>
4012</li>
4013<li>
4014
4015<p><b>compiler.windowmemory</b>: the memory budget that each parallel window aggregate operator instance can use; 32MB is the default budget.</p>
4016</li>
4017</ul>
4018<p>For each memory budget value, you can use a 64-bit integer value with a 1024-based binary unit suffix (for example, B, KB, MB, GB). If there is no user-provided suffix, &#x201c;B&#x201d; is the default suffix. See the following examples.</p>
4019<div class="section">
4020<div class="section">
4021<div class="section">
4022<h5><a name="Example"></a>Example</h5>
4023
4024<div>
4025<div>
4026<pre class="source">SET `compiler.groupmemory` &quot;64MB&quot;;
4027
4028SELECT c.custid, COUNT(*)
4029FROM customers c
4030GROUP BY c.custid;
4031</pre></div></div>
4032</div>
4033<div class="section">
4034<h5><a name="Example"></a>Example</h5>
4035
4036<div>
4037<div>
4038<pre class="source">SET `compiler.sortmemory` &quot;67108864&quot;;
4039
4040SELECT VALUE o
4041FROM orders AS o
4042ORDER BY ARRAY_LENGTH(o.items) DESC;
4043</pre></div></div>
4044</div>
4045<div class="section">
4046<h5><a name="Example"></a>Example</h5>
4047
4048<div>
4049<div>
4050<pre class="source">SET `compiler.joinmemory` &quot;132000KB&quot;;
4051
4052SELECT c.name AS cname, o.orderno AS orderno
4053FROM customers c JOIN orders o ON c.custid = o.custid;
4054</pre></div></div>
4055<!--
4056 ! Licensed to the Apache Software Foundation (ASF) under one
4057 ! or more contributor license agreements. See the NOTICE file
4058 ! distributed with this work for additional information
4059 ! regarding copyright ownership. The ASF licenses this file
4060 ! to you under the Apache License, Version 2.0 (the
4061 ! "License"); you may not use this file except in compliance
4062 ! with the License. You may obtain a copy of the License at
4063 !
4064 ! http://www.apache.org/licenses/LICENSE-2.0
4065 !
4066 ! Unless required by applicable law or agreed to in writing,
4067 ! software distributed under the License is distributed on an
4068 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4069 ! KIND, either express or implied. See the License for the
4070 ! specific language governing permissions and limitations
4071 ! under the License.
4072 !-->
4073</div></div></div></div>
4074<div class="section">
4075<h2><a name="Parallel_Sort_Parameter"></a><a name="Parallel_sort_parameter" id="Parallel_sort_parameter">Parallel Sort Parameter</a></h2>
4076<p>The following parameter enables you to activate or deactivate full parallel sort for order-by operations.</p>
4077<p>When full parallel sort is inactive (<tt>false</tt>), each existing data partition is sorted (in parallel), and then all data partitions are merged into a single node.</p>
4078<p>When full parallel sort is active (<tt>true</tt>), the data is first sampled, and then repartitioned so that each partition contains data that is greater than the previous partition. The data in each partition is then sorted (in parallel), but the sorted partitions are not merged into a single node.</p>
4079<ul>
4080
4081<li><b>compiler.sort.parallel</b>: A boolean specifying whether full parallel sort is active (<tt>true</tt>) or inactive (<tt>false</tt>). The default value is <tt>true</tt>.</li>
4082</ul>
4083<div class="section">
4084<div class="section">
4085<div class="section">
4086<h5><a name="Example"></a>Example</h5>
4087
4088<div>
4089<div>
4090<pre class="source">SET `compiler.sort.parallel` &quot;true&quot;;
4091
4092SELECT VALUE o
4093FROM orders AS o
4094ORDER BY ARRAY_LENGTH(o.items) DESC;
4095</pre></div></div>
4096<!--
4097 ! Licensed to the Apache Software Foundation (ASF) under one
4098 ! or more contributor license agreements. See the NOTICE file
4099 ! distributed with this work for additional information
4100 ! regarding copyright ownership. The ASF licenses this file
4101 ! to you under the Apache License, Version 2.0 (the
4102 ! "License"); you may not use this file except in compliance
4103 ! with the License. You may obtain a copy of the License at
4104 !
4105 ! http://www.apache.org/licenses/LICENSE-2.0
4106 !
4107 ! Unless required by applicable law or agreed to in writing,
4108 ! software distributed under the License is distributed on an
4109 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4110 ! KIND, either express or implied. See the License for the
4111 ! specific language governing permissions and limitations
4112 ! under the License.
4113 !-->
4114</div></div></div></div>
4115<div class="section">
4116<h2><a name="Controlling_Index-Only-Plan_Parameter"></a><a name="Index_Only" id="Index_Only">Controlling Index-Only-Plan Parameter</a></h2>
4117<p>By default, the system tries to build an index-only plan whenever utilizing a secondary index is possible. For example, if a <tt>SELECT</tt> or <tt>JOIN</tt> query can utilize an enforced B+Tree or R-Tree index on a field, the optimizer checks whether a secondary-index search alone can generate the result that the query asks for. It mainly checks two conditions: (1) predicates used in <tt>WHERE</tt> use only the primary key field and/or secondary key field and (2) the result does not return any other fields. If these two conditions hold, it builds an index-only plan. Since an index-only plan only searches a secondary-index to answer a query, it is faster than a non-index-only plan that needs to search the primary index. However, this index-only plan can be turned off per query by setting the following parameter.</p>
4118<ul>
4119
4120<li><b>compiler.indexonly</b>: if this is set to false, the index-only-plan will not be applied; the default value is true.</li>
4121</ul>
4122<div class="section">
4123<div class="section">
4124<div class="section">
4125<h5><a name="Example"></a>Example</h5>
4126
4127<div>
4128<div>
4129<pre class="source">set `compiler.indexonly` &quot;false&quot;;
4130
4131SELECT o.order_date AS orderdate
4132FROM orders o where o.order_date = &quot;2020-05-01&quot;;
4133</pre></div></div>
4134<!--
4135 ! Licensed to the Apache Software Foundation (ASF) under one
4136 ! or more contributor license agreements. See the NOTICE file
4137 ! distributed with this work for additional information
4138 ! regarding copyright ownership. The ASF licenses this file
4139 ! to you under the Apache License, Version 2.0 (the
4140 ! "License"); you may not use this file except in compliance
4141 ! with the License. You may obtain a copy of the License at
4142 !
4143 ! http://www.apache.org/licenses/LICENSE-2.0
4144 !
4145 ! Unless required by applicable law or agreed to in writing,
4146 ! software distributed under the License is distributed on an
4147 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4148 ! KIND, either express or implied. See the License for the
4149 ! specific language governing permissions and limitations
4150 ! under the License.
4151 !-->
4152</div></div></div></div>
4153<div class="section">
4154<h2><a name="Query_Hints"></a><a name="Query_hints" id="Query_hints">Query Hints</a></h2>
4155<div class="section">
4156<div class="section">
4157<h4><a name="a.E2.80.9Chash.E2.80.9D_GROUP_BY_hint"></a><a name="hash_groupby" id="hash_groupby">&#x201c;hash&#x201d; GROUP BY hint</a></h4>
4158<p>The system supports two algorithms for GROUP BY clause evaluation: pre-sorted and hash-based. By default it uses the pre-sorted approach: The input data is first sorted on the grouping fields and then aggregation is performed on that sorted data. The alternative is a hash-based strategy which can be enabled via a <tt>/*+ hash */</tt> GROUP BY hint: The data is aggregated using an in-memory hash-table (that can spill to disk if necessary). This approach is recommended for low-cardinality grouping fields.</p>
4159<div class="section">
4160<h5><a name="Example:"></a>Example:</h5>
4161
4162<div>
4163<div>
4164<pre class="source">SELECT c.address.state, count(*)
4165FROM Customers AS c
4166/*+ hash */ GROUP BY c.address.state
4167</pre></div></div>
4168</div></div>
4169<div class="section">
4170<h4><a name="a.E2.80.9Chash-bcast.E2.80.9D_JOIN_hint"></a><a name="hash_bcast_join" id="hash_bcast_join">&#x201c;hash-bcast&#x201d; JOIN hint</a></h4>
4171<p>By default the system uses a partitioned-parallel hash join strategy to parallelize the execution of an equi-join. In this approach both sides of the join are repartitioned (if necessary) on a hash of the join key; potentially matching data items thus arrive at the same partition to be joined locally. This strategy is robust, but not always the fastest when one of the join sides is low cardinality and the other is high cardinality (since it scans and potentially moves the data from both sides). This special case can be better handled by broadcasting (replicating) the smaller side to all data partitions of the larger side and not moving the data from the other (larger) side. The system provides a join hint to enable this strategy: <tt>/*+ hash-bcast */</tt>. This hint forces the right side of the join to be replicated while the left side retains its original partitioning.</p>
4172<div class="section">
4173<h5><a name="Example:"></a>Example:</h5>
4174
4175<div>
4176<div>
4177<pre class="source">SELECT *
4178FROM Orders AS o JOIN Customers AS c
4179ON o.customer_id /*+ hash-bcast */ = c.customer_id
4180</pre></div></div>
4181<!--
4182 ! Licensed to the Apache Software Foundation (ASF) under one
4183 ! or more contributor license agreements. See the NOTICE file
4184 ! distributed with this work for additional information
4185 ! regarding copyright ownership. The ASF licenses this file
4186 ! to you under the Apache License, Version 2.0 (the
4187 ! "License"); you may not use this file except in compliance
4188 ! with the License. You may obtain a copy of the License at
4189 !
4190 ! http://www.apache.org/licenses/LICENSE-2.0
4191 !
4192 ! Unless required by applicable law or agreed to in writing,
4193 ! software distributed under the License is distributed on an
4194 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4195 ! KIND, either express or implied. See the License for the
4196 ! specific language governing permissions and limitations
4197 ! under the License.
4198 !-->
4199</div></div></div></div>
4200<div class="section">
4201<h2><a name="Appendix_3._Variable_Bindings_and_Name_Resolution"></a><a name="Variable_bindings_and_name_resolution" id="Variable_bindings_and_name_resolution">Appendix 3. Variable Bindings and Name Resolution</a></h2><!--
4202 ! Licensed to the Apache Software Foundation (ASF) under one
4203 ! or more contributor license agreements. See the NOTICE file
4204 ! distributed with this work for additional information
4205 ! regarding copyright ownership. The ASF licenses this file
4206 ! to you under the Apache License, Version 2.0 (the
4207 ! "License"); you may not use this file except in compliance
4208 ! with the License. You may obtain a copy of the License at
4209 !
4210 ! http://www.apache.org/licenses/LICENSE-2.0
4211 !
4212 ! Unless required by applicable law or agreed to in writing,
4213 ! software distributed under the License is distributed on an
4214 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4215 ! KIND, either express or implied. See the License for the
4216 ! specific language governing permissions and limitations
4217 ! under the License.
4218 !-->
4219
4220<p>In this Appendix, we&#x2019;ll look at how variables are bound and how names are resolved. Names can appear in every clause of a query. Sometimes a name consists of just a single identifier, e.g., <tt>region</tt> or <tt>revenue</tt>. More often a name will consist of two identifiers separated by a dot, e.g., <tt>customer.address</tt>. Occasionally a name may have more than two identifiers, e.g., <tt>policy.owner.address.zipcode</tt>. <i>Resolving</i> a name means determining exactly what the (possibly multi-part) name refers to. It is necessary to have well-defined rules for how to resolve a name in cases of ambiguity. (In the absence of schemas, such cases arise more commonly, and also differently, than they do in SQL.)</p>
4221<p>The basic job of each clause in a query block is to bind variables. Each clause sees the variables bound by previous clauses and may bind additional variables. Names are always resolved with respect to the variables that are bound (&#x201c;in scope&#x201d;) at the place where the name is used. It is possible that the name resolution process will fail, which may lead to an empty result or an error message.</p>
4222<p>One important bit of background: When the system is reading a query and resolving its names, it has a list of all the available dataverses and datasets. As a result, it knows whether <tt>a.b</tt> is a valid name for dataset <tt>b</tt> in dataverse <tt>a</tt>. However, the system does not in general have knowledge of the schemas of the data inside the datasets; remember that this is a much more open world. As a result, in general the system cannot know whether any object in a particular dataset will have a field named <tt>c</tt>. These assumptions affect how errors are handled. If you try to access dataset <tt>a.b</tt> and no dataset by that name exists, you will get an error and your query will not run. However, if you try to access a field <tt>c</tt> in a collection of objects, your query will run and return <tt>missing</tt> for each object that doesn&#x2019;t have a field named <tt>c</tt> - this is because it&#x2019;s possible that some object (someday) could have such a field.</p></div>
4223<div class="section">
4224<h2><a name="Binding_Variables"></a><a name="Binding_variables" id="Binding_variables">Binding Variables</a></h2>
4225<p>Variables can be bound in the following ways:</p>
4226<ol style="list-style-type: decimal">
4227
4228<li>
4229
4230<p><tt>WITH</tt> and <tt>LET</tt> clauses bind a variable to the result of an expression in a straightforward way.</p>
4231<p>Examples:</p>
4232<p><tt>WITH cheap_parts AS (SELECT partno FROM parts WHERE price &lt; 100)</tt> binds the variable <tt>cheap_parts</tt> to the result of the subquery.</p>
4233<p><tt>LET pay = salary + bonus</tt> binds the variable <tt>pay</tt> to the result of evaluating the expression <tt>salary + bonus</tt>.</p>
4234</li>
4235<li>
4236
4237<p><tt>FROM</tt>, <tt>GROUP BY</tt>, and <tt>SELECT</tt> clauses have optional <tt>AS</tt> subclauses that contain an expression and a name (called an <i>iteration variable</i> in a <tt>FROM</tt> clause, or an alias in <tt>GROUP BY</tt> or <tt>SELECT</tt>).</p>
4238<p>Examples:</p>
4239<p><tt>FROM customer AS c, order AS o</tt></p>
4240<p><tt>GROUP BY salary + bonus AS total_pay</tt></p>
4241<p><tt>SELECT MAX(price) AS highest_price</tt></p>
4242<p>An <tt>AS</tt> subclause always binds the name (as a variable) to the result of the expression (or, in the case of a <tt>FROM</tt> clause, to the <i>individual members</i> of the collection identified by the expression).</p>
4243<p>It&#x2019;s always a good practice to use the keyword <tt>AS</tt> when defining an alias or iteration variable. However, as in SQL, the syntax allows the keyword <tt>AS</tt> to be omitted. For example, the <tt>FROM</tt> clause above could have been written like this:</p>
4244<p><tt>FROM customer c, order o</tt></p>
4245<p>Omitting the keyword <tt>AS</tt> does not affect the binding of variables. The <tt>FROM</tt> clause in this example binds variables <tt>c</tt> and <tt>o</tt> whether the keyword <tt>AS</tt> is used or not.</p>
4246<p>In certain cases, a variable is automatically bound even if no alias or variable-name is specified. Whenever an expression could have been followed by an AS subclause, if the expression consists of a simple name or a path expression, that expression binds a variable whose name is the same as the simple name or the last step in the path expression. Here are some examples:</p>
4247<p><tt>FROM customer, order</tt> binds iteration variables named <tt>customer</tt> and <tt>order</tt></p>
4248<p><tt>GROUP BY address.zipcode</tt> binds a variable named <tt>zipcode</tt></p>
4249<p><tt>SELECT item[0].price</tt> binds a variable named <tt>price</tt></p>
4250<p>Note that a <tt>FROM</tt> clause iterates over a collection (usually a dataset), binding a variable to each member of the collection in turn. The name of the collection remains in scope, but it is not a variable. For example, consider this <tt>FROM</tt> clause used in a self-join:</p>
4251<p><tt>FROM customer AS c1, customer AS c2</tt></p>
4252<p>This <tt>FROM</tt> clause joins the <tt>customer</tt> dataset to itself, binding the iteration variables <tt>c1</tt> and <tt>c2</tt> to objects in the left-hand-side and right-hand-side of the join, respectively. After the <tt>FROM</tt> clause, <tt>c1</tt> and <tt>c2</tt> are in scope as variables, and <tt>customer</tt> remains accessible as a dataset name but not as a variable.</p>
4253</li>
4254<li>
4255
4256<p>Special rules for <tt>GROUP BY</tt>:</p>
4257<ul>
4258
4259<li>
4260
4261<p>(3A): If a <tt>GROUP BY</tt> clause specifies an expression that has no explicit alias, it binds a pseudo-variable that is lexicographically identical to the expression itself. For example:</p>
4262<p><tt>GROUP BY salary + bonus</tt> binds a pseudo-variable named <tt>salary + bonus</tt>.</p>
4263<p>This rule allows subsequent clauses to refer to the grouping expression (salary + bonus) even though its constituent variables (salary and bonus) are no longer in scope. For example, the following query is valid:</p>
4264
4265<div>
4266<div>
4267<pre class="source">FROM employee
4268GROUP BY salary + bonus
4269HAVING salary + bonus &gt; 1000
4270SELECT salary + bonus, COUNT(*) AS how_many
4271</pre></div></div>
4272
4273<p>While it might have been more elegant to explicitly require an alias in cases like this, the pseudo-variable rule is retained for SQL compatibility. Note that the expression <tt>salary + bonus</tt> is not <i>actually</i> evaluated in the <tt>HAVING</tt> and <tt>SELECT</tt> clauses (and could not be since <tt>salary</tt> and <tt>bonus</tt> are no longer individually in scope). Instead, the expression <tt>salary + bonus</tt> is treated as a reference to the pseudo-variable defined in the <tt>GROUP BY</tt> clause.</p>
4274</li>
4275<li>
4276
4277<p>(3B): The <tt>GROUP BY</tt> clause may be followed by a <tt>GROUP AS</tt> clause that binds a variable to the group. The purpose of this variable is to make the individual objects inside the group visible to subqueries that may need to iterate over them.</p>
4278<p>The <tt>GROUP AS</tt> variable is bound to a multiset of objects. Each object represents one of the members of the group. Since the group may have been formed from a join, each of the member-objects contains a nested object for each variable bound by the nearest <tt>FROM</tt> clause (and its <tt>LET</tt> subclause, if any). These nested objects, in turn, contain the actual fields of the group-member. To understand this process, consider the following query fragment:</p>
4279
4280<div>
4281<div>
4282<pre class="source">FROM parts AS p, suppliers AS s
4283WHERE p.suppno = s.suppno
4284GROUP BY p.color GROUP AS g
4285</pre></div></div>
4286
4287<p>Suppose that the objects in <tt>parts</tt> have fields <tt>partno</tt>, <tt>color</tt>, and <tt>suppno</tt>. Suppose that the objects in suppliers have fields <tt>suppno</tt> and <tt>location</tt>.</p>
4288<p>Then, for each group formed by the <tt>GROUP BY</tt>, the variable g will be bound to a multiset with the following structure:</p>
4289
4290<div>
4291<div>
4292<pre class="source">[ { &quot;p&quot;: { &quot;partno&quot;: &quot;p1&quot;, &quot;color&quot;: &quot;red&quot;, &quot;suppno&quot;: &quot;s1&quot; },
4293 &quot;s&quot;: { &quot;suppno&quot;: &quot;s1&quot;, &quot;location&quot;: &quot;Denver&quot; } },
4294 { &quot;p&quot;: { &quot;partno&quot;: &quot;p2&quot;, &quot;color&quot;: &quot;red&quot;, &quot;suppno&quot;: &quot;s2&quot; },
4295 &quot;s&quot;: { &quot;suppno&quot;: &quot;s2&quot;, &quot;location&quot;: &quot;Atlanta&quot; } },
4296 ...
4297]
4298</pre></div></div>
4299</li>
4300</ul>
4301</li>
4302</ol></div>
4303<div class="section">
4304<h2><a name="Scoping" id="Scoping">Scoping</a></h2>
4305<p>In general, the variables that are in scope at a particular position are those variables that were bound earlier in the current query block, in outer (enclosing) query blocks, or in a <tt>WITH</tt> clause at the beginning of the query. More specific rules follow.</p>
4306<p>The clauses in a query block are conceptually processed in the following order:</p>
4307<ul>
4308
4309<li><tt>FROM</tt> (followed by LET subclause, if any)</li>
4310<li><tt>WHERE</tt></li>
4311<li><tt>GROUP BY</tt> (followed by LET subclause, if any)</li>
4312<li><tt>HAVING</tt></li>
4313<li><tt>SELECT</tt> or <tt>SELECT VALUE</tt></li>
4314<li><tt>ORDER BY</tt></li>
4315<li><tt>OFFSET</tt></li>
4316<li><tt>LIMIT</tt></li>
4317</ul>
4318<p>During processing of each clause, the variables that are in scope are those variables that are bound in the following places:</p>
4319<ol style="list-style-type: decimal">
4320
4321<li>
4322
4323<p>In earlier clauses of the same query block (as defined by the ordering given above).</p>
4324<p>Example: <tt>FROM orders AS o SELECT o.date</tt> The variable <tt>o</tt> in the <tt>SELECT</tt> clause is bound, in turn, to each object in the dataset <tt>orders</tt>.</p>
4325</li>
4326<li>
4327
4328<p>In outer query blocks in which the current query block is nested. In case of duplication, the innermost binding wins.</p>
4329</li>
4330<li>
4331
4332<p>In the <tt>WITH</tt> clause (if any) at the beginning of the query.</p>
4333</li>
4334</ol>
4335<p>However, in a query block where a <tt>GROUP BY</tt> clause is present:</p>
4336<ol style="list-style-type: decimal">
4337
4338<li>
4339
4340<p>In clauses processed before <tt>GROUP BY</tt>, scoping rules are the same as though no GROUP BY were present.</p>
4341</li>
4342<li>
4343
4344<p>In clauses processed after <tt>GROUP BY</tt>, the variables bound in the nearest <tt>FROM</tt>-clause (and its <tt>LET</tt> subclause, if any) are removed from scope and replaced by the variables bound in the <tt>GROUP BY</tt> clause (and its <tt>LET</tt> subclause, if any). However, this replacement does not apply inside the arguments of the five SQL special aggregating functions (<tt>MIN</tt>, <tt>MAX</tt>, <tt>AVG</tt>, <tt>SUM</tt>, and <tt>COUNT</tt>). These functions still need to see the individual data items over which they are computing an aggregation. For example, after <tt>FROM employee AS e GROUP BY deptno</tt>, it would not be valid to reference <tt>e.salary</tt>, but <tt>AVG(e.salary)</tt> would be valid.</p>
4345</li>
4346</ol>
4347<p>Special case: In an expression inside a <tt>FROM</tt> clause, a variable is in scope if it was bound in an earlier expression in the same <tt>FROM</tt> clause. Example:</p>
4348
4349<div>
4350<div>
4351<pre class="source">FROM orders AS o, o.items AS i
4352</pre></div></div>
4353
4354<p>The reason for this special case is to support iteration over nested collections.</p>
4355<p>Note that, since the <tt>SELECT</tt> clause comes <i>after</i> the <tt>WHERE</tt> and <tt>GROUP BY</tt> clauses in conceptual processing order, any variables defined in <tt>SELECT</tt> are not visible in <tt>WHERE</tt> or <tt>GROUP BY</tt>. Therefore the following query will not return what might be the expected result (since in the WHERE clause, <tt>pay</tt> will be interpreted as a field in the <tt>emp</tt> object rather than as the computed value <tt>salary + bonus</tt>):</p>
4356
4357<div>
4358<div>
4359<pre class="source">SELECT name, salary + bonus AS pay
4360FROM emp
4361WHERE pay &gt; 1000
4362ORDER BY pay
4363</pre></div></div>
4364
4365<p>The likely intent of the query above can be accomplished as follows:</p>
4366
4367<div>
4368<div>
4369<pre class="source">FROM emp AS e
4370LET pay = e.salary + e.bonus
4371WHERE pay &gt; 1000
4372SELECT e.name, pay
4373ORDER BY pay
4374</pre></div></div>
4375
4376<p>Note: In the phrase <i>expr1</i> <tt>JOIN</tt> <i>expr2</i> <tt>ON</tt> <i>expr3</i>, variables defined in <i>expr1</i> are visible in <i>expr3</i> but not in <i>expr2</i>. Here&#x2019;s an example that will not work:</p>
4377
4378<div>
4379<div>
4380<pre class="source">FROM orders AS o JOIN o.items AS i ON 1 = 1
4381</pre></div></div>
4382
4383<p>The variable <tt>o</tt>, defined in the phrase before <tt>JOIN</tt>, cannot be used in the phrase immediately following <tt>JOIN</tt>. The probable intent of this example could be accomplished in either of the following ways:</p>
4384
4385<div>
4386<div>
4387<pre class="source">FROM orders AS o UNNEST o.items AS i
4388
4389FROM orders AS o, o.items AS i
4390</pre></div></div>
4391
4392<p>To summarize this rule: You may not use left-correlation in an explicit <tt>JOIN</tt> clause.</p></div>
4393<div class="section">
4394<h2><a name="Resolving_Names"></a><a name="Resolving_names" id="Resolving_names">Resolving Names</a></h2>
4395<p>The process of name resolution begins with the leftmost identifier in the name. The rules for resolving the leftmost identifier are:</p>
4396<ol style="list-style-type: decimal">
4397
4398<li>
4399
4400<p><i>In a <tt>FROM</tt> clause</i>: Names in a <tt>FROM</tt> clause identify the collections over which the query block will iterate. These collections may be stored datasets or may be the results of nested query blocks. A stored dataset may be in a named dataverse or in the default dataverse. Thus, if the two-part name <tt>a.b</tt> is in a <tt>FROM</tt> clause, <tt>a</tt> might represent a dataverse and <tt>b</tt> might represent a dataset in that dataverse. Another example of a two-part name in a <tt>FROM</tt> clause is <tt>FROM orders AS o, o.items AS i</tt>. In <tt>o.items</tt>, <tt>o</tt> represents an order object bound earlier in the <tt>FROM</tt> clause, and <tt>items</tt> represents the items object inside that order.</p>
4401<p>The rules for resolving the leftmost identifier in a <tt>FROM</tt> clause (including a <tt>JOIN</tt> subclause), or in the expression following <tt>IN</tt> in a quantified predicate, are as follows:</p>
4402<ul>
4403
4404<li>
4405
4406<p>(1A): If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. (Note that in the case of a subquery, an in-scope variable might have been bound in an outer query block; this is called a correlated subquery).</p>
4407</li>
4408<li>
4409
4410<p>(1B): Otherwise, if the identifier is the first part of a two-part name like <tt>a.b</tt>, the name is treated as <tt>dataverse.dataset</tt>. If the identifier stands alone as a one-part name, it is treated as the name of a dataset in the default dataverse. If the designated dataset exists, then the identifier is resolved to that dataset; otherwise, if a synonym with the given name exists, then the identifier is resolved to the target dataset of that synonym (potentially recursively if this synonym points to another synonym). An error will result if neither the designated dataset nor a synonym with this name exists.</p>
4411<p>Datasets take precedence over synonyms, so if both a dataset and a synonym have the same name then the resolution is to the dataset.</p>
4412</li>
4413</ul>
4414</li>
4415<li>
4416
4417<p><i>Elsewhere in a query block</i>: In clauses other than <tt>FROM</tt>, a name typically identifies a field of some object. For example, if the expression <tt>a.b</tt> is in a <tt>SELECT</tt> or <tt>WHERE</tt> clause, it&#x2019;s likely that <tt>a</tt> represents an object and <tt>b</tt> represents a field in that object.</p>
4418<p>The rules for resolving the leftmost identifier in clauses other than the ones listed in Rule 1 are:</p>
4419<ul>
4420
4421<li>
4422
4423<p>(2A): If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable. (In the case of a correlated subquery, the in-scope variable might have been bound in an outer query block).</p>
4424</li>
4425<li>
4426
4427<p>(2B): (The &#x201c;Single Variable Rule&#x201d;): Otherwise, if the <tt>FROM</tt> clause in the current query block binds exactly one variable, the identifier is treated as a field access on the object bound to that variable. For example, in the query <tt>FROM customer SELECT address</tt>, the identifier <tt>address</tt> is treated as a field in the object bound to the variable <tt>customer</tt>. At runtime, if the object bound to <tt>customer</tt> has no <tt>address</tt> field, the <tt>address</tt> expression will return <tt>missing</tt>. If the <tt>FROM</tt> clause in the current query block binds multiple variables, name resolution fails with an &#x201c;ambiguous name&#x201d; error. If there&#x2019;s no <tt>FROM</tt> clause in the current query block, name resolution fails with an &#x201c;undefined identifier&#x201d; error. Note that the Single Variable Rule searches for bound variables only in the current query block, not in outer (containing) blocks. The purpose of this rule is to permit the compiler to resolve field-references unambiguously without relying on any schema information. Also note that variables defined by <tt>LET</tt> clauses do not participate in the resolution process performed by this rule.</p>
4428<p>Exception: In a query that has a <tt>GROUP BY</tt> clause, the Single Variable Rule does not apply in any clauses that occur after the <tt>GROUP BY</tt> because, in these clauses, the variables bound by the <tt>FROM</tt> clause are no longer in scope. In clauses after <tt>GROUP BY</tt>, only Rule (2A) applies.</p>
4429</li>
4430</ul>
4431</li>
4432<li>
4433
4434<p>In an <tt>ORDER BY</tt> clause following a <tt>UNION ALL</tt> expression:</p>
4435<p>The leftmost identifier is treated as a field-access on the objects that are generated by the <tt>UNION ALL</tt>. For example:</p>
4436
4437<div>
4438<div>
4439<pre class="source">query-block-1
4440UNION ALL
4441query-block-2
4442ORDER BY salary
4443</pre></div></div>
4444
4445<p>In the result of this query, objects that have a <tt>salary</tt> field will be ordered by the value of this field; objects that have no <tt>salary</tt> field will appear at the beginning of the query result (in ascending order) or at the end (in descending order).</p>
4446</li>
4447<li>
4448
4449<p><i>In a standalone expression</i>: If a query consists of a standalone expression then identifiers inside that expression are resolved according to Rule 1. For example, if the whole query is <tt>ARRAY_COUNT(a.b)</tt> then <tt>a.b</tt> will be treated as dataset <tt>b</tt> contained in dataverse <tt>a</tt>. Note that this rule only applies to identifiers which are located directly inside a standalone expression. Identifiers inside <tt>SELECT</tt> statements in a standalone expression are still resolved according to Rules 1-3. For example, if the whole query is <tt>ARRAY_SUM( (FROM employee AS e SELECT VALUE salary) )</tt> then <tt>salary</tt> is resolved as <tt>e.salary</tt> following the &#x201c;Single Variable Rule&#x201d; (Rule (2B)).</p>
4450</li>
4451<li>
4452
4453<p>Once the leftmost identifier has been resolved, the following dots and identifiers in the name (if any) are treated as a path expression that navigates to a field nested inside that object. The name resolves to the field at the end of the path. If this field does not exist, the value <tt>missing</tt> is returned.</p>
4454</li>
4455</ol><!--
4456 ! Licensed to the Apache Software Foundation (ASF) under one
4457 ! or more contributor license agreements. See the NOTICE file
4458 ! distributed with this work for additional information
4459 ! regarding copyright ownership. The ASF licenses this file
4460 ! to you under the Apache License, Version 2.0 (the
4461 ! "License"); you may not use this file except in compliance
4462 ! with the License. You may obtain a copy of the License at
4463 !
4464 ! http://www.apache.org/licenses/LICENSE-2.0
4465 !
4466 ! Unless required by applicable law or agreed to in writing,
4467 ! software distributed under the License is distributed on an
4468 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4469 ! KIND, either express or implied. See the License for the
4470 ! specific language governing permissions and limitations
4471 ! under the License.
4472 !-->
4473</div>
4474<div class="section">
4475<h2><a name="Appendix_4._Example_Data"></a><a name="Manual_data" id="Manual_data">Appendix 4. Example Data</a></h2><!--
4476 ! Licensed to the Apache Software Foundation (ASF) under one
4477 ! or more contributor license agreements. See the NOTICE file
4478 ! distributed with this work for additional information
4479 ! regarding copyright ownership. The ASF licenses this file
4480 ! to you under the Apache License, Version 2.0 (the
4481 ! "License"); you may not use this file except in compliance
4482 ! with the License. You may obtain a copy of the License at
4483 !
4484 ! http://www.apache.org/licenses/LICENSE-2.0
4485 !
4486 ! Unless required by applicable law or agreed to in writing,
4487 ! software distributed under the License is distributed on an
4488 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4489 ! KIND, either express or implied. See the License for the
4490 ! specific language governing permissions and limitations
4491 ! under the License.
4492 !-->
4493
4494<p>This appendix lists the data definitions and the datasets used for the examples provided throughout this manual.</p>
4495<div class="section">
4496<h3><a name="Data_Definitions"></a><a name="definition_statements" id="definition_statements">Data Definitions</a></h3>
4497
4498<div>
4499<div>
4500<pre class="source">CREATE DATAVERSE Commerce IF NOT EXISTS;
4501
4502USE Commerce;
4503
4504CREATE TYPE addressType AS {
4505 street: string,
4506 city: string,
4507 zipcode: string?
4508};
4509
4510CREATE TYPE customerType AS {
4511 custid: string,
4512 name: string,
4513 address: addressType?
4514};
4515
4516CREATE DATASET customers(customerType)
4517 PRIMARY KEY custid;
4518
4519CREATE TYPE itemType AS {
4520 itemno: int,
4521 qty: int,
4522 price: int
4523};
4524
4525CREATE TYPE orderType AS {
4526 orderno: int,
4527 custid: string,
4528 order_date: string,
4529 ship_date: string?,
4530 items: [ itemType ]
4531};
4532
4533CREATE DATASET orders(orderType)
4534 PRIMARY KEY orderno;
4535</pre></div></div>
4536</div>
4537<div class="section">
4538<h3><a name="Customers_Data"></a><a name="customers_data" id="customers_data">Customers Data</a></h3>
4539
4540<div>
4541<div>
4542<pre class="source">[
4543 {
4544 &quot;custid&quot;: &quot;C13&quot;,
4545 &quot;name&quot;: &quot;T. Cody&quot;,
4546 &quot;address&quot;: {
4547 &quot;street&quot;: &quot;201 Main St.&quot;,
4548 &quot;city&quot;: &quot;St. Louis, MO&quot;,
4549 &quot;zipcode&quot;: &quot;63101&quot;
4550 },
4551 &quot;rating&quot;: 750
4552 },
4553 {
4554 &quot;custid&quot;: &quot;C25&quot;,
4555 &quot;name&quot;: &quot;M. Sinclair&quot;,
4556 &quot;address&quot;: {
4557 &quot;street&quot;: &quot;690 River St.&quot;,
4558 &quot;city&quot;: &quot;Hanover, MA&quot;,
4559 &quot;zipcode&quot;: &quot;02340&quot;
4560 },
4561 &quot;rating&quot;: 690
4562 },
4563 {
4564 &quot;custid&quot;: &quot;C31&quot;,
4565 &quot;name&quot;: &quot;B. Pruitt&quot;,
4566 &quot;address&quot;: {
4567 &quot;street&quot;: &quot;360 Mountain Ave.&quot;,
4568 &quot;city&quot;: &quot;St. Louis, MO&quot;,
4569 &quot;zipcode&quot;: &quot;63101&quot;
4570 }
4571 },
4572 {
4573 &quot;custid&quot;: &quot;C35&quot;,
4574 &quot;name&quot;: &quot;J. Roberts&quot;,
4575 &quot;address&quot;: {
4576 &quot;street&quot;: &quot;420 Green St.&quot;,
4577 &quot;city&quot;: &quot;Boston, MA&quot;,
4578 &quot;zipcode&quot;: &quot;02115&quot;
4579 },
4580 &quot;rating&quot;: 565
4581 },
4582 {
4583 &quot;custid&quot;: &quot;C37&quot;,
4584 &quot;name&quot;: &quot;T. Henry&quot;,
4585 &quot;address&quot;: {
4586 &quot;street&quot;: &quot;120 Harbor Blvd.&quot;,
4587 &quot;city&quot;: &quot;Boston, MA&quot;,
4588 &quot;zipcode&quot;: &quot;02115&quot;
4589 },
4590 &quot;rating&quot;: 750
4591 },
4592 {
4593 &quot;custid&quot;: &quot;C41&quot;,
4594 &quot;name&quot;: &quot;R. Dodge&quot;,
4595 &quot;address&quot;: {
4596 &quot;street&quot;: &quot;150 Market St.&quot;,
4597 &quot;city&quot;: &quot;St. Louis, MO&quot;,
4598 &quot;zipcode&quot;: &quot;63101&quot;
4599 },
4600 &quot;rating&quot;: 640
4601 },
4602 {
4603 &quot;custid&quot;: &quot;C47&quot;,
4604 &quot;name&quot;: &quot;S. Logan&quot;,
4605 &quot;address&quot;: {
4606 &quot;street&quot;: &quot;Via del Corso&quot;,
4607 &quot;city&quot;: &quot;Rome, Italy&quot;
4608 },
4609 &quot;rating&quot;: 625
4610 }
4611]
4612</pre></div></div>
4613</div>
4614<div class="section">
4615<h3><a name="Orders_Data"></a><a name="orders_data" id="orders_data">Orders Data</a></h3>
4616
4617<div>
4618<div>
4619<pre class="source">[
4620 {
4621 &quot;orderno&quot;: 1001,
4622 &quot;custid&quot;: &quot;C41&quot;,
4623 &quot;order_date&quot;: &quot;2020-04-29&quot;,
4624 &quot;ship_date&quot;: &quot;2020-05-03&quot;,
4625 &quot;items&quot;: [
4626 {
4627 &quot;itemno&quot;: 347,
4628 &quot;qty&quot;: 5,
4629 &quot;price&quot;: 19.99
4630 },
4631 {
4632 &quot;itemno&quot;: 193,
4633 &quot;qty&quot;: 2,
4634 &quot;price&quot;: 28.89
4635 }
4636 ]
4637 },
4638 {
4639 &quot;orderno&quot;: 1002,
4640 &quot;custid&quot;: &quot;C13&quot;,
4641 &quot;order_date&quot;: &quot;2020-05-01&quot;,
4642 &quot;ship_date&quot;: &quot;2020-05-03&quot;,
4643 &quot;items&quot;: [
4644 {
4645 &quot;itemno&quot;: 460,
4646 &quot;qty&quot;: 95,
4647 &quot;price&quot;: 100.99
4648 },
4649 {
4650 &quot;itemno&quot;: 680,
4651 &quot;qty&quot;: 150,
4652 &quot;price&quot;: 8.75
4653 }
4654 ]
4655 },
4656 {
4657 &quot;orderno&quot;: 1003,
4658 &quot;custid&quot;: &quot;C31&quot;,
4659 &quot;order_date&quot;: &quot;2020-06-15&quot;,
4660 &quot;ship_date&quot;: &quot;2020-06-16&quot;,
4661 &quot;items&quot;: [
4662 {
4663 &quot;itemno&quot;: 120,
4664 &quot;qty&quot;: 2,
4665 &quot;price&quot;: 88.99
4666 },
4667 {
4668 &quot;itemno&quot;: 460,
4669 &quot;qty&quot;: 3,
4670 &quot;price&quot;: 99.99
4671 }
4672 ]
4673 },
4674 {
4675 &quot;orderno&quot;: 1004,
4676 &quot;custid&quot;: &quot;C35&quot;,
4677 &quot;order_date&quot;: &quot;2020-07-10&quot;,
4678 &quot;ship_date&quot;: &quot;2020-07-15&quot;,
4679 &quot;items&quot;: [
4680 {
4681 &quot;itemno&quot;: 680,
4682 &quot;qty&quot;: 6,
4683 &quot;price&quot;: 9.99
4684 },
4685 {
4686 &quot;itemno&quot;: 195,
4687 &quot;qty&quot;: 4,
4688 &quot;price&quot;: 35
4689 }
4690 ]
4691 },
4692 {
4693 &quot;orderno&quot;: 1005,
4694 &quot;custid&quot;: &quot;C37&quot;,
4695 &quot;order_date&quot;: &quot;2020-08-30&quot;,
4696 &quot;items&quot;: [
4697 {
4698 &quot;itemno&quot;: 460,
4699 &quot;qty&quot;: 2,
4700 &quot;price&quot;: 99.98
4701 },
4702 {
4703 &quot;itemno&quot;: 347,
4704 &quot;qty&quot;: 120,
4705 &quot;price&quot;: 22
4706 },
4707 {
4708 &quot;itemno&quot;: 780,
4709 &quot;qty&quot;: 1,
4710 &quot;price&quot;: 1500
4711 },
4712 {
4713 &quot;itemno&quot;: 375,
4714 &quot;qty&quot;: 2,
4715 &quot;price&quot;: 149.98
4716 }
4717 ]
4718 },
4719 {
4720 &quot;orderno&quot;: 1006,
4721 &quot;custid&quot;: &quot;C41&quot;,
4722 &quot;order_date&quot;: &quot;2020-09-02&quot;,
4723 &quot;ship_date&quot;: &quot;2020-09-04&quot;,
4724 &quot;items&quot;: [
4725 {
4726 &quot;itemno&quot;: 680,
4727 &quot;qty&quot;: 51,
4728 &quot;price&quot;: 25.98
4729 },
4730 {
4731 &quot;itemno&quot;: 120,
4732 &quot;qty&quot;: 65,
4733 &quot;price&quot;: 85
4734 },
4735 {
4736 &quot;itemno&quot;: 460,
4737 &quot;qty&quot;: 120,
4738 &quot;price&quot;: 99.98
4739 }
4740 ]
4741 },
4742 {
4743 &quot;orderno&quot;: 1007,
4744 &quot;custid&quot;: &quot;C13&quot;,
4745 &quot;order_date&quot;: &quot;2020-09-13&quot;,
4746 &quot;ship_date&quot;: &quot;2020-09-20&quot;,
4747 &quot;items&quot;: [
4748 {
4749 &quot;itemno&quot;: 185,
4750 &quot;qty&quot;: 5,
4751 &quot;price&quot;: 21.99
4752 },
4753 {
4754 &quot;itemno&quot;: 680,
4755 &quot;qty&quot;: 1,
4756 &quot;price&quot;: 20.5
4757 }
4758 ]
4759 },
4760 {
4761 &quot;orderno&quot;: 1008,
4762 &quot;custid&quot;: &quot;C13&quot;,
4763 &quot;order_date&quot;: &quot;2020-10-13&quot;,
4764 &quot;items&quot;: [
4765 {
4766 &quot;itemno&quot;: 460,
4767 &quot;qty&quot;: 20,
4768 &quot;price&quot;: 99.99
4769 }
4770 ]
4771 },
4772 {
4773 &quot;orderno&quot;: 1009,
4774 &quot;custid&quot;: &quot;C13&quot;,
4775 &quot;order_date&quot;: &quot;2020-10-13&quot;,
4776 &quot;items&quot;: []
4777 }
4778]
4779</pre></div></div></div></div>
4780 </div>
4781 </div>
4782 </div>
4783 <hr/>
4784 <footer>
4785 <div class="container-fluid">
4786 <div class="row-fluid">
4787<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
4788 feather logo, and the Apache AsterixDB project logo are either
4789 registered trademarks or trademarks of The Apache Software
4790 Foundation in the United States and other countries.
4791 All other marks mentioned may be trademarks or registered
4792 trademarks of their respective owners.
4793 </div>
4794 </div>
4795 </div>
4796 </footer>
4797 </body>
4798</html>