blob: 845214fa67c3c3a88eb9dc243fc464beb9a43cbf [file] [log] [blame]
Ian Maxond5b11d82017-01-25 10:48:05 -08001<!DOCTYPE html>
2<!--
3 | Generated by Apache Maven Doxia at 2017-01-25
4 | Rendered using Apache Maven Fluido Skin 1.3.0
5-->
6<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7 <head>
8 <meta charset="UTF-8" />
9 <meta name="viewport" content="width=device-width, initial-scale=1.0" />
10 <meta name="Date-Revision-yyyymmdd" content="20170125" />
11 <meta http-equiv="Content-Language" content="en" />
12 <title>AsterixDB &#x2013; The SQL++ Query Language</title>
13 <link rel="stylesheet" href="../css/apache-maven-fluido-1.3.0.min.css" />
14 <link rel="stylesheet" href="../css/site.css" />
15 <link rel="stylesheet" href="../css/print.css" media="print" />
16
17
18 <script type="text/javascript" src="../js/apache-maven-fluido-1.3.0.min.js"></script>
19
20
21
22<script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
23 (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
24 m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
25 })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
26
27 ga('create', 'UA-41536543-1', 'uci.edu');
28 ga('send', 'pageview');</script>
29
30 </head>
31 <body class="topBarDisabled">
32
33
34
35
36 <div class="container-fluid">
37 <div id="banner">
38 <div class="pull-left">
39 <a href=".././" id="bannerLeft">
40 <img src="../images/asterixlogo.png" alt="AsterixDB"/>
41 </a>
42 </div>
43 <div class="pull-right"> </div>
44 <div class="clear"><hr/></div>
45 </div>
46
47 <div id="breadcrumbs">
48 <ul class="breadcrumb">
49
50
51 <li id="publishDate">Last Published: 2017-01-25</li>
52
53
54
55 <li id="projectVersion" class="pull-right">Version: 0.9.0</li>
56
57 <li class="divider pull-right">|</li>
58
59 <li class="pull-right"> <a href="../index.html" title="Documentation Home">
60 Documentation Home</a>
61 </li>
62
63 </ul>
64 </div>
65
66
67 <div class="row-fluid">
68 <div id="leftColumn" class="span3">
69 <div class="well sidebar-nav">
70
71
72 <ul class="nav nav-list">
73 <li class="nav-header">Get Started - Installation</li>
74
75 <li>
76
77 <a href="../ncservice.html" title="Option 1: using NCService">
78 <i class="none"></i>
79 Option 1: using NCService</a>
80 </li>
81
82 <li>
83
84 <a href="../install.html" title="Option 2: using Managix">
85 <i class="none"></i>
86 Option 2: using Managix</a>
87 </li>
88
89 <li>
90
91 <a href="../yarn.html" title="Option 3: using YARN">
92 <i class="none"></i>
93 Option 3: using YARN</a>
94 </li>
95 <li class="nav-header">AsterixDB Primer</li>
96
97 <li>
98
99 <a href="../sqlpp/primer-sqlpp.html" title="Option 1: using SQL++">
100 <i class="none"></i>
101 Option 1: using SQL++</a>
102 </li>
103
104 <li>
105
106 <a href="../aql/primer.html" title="Option 2: using AQL">
107 <i class="none"></i>
108 Option 2: using AQL</a>
109 </li>
110 <li class="nav-header">Data Model</li>
111
112 <li>
113
114 <a href="../datamodel.html" title="The Asterix Data Model">
115 <i class="none"></i>
116 The Asterix Data Model</a>
117 </li>
118 <li class="nav-header">Queries - SQL++</li>
119
120 <li class="active">
121
122 <a href="#"><i class="none"></i>The SQL++ Query Language</a>
123 </li>
124
125 <li>
126
127 <a href="../sqlpp/builtins.html" title="Builtin Functions">
128 <i class="none"></i>
129 Builtin Functions</a>
130 </li>
131 <li class="nav-header">Queries - AQL</li>
132
133 <li>
134
135 <a href="../aql/manual.html" title="The Asterix Query Language (AQL)">
136 <i class="none"></i>
137 The Asterix Query Language (AQL)</a>
138 </li>
139
140 <li>
141
142 <a href="../aql/builtins.html" title="Builtin Functions">
143 <i class="none"></i>
144 Builtin Functions</a>
145 </li>
146 <li class="nav-header">Advanced Features</li>
147
148 <li>
149
150 <a href="../aql/similarity.html" title="Support of Similarity Queries">
151 <i class="none"></i>
152 Support of Similarity Queries</a>
153 </li>
154
155 <li>
156
157 <a href="../aql/fulltext.html" title="Support of Full-text Queries">
158 <i class="none"></i>
159 Support of Full-text Queries</a>
160 </li>
161
162 <li>
163
164 <a href="../aql/externaldata.html" title="Accessing External Data">
165 <i class="none"></i>
166 Accessing External Data</a>
167 </li>
168
169 <li>
170
171 <a href="../feeds/tutorial.html" title="Support for Data Ingestion">
172 <i class="none"></i>
173 Support for Data Ingestion</a>
174 </li>
175
176 <li>
177
178 <a href="../udf.html" title="User Defined Functions">
179 <i class="none"></i>
180 User Defined Functions</a>
181 </li>
182
183 <li>
184
185 <a href="../aql/filters.html" title="Filter-Based LSM Index Acceleration">
186 <i class="none"></i>
187 Filter-Based LSM Index Acceleration</a>
188 </li>
189 <li class="nav-header">API/SDK</li>
190
191 <li>
192
193 <a href="../api.html" title="HTTP API">
194 <i class="none"></i>
195 HTTP API</a>
196 </li>
197 </ul>
198
199
200
201 <hr class="divider" />
202
203 <div id="poweredBy">
204 <div class="clear"></div>
205 <div class="clear"></div>
206 <div class="clear"></div>
207 <a href=".././" title="AsterixDB" class="builtBy">
208 <img class="builtBy" alt="AsterixDB" src="../images/asterixlogo.png" />
209 </a>
210 </div>
211 </div>
212 </div>
213
214
215 <div id="bodyColumn" class="span9" >
216
217 <!-- ! Licensed to the Apache Software Foundation (ASF) under one
218 ! or more contributor license agreements. See the NOTICE file
219 ! distributed with this work for additional information
220 ! regarding copyright ownership. The ASF licenses this file
221 ! to you under the Apache License, Version 2.0 (the
222 ! "License"); you may not use this file except in compliance
223 ! with the License. You may obtain a copy of the License at
224 !
225 ! http://www.apache.org/licenses/LICENSE-2.0
226 !
227 ! Unless required by applicable law or agreed to in writing,
228 ! software distributed under the License is distributed on an
229 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
230 ! KIND, either express or implied. See the License for the
231 ! specific language governing permissions and limitations
232 ! under the License.
233 ! --><h1>The SQL++ Query Language</h1>
234<div class="section">
235<h2><a name="Table_of_Contents"></a><a name="toc" id="toc">Table of Contents</a></h2>
236
237<ul>
238
239<li><a href="#Introduction">1. Introduction</a></li>
240
241<li><a href="#Expressions">2. Expressions</a>
242
243<ul>
244
245<li><a href="#Operator_expressions">Operator expressions</a>
246
247<ul>
248
249<li><a href="#Arithmetic_operators">Arithmetic operators</a></li>
250
251<li><a href="#Collection_operators">Collection operators</a></li>
252
253<li><a href="#Comparison_operators">Comparison operators</a></li>
254
255<li><a href="#Logical_operators">Logical operators</a></li>
256 </ul></li>
257
258<li><a href="#Case_expressions">Case expressions</a></li>
259
260<li><a href="#Quantified_expressions">Quantified expressions</a></li>
261
262<li><a href="#Path_expressions">Path expressions</a></li>
263
264<li><a href="#Primary_expressions">Primary expressions</a>
265
266<ul>
267
268<li><a href="#Literals">Literals</a></li>
269
270<li><a href="#Variable_references">Variable references</a></li>
271
272<li><a href="#Parenthesized_expressions">Parenthesized expressions</a></li>
273
274<li><a href="#Function_call_expressions">Function call expressions</a></li>
275
276<li><a href="#Constructors">Constructors</a></li>
277 </ul></li>
278 </ul></li>
279
280<li><a href="#Queries">3. Queries</a>
281
282<ul>
283
284<li><a href="#SELECT_statements">SELECT statements</a></li>
285
286<li><a href="#Select_clauses">SELECT clauses</a>
287
288<ul>
289
290<li><a href="#Select_element">Select element/value/raw</a></li>
291
292<li><a href="#SQL_select">SQL-style select</a></li>
293
294<li><a href="#Select_star">Select *</a></li>
295
296<li><a href="#Select_distinct">Select distinct</a></li>
297
298<li><a href="#Unnamed_projections">Unnamed projections</a></li>
299
300<li><a href="#Abbreviatory_field_access_expressions">Abbreviatory field access expressions</a></li>
301 </ul></li>
302
303<li><a href="#Unnest_clauses">UNNEST clauses</a>
304
305<ul>
306
307<li><a href="#Inner_unnests">Inner unnests</a></li>
308
309<li><a href="#Left_outer_unnests">Left outer unnests</a></li>
310
311<li><a href="#Expressing_joins_using_unnests">Expressing joins using unnests</a></li>
312 </ul></li>
313
314<li><a href="#From_clauses">FROM clauses</a>
315
316<ul>
317
318<li><a href="#Binding_expressions">Binding expressions</a></li>
319
320<li><a href="#Multiple_from_terms">Multiple from terms</a></li>
321
322<li><a href="#Expressing_joins_using_from_terms">Expressing joins using from terms</a></li>
323
324<li><a href="#Implicit_binding_variables">Implicit binding variables</a></li>
325 </ul></li>
326
327<li><a href="#Join_clauses">JOIN clauses</a>
328
329<ul>
330
331<li><a href="#Inner_joins">Inner joins</a></li>
332
333<li><a href="#Left_outer_joins">Left outer joins</a></li>
334 </ul></li>
335
336<li><a href="#Group_By_clauses">GROUP BY clauses</a>
337
338<ul>
339
340<li><a href="#Group_variables">Group variables</a></li>
341
342<li><a href="#Implicit_group_key_variables">Implicit group key variables</a></li>
343
344<li><a href="#Implicit_group_variables">Implicit group variables</a></li>
345
346<li><a href="#Aggregation_functions">Aggregation functions</a></li>
347
348<li><a href="#SQL-92_aggregation_functions">SQL-92 aggregation functions</a></li>
349
350<li><a href="#SQL-92_compliant_gby">SQL-92 compliant GROUP BY aggregations</a></li>
351
352<li><a href="#Column_aliases">Column aliases</a></li>
353 </ul></li>
354
355<li><a href="#Where_having_clauses">WHERE clauses and HAVING clauses</a></li>
356
357<li><a href="#Order_By_clauses">ORDER BY clauses</a></li>
358
359<li><a href="#Limit_clauses">LIMIT clauses</a></li>
360
361<li><a href="#With_clauses">WITH clauses</a></li>
362
363<li><a href="#Let_clauses">LET clauses</a></li>
364
365<li><a href="#Union_all">UNION ALL</a></li>
366
367<li><a href="#Vs_SQL-92">SQL++ Vs. SQL-92</a></li>
368 </ul></li>
369
370<li><a href="#Errors">4. Errors</a>
371
372<ul>
373
374<li><a href="#Syntax_errors">Syntax errors</a></li>
375
376<li><a href="#Parsing_errors">Identifier resolution errors</a></li>
377
378<li><a href="#Type_errors">Type errors</a></li>
379
380<li><a href="#Resource_errors">Resource errors</a></li>
381 </ul></li>
382
383<li><a href="#DDL_and_DML_statements">5. DDL and DML statements</a>
384
385<ul>
386
387<li><a href="#Declarations">Declarations</a></li>
388
389<li><a href="#Lifecycle_management_statements">Lifecycle management statements</a>
390
391<ul>
392
393<li><a href="#Dataverses">Dataverses</a></li>
394
395<li><a href="#Datasets">Datasets</a></li>
396
397<li><a href="#Types">Types</a></li>
398
399<li><a href="#Functions">Functions</a></li>
400 </ul></li>
401
402<li><a href="#Modification_statements">Modification statements</a>
403
404<ul>
405
406<li><a href="#Inserts">Inserts</a></li>
407
408<li><a href="#Upserts">Upserts</a></li>
409
410<li><a href="#Deletes">Deletes</a></li>
411 </ul></li>
412 </ul></li>
413
414<li><a href="#Reserved_keywords">Appendix 1. Reserved keywords</a></li>
415</ul>
416<!-- ! Licensed to the Apache Software Foundation (ASF) under one
417 ! or more contributor license agreements. See the NOTICE file
418 ! distributed with this work for additional information
419 ! regarding copyright ownership. The ASF licenses this file
420 ! to you under the Apache License, Version 2.0 (the
421 ! "License"); you may not use this file except in compliance
422 ! with the License. You may obtain a copy of the License at
423 !
424 ! http://www.apache.org/licenses/LICENSE-2.0
425 !
426 ! Unless required by applicable law or agreed to in writing,
427 ! software distributed under the License is distributed on an
428 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
429 ! KIND, either express or implied. See the License for the
430 ! specific language governing permissions and limitations
431 ! under the License.
432 ! -->
433<h1><a name="Introduction" id="Introduction">1. Introduction</a><font size="3" /></h1>
434<p>This document is intended as a reference guide to the full syntax and semantics of the SQL++ Query Language, a SQL-inspired language for working with semistructured data. SQL++ has much in common with SQL, but some differences do exist due to the different data models that the two languages were designed to serve. SQL was designed in the 1970&#x2019;s for interacting with the flat, schema-ified world of relational databases, while SQL++ is much newer and targets the nested, schema-optional (or even schema-less) world of modern NoSQL systems.</p>
435<p>In the context of Apache AsterixDB, SQL++ is intended for working with the Asterix Data Model (<a href="../datamodel.html">ADM</a>), a data model based on a superset of JSON with an enriched and flexible type system. New AsterixDB users are encouraged to read and work through the (much friendlier) guide &#x201c;<a href="primer-sqlpp.html">AsterixDB 101: An ADM and SQL++ Primer</a>&#x201d; before attempting to make use of this document. In addition, readers are advised to read through the <a href="../datamodel.html">Asterix Data Model (ADM) reference guide</a> first as well, as an understanding of the data model is a prerequisite to understanding SQL++.</p>
436<p>In what follows, we detail the features of the SQL++ language in a grammar-guided manner. We list and briefly explain each of the productions in the SQL++ grammar, offering examples (and results) for clarity.</p>
437<!-- ! Licensed to the Apache Software Foundation (ASF) under one
438 ! or more contributor license agreements. See the NOTICE file
439 ! distributed with this work for additional information
440 ! regarding copyright ownership. The ASF licenses this file
441 ! to you under the Apache License, Version 2.0 (the
442 ! "License"); you may not use this file except in compliance
443 ! with the License. You may obtain a copy of the License at
444 !
445 ! http://www.apache.org/licenses/LICENSE-2.0
446 !
447 ! Unless required by applicable law or agreed to in writing,
448 ! software distributed under the License is distributed on an
449 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
450 ! KIND, either express or implied. See the License for the
451 ! specific language governing permissions and limitations
452 ! under the License.
453 ! -->
454<h1><a name="Expressions" id="Expressions">2. Expressions</a></h1>
455
456<div class="source">
457<div class="source">
458<pre>Expression ::= OperatorExpression | CaseExpression | QuantifiedExpression
459</pre></div></div>
460<p>SQL++ is a highly composable expression language. Each SQL++ expression returns zero or more data model instances. There are three major kinds of expressions in SQL++. At the topmost level, a SQL++ expression can be an OperatorExpression (similar to a mathematical expression), a CaseExpression (to choose between alternative values), or a QuantifiedExpression (which yields a boolean value). Each will be detailed as we explore the full SQL++ grammar.</p>
461<p>Note that in the following text, words enclosed in angle brackets denote keywords that are not case-sensitive.</p></div>
462<div class="section">
463<h2><a name="Operator_expressions" id="Operator_expressions">Operator expressions</a></h2>
464<p>Operators perform a specific operation on the input values or expressions. The syntax of an operator expression is as follows:</p>
465
466<div class="source">
467<div class="source">
468<pre>OperatorExpression ::= PathExpression
469 | Operator OperatorExpression
470 | OperatorExpression Operator (OperatorExpression)?
471 | OperatorExpression &lt;BETWEEN&gt; OperatorExpression &lt;AND&gt; OperatorExpression
472</pre></div></div>
473<p>SQL++ provides a full set of operators that you can use within its statements. Here are the categories of operators:</p>
474
475<ul>
476
477<li><a href="#Arithmetic_operators">Arithmetic operators</a>, to perform basic mathematical operations;</li>
478
479<li><a href="#Collection_operators">Collection operators</a>, to evaluate expressions on collections or objects;</li>
480
481<li><a href="#Comparison_operators">Comparison operators</a>, to compare two expressions;</li>
482
483<li><a href="#Logical_operators">Logical operators</a>, to combine operators using Boolean logic.</li>
484</ul>
485<p>The following table summarizes the precedence order (from higher to lower) of the major unary and binary operators:</p>
486
487<table border="0" class="table table-striped">
488 <thead>
489
490<tr class="a">
491
492<th>Operator </th>
493
494<th>Operation </th>
495 </tr>
496 </thead>
497 <tbody>
498
499<tr class="b">
500
501<td>EXISTS, NOT EXISTS </td>
502
503<td>collection emptiness testing </td>
504 </tr>
505
506<tr class="a">
507
508<td>^ </td>
509
510<td>exponentiation </td>
511 </tr>
512
513<tr class="b">
514
515<td>*, /, % </td>
516
517<td>multiplication, division, modulo </td>
518 </tr>
519
520<tr class="a">
521
522<td>+, - </td>
523
524<td>addition, subtraction </td>
525 </tr>
526
527<tr class="b">
528
529<td>|| </td>
530
531<td>string concatenation </td>
532 </tr>
533
534<tr class="a">
535
536<td>IS NULL, IS NOT NULL, IS MISSING, IS NOT MISSING, <br />IS UNKNOWN, IS NOT UNKNOWN</td>
537
538<td>unknown value comparison </td>
539 </tr>
540
541<tr class="b">
542
543<td>BETWEEN, NOT BETWEEN </td>
544
545<td>range comparison (inclusive on both sides) </td>
546 </tr>
547
548<tr class="a">
549
550<td>=, !=, &lt;, &gt;, &lt;=, &gt;=, LIKE, NOT LIKE, IN, NOT IN </td>
551
552<td>comparison </td>
553 </tr>
554
555<tr class="b">
556
557<td>NOT </td>
558
559<td>logical negation </td>
560 </tr>
561
562<tr class="a">
563
564<td>AND </td>
565
566<td>conjunction </td>
567 </tr>
568
569<tr class="b">
570
571<td>OR </td>
572
573<td>disjunction </td>
574 </tr>
575 </tbody>
576</table>
577<p>In general, if any operand evaluates to a <tt>MISSING</tt> value, the enclosing operator will return <tt>MISSING</tt>; if none of the operands evaluates to a <tt>MISSING</tt> value but there is an operand that evaluates to a <tt>NULL</tt> value, the enclosing operator will return <tt>NULL</tt>. However, there are a few exceptions listed in <a href="#Comparison_operators">comparison operators</a> and <a href="#Logical_operators">logical operators</a>.</p>
578<div class="section">
579<h3><a name="Arithmetic_operators" id="Arithmetic_operators">Arithmetic operators</a></h3>
580<p>Arithmetic operators are used to exponentiate, add, subtract, multiply, and divide numeric values, or concatenate string values.</p>
581
582<table border="0" class="table table-striped">
583 <thead>
584
585<tr class="a">
586
587<th>Operator </th>
588
589<th>Purpose </th>
590
591<th>Example </th>
592 </tr>
593 </thead>
594 <tbody>
595
596<tr class="b">
597
598<td>+, - </td>
599
600<td>As unary operators, they denote a <br />positive or negative expression </td>
601
602<td>SELECT VALUE -1; </td>
603 </tr>
604
605<tr class="a">
606
607<td>+, - </td>
608
609<td>As binary operators, they add or subtract </td>
610
611<td>SELECT VALUE 1 + 2; </td>
612 </tr>
613
614<tr class="b">
615
616<td>*, / </td>
617
618<td>Multiply, divide </td>
619
620<td>SELECT VALUE 4 / 2.0; </td>
621 </tr>
622
623<tr class="a">
624
625<td>^ </td>
626
627<td>Exponentiation </td>
628
629<td>SELECT VALUE 2^3; </td>
630 </tr>
631
632<tr class="b">
633
634<td>|| </td>
635
636<td>String concatenation </td>
637
638<td>SELECT VALUE &quot;ab&quot;||&quot;c&quot;||&quot;d&quot;; </td>
639 </tr>
640 </tbody>
641</table></div>
642<div class="section">
643<h3><a name="Collection_operators" id="Collection_operators">Collection operators</a></h3>
644<p>Collection operators are used for membership tests (IN, NOT IN) or empty collection tests (EXISTS, NOT EXISTS).</p>
645
646<table border="0" class="table table-striped">
647 <thead>
648
649<tr class="a">
650
651<th>Operator </th>
652
653<th>Purpose </th>
654
655<th>Example </th>
656 </tr>
657 </thead>
658 <tbody>
659
660<tr class="b">
661
662<td>IN </td>
663
664<td>Membership test </td>
665
666<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.lang IN [&quot;en&quot;, &quot;de&quot;]; </td>
667 </tr>
668
669<tr class="a">
670
671<td>NOT IN </td>
672
673<td>Non-membership test </td>
674
675<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.lang NOT IN [&quot;en&quot;]; </td>
676 </tr>
677
678<tr class="b">
679
680<td>EXISTS </td>
681
682<td>Check whether a collection is not empty </td>
683
684<td>SELECT * FROM ChirpMessages cm <br />WHERE EXISTS cm.referredTopics; </td>
685 </tr>
686
687<tr class="a">
688
689<td>NOT EXISTS </td>
690
691<td>Check whether a collection is empty </td>
692
693<td>SELECT * FROM ChirpMessages cm <br />WHERE NOT EXISTS cm.referredTopics; </td>
694 </tr>
695 </tbody>
696</table></div>
697<div class="section">
698<h3><a name="Comparison_operators" id="Comparison_operators">Comparison operators</a></h3>
699<p>Comparison operators are used to compare values. The comparison operators fall into one of two sub-categories: missing value comparisons and regular value comparisons. SQL++ (and JSON) has two ways of representing missing information in an object - the presence of the field with a NULL for its value (as in SQL), and the absence of the field (which JSON permits). For example, the first of the following objects represents Jack, whose friend is Jill. In the other examples, Jake is friendless a la SQL, with a friend field that is NULL, while Joe is friendless in a more natural (for JSON) way, i.e., by not having a friend field.</p>
700<div class="section">
701<div class="section">
702<h5><a name="Examples"></a>Examples</h5>
703<p>{&quot;name&quot;: &quot;Jack&quot;, &quot;friend&quot;: &quot;Jill&quot;}</p>
704<p>{&quot;name&quot;: &quot;Jake&quot;, &quot;friend&quot;: NULL}</p>
705<p>{&quot;name&quot;: &quot;Joe&quot;}</p>
706<p>The following table enumerates all of SQL++&#x2019;s comparison operators.</p>
707
708<table border="0" class="table table-striped">
709 <thead>
710
711<tr class="a">
712
713<th>Operator </th>
714
715<th>Purpose </th>
716
717<th>Example </th>
718 </tr>
719 </thead>
720 <tbody>
721
722<tr class="b">
723
724<td>IS NULL </td>
725
726<td>Test if a value is NULL </td>
727
728<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS NULL; </td>
729 </tr>
730
731<tr class="a">
732
733<td>IS NOT NULL </td>
734
735<td>Test if a value is not NULL </td>
736
737<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS NOT NULL; </td>
738 </tr>
739
740<tr class="b">
741
742<td>IS MISSING </td>
743
744<td>Test if a value is MISSING </td>
745
746<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS MISSING; </td>
747 </tr>
748
749<tr class="a">
750
751<td>IS NOT MISSING </td>
752
753<td>Test if a value is not MISSING </td>
754
755<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS NOT MISSING;</td>
756 </tr>
757
758<tr class="b">
759
760<td>IS UNKNOWN </td>
761
762<td>Test if a value is NULL or MISSING </td>
763
764<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS UNKNOWN; </td>
765 </tr>
766
767<tr class="a">
768
769<td>IS NOT UNKNOWN </td>
770
771<td>Test if a value is neither NULL nor MISSING </td>
772
773<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS NOT UNKNOWN;</td>
774 </tr>
775
776<tr class="b">
777
778<td>BETWEEN </td>
779
780<td>Test if a value is between a start value and <br />an end value. The comparison is inclusive <br />to both start and end values. </td>
781
782<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId BETWEEN 10 AND 20;</td>
783 </tr>
784
785<tr class="a">
786
787<td>= </td>
788
789<td>Equality test </td>
790
791<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId=10; </td>
792 </tr>
793
794<tr class="b">
795
796<td>!= </td>
797
798<td>Inequality test </td>
799
800<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId!=10;</td>
801 </tr>
802
803<tr class="a">
804
805<td>&lt; </td>
806
807<td>Less than </td>
808
809<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId&lt;10; </td>
810 </tr>
811
812<tr class="b">
813
814<td>&gt; </td>
815
816<td>Greater than </td>
817
818<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId&gt;10; </td>
819 </tr>
820
821<tr class="a">
822
823<td>&lt;= </td>
824
825<td>Less than or equal to </td>
826
827<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId&lt;=10; </td>
828 </tr>
829
830<tr class="b">
831
832<td>&gt;= </td>
833
834<td>Greater than or equal to </td>
835
836<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId&gt;=10; </td>
837 </tr>
838
839<tr class="a">
840
841<td>LIKE </td>
842
843<td>Test if the left side matches a<br /> pattern defined on the right<br /> side; in the pattern, &#x201c;%&#x201d; matches <br />any string while &#x201c;_&#x201d; matches <br /> any single character. </td>
844
845<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name LIKE &quot;%Giesen%&quot;;</td>
846 </tr>
847
848<tr class="b">
849
850<td>NOT LIKE </td>
851
852<td>Test if the left side does not <br />match a pattern defined on the right<br /> side; in the pattern, &#x201c;%&#x201d; matches <br />any string while &#x201c;_&#x201d; matches <br /> any single character. </td>
853
854<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name NOT LIKE &quot;%Giesen%&quot;;</td>
855 </tr>
856 </tbody>
857</table>
858<p>The following table summarizes how the missing value comparison operators work.</p>
859
860<table border="0" class="table table-striped">
861 <thead>
862
863<tr class="a">
864
865<th>Operator </th>
866
867<th>Non-NULL/Non-MISSING value </th>
868
869<th>NULL </th>
870
871<th>MISSING </th>
872 </tr>
873 </thead>
874 <tbody>
875
876<tr class="b">
877
878<td>IS NULL </td>
879
880<td>FALSE </td>
881
882<td>TRUE </td>
883
884<td>MISSING </td>
885 </tr>
886
887<tr class="a">
888
889<td>IS NOT NULL </td>
890
891<td>TRUE </td>
892
893<td>FALSE </td>
894
895<td>MISSING </td>
896 </tr>
897
898<tr class="b">
899
900<td>IS MISSING </td>
901
902<td>FALSE </td>
903
904<td>FALSE </td>
905
906<td>TRUE </td>
907 </tr>
908
909<tr class="a">
910
911<td>IS NOT MISSING </td>
912
913<td>TRUE </td>
914
915<td>TRUE </td>
916
917<td>FALSE </td>
918 </tr>
919
920<tr class="b">
921
922<td>IS UNKNOWN </td>
923
924<td>FALSE </td>
925
926<td>TRUE </td>
927
928<td>TRUE </td>
929 </tr>
930
931<tr class="a">
932
933<td>IS NOT UNKNOWN </td>
934
935<td>TRUE </td>
936
937<td>FALSE </td>
938
939<td>FALSE</td>
940 </tr>
941 </tbody>
942</table></div></div></div>
943<div class="section">
944<h3><a name="Logical_operators" id="Logical_operators">Logical operators</a></h3>
945<p>Logical operators perform logical <tt>NOT</tt>, <tt>AND</tt>, and <tt>OR</tt> operations over Boolean values (<tt>TRUE</tt> and <tt>FALSE</tt>) plus <tt>NULL</tt> and <tt>MISSING</tt>.</p>
946
947<table border="0" class="table table-striped">
948 <thead>
949
950<tr class="a">
951
952<th>Operator </th>
953
954<th>Purpose </th>
955
956<th>Example </th>
957 </tr>
958 </thead>
959 <tbody>
960
961<tr class="b">
962
963<td>NOT </td>
964
965<td>Returns true if the following condition is false, otherwise returns false </td>
966
967<td>SELECT VALUE NOT TRUE; </td>
968 </tr>
969
970<tr class="a">
971
972<td>AND </td>
973
974<td>Returns true if both branches are true, otherwise returns false </td>
975
976<td>SELECT VALUE TRUE AND FALSE; </td>
977 </tr>
978
979<tr class="b">
980
981<td>OR </td>
982
983<td>Returns true if one branch is true, otherwise returns false </td>
984
985<td>SELECT VALUE FALSE OR FALSE; </td>
986 </tr>
987 </tbody>
988</table>
989<p>The following table is the truth table for <tt>AND</tt> and <tt>OR</tt>.</p>
990
991<table border="0" class="table table-striped">
992 <thead>
993
994<tr class="a">
995
996<th>A </th>
997
998<th>B </th>
999
1000<th>A AND B </th>
1001
1002<th>A OR B </th>
1003 </tr>
1004 </thead>
1005 <tbody>
1006
1007<tr class="b">
1008
1009<td>TRUE </td>
1010
1011<td>TRUE </td>
1012
1013<td>TRUE </td>
1014
1015<td>TRUE </td>
1016 </tr>
1017
1018<tr class="a">
1019
1020<td>TRUE </td>
1021
1022<td>FALSE </td>
1023
1024<td>FALSE </td>
1025
1026<td>TRUE </td>
1027 </tr>
1028
1029<tr class="b">
1030
1031<td>TRUE </td>
1032
1033<td>NULL </td>
1034
1035<td>NULL </td>
1036
1037<td>TRUE </td>
1038 </tr>
1039
1040<tr class="a">
1041
1042<td>TRUE </td>
1043
1044<td>MISSING </td>
1045
1046<td>MISSING </td>
1047
1048<td>TRUE </td>
1049 </tr>
1050
1051<tr class="b">
1052
1053<td>FALSE </td>
1054
1055<td>FALSE </td>
1056
1057<td>FALSE </td>
1058
1059<td>FALSE </td>
1060 </tr>
1061
1062<tr class="a">
1063
1064<td>FALSE </td>
1065
1066<td>NULL </td>
1067
1068<td>FALSE </td>
1069
1070<td>NULL </td>
1071 </tr>
1072
1073<tr class="b">
1074
1075<td>FALSE </td>
1076
1077<td>MISSING </td>
1078
1079<td>FALSE </td>
1080
1081<td>MISSING </td>
1082 </tr>
1083
1084<tr class="a">
1085
1086<td>NULL </td>
1087
1088<td>NULL </td>
1089
1090<td>NULL </td>
1091
1092<td>NULL </td>
1093 </tr>
1094
1095<tr class="b">
1096
1097<td>NULL </td>
1098
1099<td>MISSING </td>
1100
1101<td>MISSING </td>
1102
1103<td>NULL </td>
1104 </tr>
1105
1106<tr class="a">
1107
1108<td>MISSING </td>
1109
1110<td>MISSING </td>
1111
1112<td>MISSING </td>
1113
1114<td>MISSING </td>
1115 </tr>
1116 </tbody>
1117</table>
1118<p>The following table demonstrates the results of <tt>NOT</tt> on all possible inputs.</p>
1119
1120<table border="0" class="table table-striped">
1121 <thead>
1122
1123<tr class="a">
1124
1125<th>A </th>
1126
1127<th>NOT A </th>
1128 </tr>
1129 </thead>
1130 <tbody>
1131
1132<tr class="b">
1133
1134<td>TRUE </td>
1135
1136<td>FALSE </td>
1137 </tr>
1138
1139<tr class="a">
1140
1141<td>FALSE </td>
1142
1143<td>TRUE </td>
1144 </tr>
1145
1146<tr class="b">
1147
1148<td>NULL </td>
1149
1150<td>NULL </td>
1151 </tr>
1152
1153<tr class="a">
1154
1155<td>MISSING </td>
1156
1157<td>MISSING </td>
1158 </tr>
1159 </tbody>
1160</table></div></div>
1161<div class="section">
1162<h2><a name="Case_expressions" id="Case_expressions">Case expressions</a></h2>
1163
1164<div class="source">
1165<div class="source">
1166<pre>CaseExpression ::= SimpleCaseExpression | SearchedCaseExpression
1167SimpleCaseExpression ::= &lt;CASE&gt; Expression ( &lt;WHEN&gt; Expression &lt;THEN&gt; Expression )+ ( &lt;ELSE&gt; Expression )? &lt;END&gt;
1168SearchedCaseExpression ::= &lt;CASE&gt; ( &lt;WHEN&gt; Expression &lt;THEN&gt; Expression )+ ( &lt;ELSE&gt; Expression )? &lt;END&gt;
1169</pre></div></div>
1170<p>In a simple <tt>CASE</tt> expression, the query evaluator searches for the first <tt>WHEN</tt> &#x2026; <tt>THEN</tt> pair in which the <tt>WHEN</tt> expression is equal to the expression following <tt>CASE</tt> and returns the expression following <tt>THEN</tt>. If none of the <tt>WHEN</tt> &#x2026; <tt>THEN</tt> pairs meet this condition, and an <tt>ELSE</tt> branch exists, it returns the <tt>ELSE</tt> expression. Otherwise, <tt>NULL</tt> is returned.</p>
1171<p>In a searched CASE expression, the query evaluator searches from left to right until it finds a <tt>WHEN</tt> expression that is evaluated to <tt>TRUE</tt>, and then returns its corresponding <tt>THEN</tt> expression. If no condition is found to be <tt>TRUE</tt>, and an <tt>ELSE</tt> branch exists, it returns the <tt>ELSE</tt> expression. Otherwise, it returns <tt>NULL</tt>.</p>
1172<p>The following example illustrates the form of a case expression.</p>
1173<div class="section">
1174<div class="section">
1175<div class="section">
1176<h5><a name="Example"></a>Example</h5>
1177
1178<div class="source">
1179<div class="source">
1180<pre>CASE (2 &lt; 3) WHEN true THEN &quot;yes&quot; ELSE &quot;no&quot; END
1181</pre></div></div></div></div></div></div>
1182<div class="section">
1183<h2><a name="Quantified_expressions" id="Quantified_expressions">Quantified expressions</a></h2>
1184
1185<div class="source">
1186<div class="source">
1187<pre>QuantifiedExpression ::= ( (&lt;ANY&gt;|&lt;SOME&gt;) | &lt;EVERY&gt; ) Variable &lt;IN&gt; Expression ( &quot;,&quot; Variable &lt;IN&gt; Expression )*
1188 &lt;SATISFIES&gt; Expression (&lt;END&gt;)?
1189</pre></div></div>
1190<p>Quantified expressions are used for expressing existential or universal predicates involving the elements of a collection.</p>
1191<p>The following pair of examples illustrates the use of a quantified expression to test that every (or some) element in the set [1, 2, 3] of integers is less than three. The first example yields <tt>FALSE</tt> and the second example yields <tt>TRUE</tt>.</p>
1192<p>It is useful to note that if the set were instead the empty set, the first expression would yield <tt>TRUE</tt> (&#x201c;every&#x201d; value in an empty set satisfies the condition) while the second expression would yield <tt>FALSE</tt> (since there isn&#x2019;t &#x201c;some&#x201d; value, as there are no values in the set, that satisfies the condition).</p>
1193<p>A quantified expression will return a <tt>NULL</tt> (or <tt>MISSING</tt>) if the first expression in it evaluates to <tt>NULL</tt> (or <tt>MISSING</tt>). A type error will be raised if the first expression in a quantified expression does not return a collection.</p>
1194<div class="section">
1195<div class="section">
1196<div class="section">
1197<h5><a name="Examples"></a>Examples</h5>
1198
1199<div class="source">
1200<div class="source">
1201<pre>EVERY x IN [ 1, 2, 3 ] SATISFIES x &lt; 3
1202SOME x IN [ 1, 2, 3 ] SATISFIES x &lt; 3
1203</pre></div></div></div></div></div></div>
1204<div class="section">
1205<h2><a name="Path_expressions" id="Path_expressions">Path expressions</a></h2>
1206
1207<div class="source">
1208<div class="source">
1209<pre>PathExpression ::= PrimaryExpression ( Field | Index )*
1210Field ::= &quot;.&quot; Identifier
1211Index ::= &quot;[&quot; ( Expression | &quot;?&quot; ) &quot;]&quot;
1212</pre></div></div>
1213<p>Components of complex types in the data model are accessed via path expressions. Path access can be applied to the result of a SQL++ expression that yields an instance of a complex type, e.g., an object or array instance. For objects, path access is based on field names. For arrays, path access is based on (zero-based) array-style indexing. SQL++ also supports an &#x201c;I&#x2019;m feeling lucky&#x201d; style index accessor, [?], for selecting an arbitrary element from an array. Attempts to access non-existent fields or out-of-bound array elements produce the special value <tt>MISSING</tt>. Type errors will be raised for inappropriate use of a path expression, such as applying a field accessor to a numeric value.</p>
1214<p>The following examples illustrate field access for an object, index-based element access for an array, and also a composition thereof.</p>
1215<div class="section">
1216<div class="section">
1217<div class="section">
1218<h5><a name="Examples"></a>Examples</h5>
1219
1220<div class="source">
1221<div class="source">
1222<pre>({&quot;name&quot;: &quot;MyABCs&quot;, &quot;array&quot;: [ &quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}).array
1223
1224([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[2]
1225
1226({&quot;name&quot;: &quot;MyABCs&quot;, &quot;array&quot;: [ &quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}).array[2]
1227</pre></div></div></div></div></div></div>
1228<div class="section">
1229<h2><a name="Primary_Expressions"></a><a name="Primary_expressions" id="Primary_expressions">Primary Expressions</a></h2>
1230
1231<div class="source">
1232<div class="source">
1233<pre>PrimaryExpression ::= Literal
1234 | VariableReference
1235 | ParenthesizedExpression
1236 | FunctionCallExpression
1237 | Constructor
1238</pre></div></div>
1239<p>The most basic building block for any SQL++ expression is PrimaryExpression. This can be a simple literal (constant) value, a reference to a query variable that is in scope, a parenthesized expression, a function call, or a newly constructed instance of the data model (such as a newly constructed object, array, or multiset of data model instances).</p>
1240<div class="section">
1241<h3><a name="Literals" id="Literals">Literals</a></h3>
1242
1243<div class="source">
1244<div class="source">
1245<pre>Literal ::= StringLiteral
1246 | IntegerLiteral
1247 | FloatLiteral
1248 | DoubleLiteral
1249 | &lt;NULL&gt;
1250 | &lt;MISSING&gt;
1251 | &lt;TRUE&gt;
1252 | &lt;FALSE&gt;
1253StringLiteral ::= &quot;\&quot;&quot; (
1254 &lt;EscapeQuot&gt;
1255 | &lt;EscapeBslash&gt;
1256 | &lt;EscapeSlash&gt;
1257 | &lt;EscapeBspace&gt;
1258 | &lt;EscapeFormf&gt;
1259 | &lt;EscapeNl&gt;
1260 | &lt;EscapeCr&gt;
1261 | &lt;EscapeTab&gt;
1262 | ~[&quot;\&quot;&quot;,&quot;\\&quot;])*
1263 &quot;\&quot;&quot;
1264 | &quot;\'&quot;(
1265 &lt;EscapeApos&gt;
1266 | &lt;EscapeBslash&gt;
1267 | &lt;EscapeSlash&gt;
1268 | &lt;EscapeBspace&gt;
1269 | &lt;EscapeFormf&gt;
1270 | &lt;EscapeNl&gt;
1271 | &lt;EscapeCr&gt;
1272 | &lt;EscapeTab&gt;
1273 | ~[&quot;\'&quot;,&quot;\\&quot;])*
1274 &quot;\'&quot;
1275&lt;EscapeApos&gt; ::= &quot;\\\'&quot;
1276&lt;EscapeQuot&gt; ::= &quot;\\\&quot;&quot;
1277&lt;EscapeBslash&gt; ::= &quot;\\\\&quot;
1278&lt;EscapeSlash&gt; ::= &quot;\\/&quot;
1279&lt;EscapeBspace&gt; ::= &quot;\\b&quot;
1280&lt;EscapeFormf&gt; ::= &quot;\\f&quot;
1281&lt;EscapeNl&gt; ::= &quot;\\n&quot;
1282&lt;EscapeCr&gt; ::= &quot;\\r&quot;
1283&lt;EscapeTab&gt; ::= &quot;\\t&quot;
1284
1285IntegerLiteral ::= &lt;DIGITS&gt;
1286&lt;DIGITS&gt; ::= [&quot;0&quot; - &quot;9&quot;]+
1287FloatLiteral ::= &lt;DIGITS&gt; ( &quot;f&quot; | &quot;F&quot; )
1288 | &lt;DIGITS&gt; ( &quot;.&quot; &lt;DIGITS&gt; ( &quot;f&quot; | &quot;F&quot; ) )?
1289 | &quot;.&quot; &lt;DIGITS&gt; ( &quot;f&quot; | &quot;F&quot; )
1290DoubleLiteral ::= &lt;DIGITS&gt; &quot;.&quot; &lt;DIGITS&gt;
1291 | &quot;.&quot; &lt;DIGITS&gt;
1292</pre></div></div>
1293<p>Literals (constants) in SQL++ can be strings, integers, floating point values, double values, boolean constants, or special constant values like <tt>NULL</tt> and <tt>MISSING</tt>. The <tt>NULL</tt> value is like a <tt>NULL</tt> in SQL; it is used to represent an unknown field value. The special value <tt>MISSING</tt> is only meaningful in the context of SQL++ field accesses; it occurs when the accessed field simply does not exist at all in an object being accessed.</p>
1294<p>The following are some simple examples of SQL++ literals.</p>
1295<div class="section">
1296<div class="section">
1297<h5><a name="Examples"></a>Examples</h5>
1298
1299<div class="source">
1300<div class="source">
1301<pre>'a string'
1302&quot;test string&quot;
130342
1304</pre></div></div>
1305<p>Different from standard SQL, double quotes play the same role as single quotes and may be used for string literals in SQL++.</p></div></div></div>
1306<div class="section">
1307<h3><a name="Variable_References"></a><a name="Variable_references" id="Variable_references">Variable References</a></h3>
1308
1309<div class="source">
1310<div class="source">
1311<pre>VariableReference ::= &lt;IDENTIFIER&gt;|&lt;DelimitedIdentifier&gt;
1312&lt;IDENTIFIER&gt; ::= &lt;LETTER&gt; (&lt;LETTER&gt; | &lt;DIGIT&gt; | &quot;_&quot; | &quot;$&quot;)*
1313&lt;LETTER&gt; ::= [&quot;A&quot; - &quot;Z&quot;, &quot;a&quot; - &quot;z&quot;]
1314DelimitedIdentifier ::= &quot;`&quot; (&lt;EscapeQuot&gt;
1315 | &lt;EscapeBslash&gt;
1316 | &lt;EscapeSlash&gt;
1317 | &lt;EscapeBspace&gt;
1318 | &lt;EscapeFormf&gt;
1319 | &lt;EscapeNl&gt;
1320 | &lt;EscapeCr&gt;
1321 | &lt;EscapeTab&gt;
1322 | ~[&quot;`&quot;,&quot;\\&quot;])*
1323 &quot;`&quot;
1324</pre></div></div>
1325<p>A variable in SQL++ can be bound to any legal data model value. A variable reference refers to the value to which an in-scope variable is bound. (E.g., a variable binding may originate from one of the <tt>FROM</tt>, <tt>WITH</tt> or <tt>LET</tt> clauses of a <tt>SELECT</tt> statement or from an input parameter in the context of a function body.) Backticks, e.g., `id`, are used for delimited identifiers. Delimiting is needed when a variable&#x2019;s desired name clashes with a SQL++ keyword or includes characters not allowed in regular identifiers.</p>
1326<div class="section">
1327<div class="section">
1328<h5><a name="Examples"></a>Examples</h5>
1329
1330<div class="source">
1331<div class="source">
1332<pre>tweet
1333id
1334`SELECT`
1335`my-function`
1336</pre></div></div></div></div></div>
1337<div class="section">
1338<h3><a name="Parenthesized_expressions" id="Parenthesized_expressions">Parenthesized expressions</a></h3>
1339
1340<div class="source">
1341<div class="source">
1342<pre>ParenthesizedExpression ::= &quot;(&quot; Expression &quot;)&quot; | Subquery
1343</pre></div></div>
1344<p>An expression can be parenthesized to control the precedence order or otherwise clarify a query. In SQL++, for composability, a subquery is also a parenthesized expression.</p>
1345<p>The following expression evaluates to the value 2.</p>
1346<div class="section">
1347<div class="section">
1348<h5><a name="Example"></a>Example</h5>
1349
1350<div class="source">
1351<div class="source">
1352<pre>( 1 + 1 )
1353</pre></div></div></div></div></div>
1354<div class="section">
1355<h3><a name="Function_call_expressions" id="Function_call_expressions">Function call expressions</a></h3>
1356
1357<div class="source">
1358<div class="source">
1359<pre>FunctionCallExpression ::= FunctionName &quot;(&quot; ( Expression ( &quot;,&quot; Expression )* )? &quot;)&quot;
1360</pre></div></div>
1361<p>Functions are included in SQL++, like most languages, as a way to package useful functionality or to componentize complicated or reusable SQL++ computations. A function call is a legal SQL++ query expression that represents the value resulting from the evaluation of its body expression with the given parameter bindings; the parameter value bindings can themselves be any SQL++ expressions.</p>
1362<p>The following example is a (built-in) function call expression whose value is 8.</p>
1363<div class="section">
1364<div class="section">
1365<h5><a name="Example"></a>Example</h5>
1366
1367<div class="source">
1368<div class="source">
1369<pre>length('a string')
1370</pre></div></div></div></div></div>
1371<div class="section">
1372<h3><a name="Constructors" id="Constructors">Constructors</a></h3>
1373
1374<div class="source">
1375<div class="source">
1376<pre>Constructor ::= ArrayConstructor | MultisetConstructor | ObjectConstructor
1377ArrayConstructor ::= &quot;[&quot; ( Expression ( &quot;,&quot; Expression )* )? &quot;]&quot;
1378MultisetConstructor ::= &quot;{{&quot; ( Expression ( &quot;,&quot; Expression )* )? &quot;}}&quot;
1379ObjectConstructor ::= &quot;{&quot; ( FieldBinding ( &quot;,&quot; FieldBinding )* )? &quot;}&quot;
1380FieldBinding ::= Expression &quot;:&quot; Expression
1381</pre></div></div>
1382<p>A major feature of SQL++ is its ability to construct new data model instances. This is accomplished using its constructors for each of the model&#x2019;s complex object structures, namely arrays, multisets, and objects. Arrays are like JSON arrays, while multisets have bag semantics. Objects are built from fields that are field-name/field-value pairs, again like JSON.</p>
1383<p>The following examples illustrate how to construct a new array with 4 items, a new object with 2 fields, and a new multiset with 5 items, respectively. Array elements or multiset elements can be homogeneous (as in the first example), which is the common case, or they may be heterogeneous (as in the third example). The data values and field name values used to construct arrays, multisets, and objects in constructors are all simply SQL++ expressions. Thus, the collection elements, field names, and field values used in constructors can be simple literals or they can come from query variable references or even arbitrarily complex SQL++ expressions (subqueries). The field names in an object must be strings and must be distinct: type errors will be raised if they are not strings, and duplicate field errors will be raised if they are not distinct.</p>
1384<div class="section">
1385<div class="section">
1386<h5><a name="Examples"></a>Examples</h5>
1387
1388<div class="source">
1389<div class="source">
1390<pre>[ 'a', 'b', 'c', 'c' ]
1391
1392{
1393 'project name': 'Hyracks',
1394 'project members': [ 'vinayakb', 'dtabass', 'chenli', 'tsotras', 'tillw' ]
1395}
1396
1397{{ 42, &quot;forty-two!&quot;, { &quot;rank&quot;: &quot;Captain&quot;, &quot;name&quot;: &quot;America&quot; }, 3.14159, 42 }}
1398</pre></div></div>
1399<!-- ! Licensed to the Apache Software Foundation (ASF) under one
1400 ! or more contributor license agreements. See the NOTICE file
1401 ! distributed with this work for additional information
1402 ! regarding copyright ownership. The ASF licenses this file
1403 ! to you under the Apache License, Version 2.0 (the
1404 ! "License"); you may not use this file except in compliance
1405 ! with the License. You may obtain a copy of the License at
1406 !
1407 ! http://www.apache.org/licenses/LICENSE-2.0
1408 !
1409 ! Unless required by applicable law or agreed to in writing,
1410 ! software distributed under the License is distributed on an
1411 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1412 ! KIND, either express or implied. See the License for the
1413 ! specific language governing permissions and limitations
1414 ! under the License.
1415 ! -->
1416<h1><a name="Queries" id="Queries">3. Queries</a></h1>
1417<p>A SQL++ query can be any legal SQL++ expression or <tt>SELECT</tt> statement. A SQL++ query always ends with a semicolon.</p>
1418
1419<div class="source">
1420<div class="source">
1421<pre>Query ::= (Expression | SelectStatement) &quot;;&quot;
1422</pre></div></div></div></div></div></div>
1423<div class="section">
1424<h2><a name="SELECT_statements" id="SELECT_statements">SELECT statements</a></h2>
1425<p>The following shows the (rich) grammar for the <tt>SELECT</tt> statement in SQL++.</p>
1426
1427<div class="source">
1428<div class="source">
1429<pre>SelectStatement ::= ( WithClause )?
1430 SelectSetOperation (OrderbyClause )? ( LimitClause )?
1431SelectSetOperation ::= SelectBlock (&lt;UNION&gt; &lt;ALL&gt; ( SelectBlock | Subquery ) )*
1432Subquery ::= &quot;(&quot; SelectStatement &quot;)&quot;
1433
1434SelectBlock ::= SelectClause
1435 ( FromClause ( LetClause )?)?
1436 ( WhereClause )?
1437 ( GroupbyClause ( LetClause )? ( HavingClause )? )?
1438 |
1439 FromClause ( LetClause )?
1440 ( WhereClause )?
1441 ( GroupbyClause ( LetClause )? ( HavingClause )? )?
1442 SelectClause
1443
1444SelectClause ::= &lt;SELECT&gt; ( &lt;ALL&gt; | &lt;DISTINCT&gt; )? ( SelectRegular | SelectValue )
1445SelectRegular ::= Projection ( &quot;,&quot; Projection )*
1446SelectValue ::= ( &lt;VALUE&gt; | &lt;ELEMENT&gt; | &lt;RAW&gt; ) Expression
1447Projection ::= ( Expression ( &lt;AS&gt; )? Identifier | &quot;*&quot; )
1448
1449FromClause ::= &lt;FROM&gt; FromTerm ( &quot;,&quot; FromTerm )*
1450FromTerm ::= Expression (( &lt;AS&gt; )? Variable)?
1451 ( ( JoinType )? ( JoinClause | UnnestClause ) )*
1452
1453JoinClause ::= &lt;JOIN&gt; Expression (( &lt;AS&gt; )? Variable)? &lt;ON&gt; Expression
1454UnnestClause ::= ( &lt;UNNEST&gt; | &lt;CORRELATE&gt; | &lt;FLATTEN&gt; ) Expression
1455 ( &lt;AS&gt; )? Variable ( &lt;AT&gt; Variable )?
1456JoinType ::= ( &lt;INNER&gt; | &lt;LEFT&gt; ( &lt;OUTER&gt; )? )
1457
1458WithClause ::= &lt;WITH&gt; WithElement ( &quot;,&quot; WithElement )*
1459LetClause ::= (&lt;LET&gt; | &lt;LETTING&gt;) LetElement ( &quot;,&quot; LetElement )*
1460LetElement ::= Variable &quot;=&quot; Expression
1461WithElement ::= Variable &lt;AS&gt; Expression
1462
1463WhereClause ::= &lt;WHERE&gt; Expression
1464
1465GroupbyClause ::= &lt;GROUP&gt; &lt;BY&gt; ( Expression ( (&lt;AS&gt;)? Variable )? ( &quot;,&quot; Expression ( (&lt;AS&gt;)? Variable )? )* )
1466 ( &lt;GROUP&gt; &lt;AS&gt; Variable
1467 (&quot;(&quot; Variable &lt;AS&gt; VariableReference (&quot;,&quot; Variable &lt;AS&gt; VariableReference )* &quot;)&quot;)?
1468 )?
1469HavingClause ::= &lt;HAVING&gt; Expression
1470
1471OrderbyClause ::= &lt;ORDER&gt; &lt;BY&gt; Expression ( &lt;ASC&gt; | &lt;DESC&gt; )? ( &quot;,&quot; Expression ( &lt;ASC&gt; | &lt;DESC&gt; )? )*
1472LimitClause ::= &lt;LIMIT&gt; Expression ( &lt;OFFSET&gt; Expression )?
1473</pre></div></div>
1474<p>In this section, we will make use of two stored collections of objects (datasets), <tt>GleambookUsers</tt> and <tt>GleambookMessages</tt>, in a series of running examples to explain <tt>SELECT</tt> queries. The contents of the example collections are as follows:</p>
1475<p><tt>GleambookUsers</tt> collection (or, dataset):</p>
1476
1477<div class="source">
1478<div class="source">
1479<pre>{&quot;id&quot;:1,&quot;alias&quot;:&quot;Margarita&quot;,&quot;name&quot;:&quot;MargaritaStoddard&quot;,&quot;nickname&quot;:&quot;Mags&quot;,&quot;userSince&quot;:&quot;2012-08-20T10:10:00&quot;,&quot;friendIds&quot;:[2,3,6,10],&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;Codetechno&quot;,&quot;start-date&quot;:&quot;2006-08-06&quot;},{&quot;organizationName&quot;:&quot;geomedia&quot;,&quot;start-date&quot;:&quot;2010-06-17&quot;,&quot;end-date&quot;:&quot;2010-01-26&quot;}],&quot;gender&quot;:&quot;F&quot;}
1480{&quot;id&quot;:2,&quot;alias&quot;:&quot;Isbel&quot;,&quot;name&quot;:&quot;IsbelDull&quot;,&quot;nickname&quot;:&quot;Izzy&quot;,&quot;userSince&quot;:&quot;2011-01-22T10:10:00&quot;,&quot;friendIds&quot;:[1,4],&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;Hexviafind&quot;,&quot;startDate&quot;:&quot;2010-04-27&quot;}]}
1481{&quot;id&quot;:3,&quot;alias&quot;:&quot;Emory&quot;,&quot;name&quot;:&quot;EmoryUnk&quot;,&quot;userSince&quot;:&quot;2012-07-10T10:10:00&quot;,&quot;friendIds&quot;:[1,5,8,9],&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;geomedia&quot;,&quot;startDate&quot;:&quot;2010-06-17&quot;,&quot;endDate&quot;:&quot;2010-01-26&quot;}]}
1482</pre></div></div>
1483<p><tt>GleambookMessages</tt> collection (or, dataset):</p>
1484
1485<div class="source">
1486<div class="source">
1487<pre>{&quot;messageId&quot;:2,&quot;authorId&quot;:1,&quot;inResponseTo&quot;:4,&quot;senderLocation&quot;:[41.66,80.87],&quot;message&quot;:&quot; dislike iphone its touch-screen is horrible&quot;}
1488{&quot;messageId&quot;:3,&quot;authorId&quot;:2,&quot;inResponseTo&quot;:4,&quot;senderLocation&quot;:[48.09,81.01],&quot;message&quot;:&quot; like samsung the plan is amazing&quot;}
1489{&quot;messageId&quot;:4,&quot;authorId&quot;:1,&quot;inResponseTo&quot;:2,&quot;senderLocation&quot;:[37.73,97.04],&quot;message&quot;:&quot; can't stand at&amp;t the network is horrible:(&quot;}
1490{&quot;messageId&quot;:6,&quot;authorId&quot;:2,&quot;inResponseTo&quot;:1,&quot;senderLocation&quot;:[31.5,75.56],&quot;message&quot;:&quot; like t-mobile its platform is mind-blowing&quot;}
1491{&quot;messageId&quot;:8,&quot;authorId&quot;:1,&quot;inResponseTo&quot;:11,&quot;senderLocation&quot;:[40.33,80.87],&quot;message&quot;:&quot; like verizon the 3G is awesome:)&quot;}
1492{&quot;messageId&quot;:10,&quot;authorId&quot;:1,&quot;inResponseTo&quot;:12,&quot;senderLocation&quot;:[42.5,70.01],&quot;message&quot;:&quot; can't stand motorola the touch-screen is terrible&quot;}
1493{&quot;messageId&quot;:11,&quot;authorId&quot;:1,&quot;inResponseTo&quot;:1,&quot;senderLocation&quot;:[38.97,77.49],&quot;message&quot;:&quot; can't stand at&amp;t its plan is terrible&quot;}
1494</pre></div></div></div>
1495<div class="section">
1496<h2><a name="SELECT_Clause"></a><a name="Select_clauses" id="Select_clauses">SELECT Clause</a></h2>
1497<p>The SQL++ <tt>SELECT</tt> clause always returns a collection value as its result (even if the result is empty or a singleton).</p>
1498<div class="section">
1499<h3><a name="SELECT_VALUE_Clause"></a><a name="Select_element" id="Select_element">SELECT VALUE Clause</a></h3>
1500<p>The <tt>SELECT VALUE</tt> clause in SQL++ returns a collection that contains the results of evaluating the <tt>VALUE</tt> expression, with one evaluation being performed per &#x201c;binding tuple&#x201d; (i.e., per <tt>FROM</tt> clause item) satisfying the statement&#x2019;s selection criteria. For historical reasons SQL++ also allows the keywords <tt>ELEMENT</tt> or <tt>RAW</tt> to be used in place of <tt>VALUE</tt> (not recommended).</p>
1501<p>The following example shows a stand-alone <tt>SELECT VALUE</tt>, which wraps a value into an array.</p>
1502<div class="section">
1503<div class="section">
1504<h5><a name="Example"></a>Example</h5>
1505
1506<div class="source">
1507<div class="source">
1508<pre>SELECT VALUE 1;
1509</pre></div></div>
1510<p>This query returns:</p>
1511
1512<div class="source">
1513<div class="source">
1514<pre>[
1515 1
1516]
1517</pre></div></div>
1518<p>The following example shows a query that selects one user from the GleambookUsers collection.</p></div>
1519<div class="section">
1520<h5><a name="Example"></a>Example</h5>
1521
1522<div class="source">
1523<div class="source">
1524<pre>SELECT VALUE user
1525FROM GleambookUsers user
1526WHERE user.id = 1;
1527</pre></div></div>
1528<p>This query returns:</p>
1529
1530<div class="source">
1531<div class="source">
1532<pre>[{
1533 &quot;userSince&quot;: &quot;2012-08-20T10:10:00.000Z&quot;,
1534 &quot;friendIds&quot;: [
1535 2,
1536 3,
1537 6,
1538 10
1539 ],
1540 &quot;gender&quot;: &quot;F&quot;,
1541 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
1542 &quot;nickname&quot;: &quot;Mags&quot;,
1543 &quot;alias&quot;: &quot;Margarita&quot;,
1544 &quot;id&quot;: 1,
1545 &quot;employment&quot;: [
1546 {
1547 &quot;organizationName&quot;: &quot;Codetechno&quot;,
1548 &quot;start-date&quot;: &quot;2006-08-06&quot;
1549 },
1550 {
1551 &quot;end-date&quot;: &quot;2010-01-26&quot;,
1552 &quot;organizationName&quot;: &quot;geomedia&quot;,
1553 &quot;start-date&quot;: &quot;2010-06-17&quot;
1554 }
1555 ]
1556} ]
1557</pre></div></div></div></div></div>
1558<div class="section">
1559<h3><a name="SQL-style_SELECT"></a><a name="SQL_select" id="SQL_select">SQL-style SELECT</a></h3>
1560<p>In SQL++, the traditional SQL-style <tt>SELECT</tt> syntax is also supported. This syntax can also be reformulated in a <tt>SELECT VALUE</tt> based manner in SQL++. (E.g., <tt>SELECT expA AS fldA, expB AS fldB</tt> is syntactic sugar for <tt>SELECT VALUE { 'fldA': expA, 'fldB': expB }</tt>.) Unlike in SQL, the result of a SQL++ query does not preserve the order of expressions in the <tt>SELECT</tt> clause.</p>
1561<div class="section">
1562<div class="section">
1563<h5><a name="Example"></a>Example</h5>
1564
1565<div class="source">
1566<div class="source">
1567<pre>SELECT user.alias user_alias, user.name user_name
1568FROM GleambookUsers user
1569WHERE user.id = 1;
1570</pre></div></div>
1571<p>Returns:</p>
1572
1573<div class="source">
1574<div class="source">
1575<pre>[ {
1576 &quot;user_name&quot;: &quot;MargaritaStoddard&quot;,
1577 &quot;user_alias&quot;: &quot;Margarita&quot;
1578} ]
1579</pre></div></div></div></div></div>
1580<div class="section">
1581<h3><a name="SELECT_"></a><a name="Select_star" id="Select_star">SELECT *</a></h3>
1582<p>In SQL++, <tt>SELECT *</tt> returns an object with a nested field for each input tuple. Each field has as its field name the name of a binding variable generated by either the <tt>FROM</tt> clause or <tt>GROUP BY</tt> clause in the current enclosing <tt>SELECT</tt> statement, and its field value is the value of that binding variable.</p>
1583<div class="section">
1584<div class="section">
1585<h5><a name="Example"></a>Example</h5>
1586
1587<div class="source">
1588<div class="source">
1589<pre>SELECT *
1590FROM GleambookUsers user;
1591</pre></div></div>
1592<p>Since <tt>user</tt> is the only binding variable generated in the <tt>FROM</tt> clause, this query returns:</p>
1593
1594<div class="source">
1595<div class="source">
1596<pre>[ {
1597 &quot;user&quot;: {
1598 &quot;userSince&quot;: &quot;2012-08-20T10:10:00.000Z&quot;,
1599 &quot;friendIds&quot;: [
1600 2,
1601 3,
1602 6,
1603 10
1604 ],
1605 &quot;gender&quot;: &quot;F&quot;,
1606 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
1607 &quot;nickname&quot;: &quot;Mags&quot;,
1608 &quot;alias&quot;: &quot;Margarita&quot;,
1609 &quot;id&quot;: 1,
1610 &quot;employment&quot;: [
1611 {
1612 &quot;organizationName&quot;: &quot;Codetechno&quot;,
1613 &quot;start-date&quot;: &quot;2006-08-06&quot;
1614 },
1615 {
1616 &quot;end-date&quot;: &quot;2010-01-26&quot;,
1617 &quot;organizationName&quot;: &quot;geomedia&quot;,
1618 &quot;start-date&quot;: &quot;2010-06-17&quot;
1619 }
1620 ]
1621 }
1622}, {
1623 &quot;user&quot;: {
1624 &quot;userSince&quot;: &quot;2011-01-22T10:10:00.000Z&quot;,
1625 &quot;friendIds&quot;: [
1626 1,
1627 4
1628 ],
1629 &quot;name&quot;: &quot;IsbelDull&quot;,
1630 &quot;nickname&quot;: &quot;Izzy&quot;,
1631 &quot;alias&quot;: &quot;Isbel&quot;,
1632 &quot;id&quot;: 2,
1633 &quot;employment&quot;: [
1634 {
1635 &quot;organizationName&quot;: &quot;Hexviafind&quot;,
1636 &quot;startDate&quot;: &quot;2010-04-27&quot;
1637 }
1638 ]
1639 }
1640}, {
1641 &quot;user&quot;: {
1642 &quot;userSince&quot;: &quot;2012-07-10T10:10:00.000Z&quot;,
1643 &quot;friendIds&quot;: [
1644 1,
1645 5,
1646 8,
1647 9
1648 ],
1649 &quot;name&quot;: &quot;EmoryUnk&quot;,
1650 &quot;alias&quot;: &quot;Emory&quot;,
1651 &quot;id&quot;: 3,
1652 &quot;employment&quot;: [
1653 {
1654 &quot;organizationName&quot;: &quot;geomedia&quot;,
1655 &quot;endDate&quot;: &quot;2010-01-26&quot;,
1656 &quot;startDate&quot;: &quot;2010-06-17&quot;
1657 }
1658 ]
1659 }
1660} ]
1661</pre></div></div></div>
1662<div class="section">
1663<h5><a name="Example"></a>Example</h5>
1664
1665<div class="source">
1666<div class="source">
1667<pre>SELECT *
1668FROM GleambookUsers u, GleambookMessages m
1669WHERE m.authorId = u.id and u.id = 2;
1670</pre></div></div>
1671<p>This query does an inner join that we will discuss in <a href="#Multiple_from_terms">multiple from terms</a>. Since both <tt>u</tt> and <tt>m</tt> are binding variables generated in the <tt>FROM</tt> clause, this query returns:</p>
1672
1673<div class="source">
1674<div class="source">
1675<pre>[ {
1676 &quot;u&quot;: {
1677 &quot;userSince&quot;: &quot;2011-01-22T10:10:00&quot;,
1678 &quot;friendIds&quot;: [
1679 1,
1680 4
1681 ],
1682 &quot;name&quot;: &quot;IsbelDull&quot;,
1683 &quot;nickname&quot;: &quot;Izzy&quot;,
1684 &quot;alias&quot;: &quot;Isbel&quot;,
1685 &quot;id&quot;: 2,
1686 &quot;employment&quot;: [
1687 {
1688 &quot;organizationName&quot;: &quot;Hexviafind&quot;,
1689 &quot;startDate&quot;: &quot;2010-04-27&quot;
1690 }
1691 ]
1692 },
1693 &quot;m&quot;: {
1694 &quot;senderLocation&quot;: [
1695 31.5,
1696 75.56
1697 ],
1698 &quot;inResponseTo&quot;: 1,
1699 &quot;messageId&quot;: 6,
1700 &quot;authorId&quot;: 2,
1701 &quot;message&quot;: &quot; like t-mobile its platform is mind-blowing&quot;
1702 }
1703}, {
1704 &quot;u&quot;: {
1705 &quot;userSince&quot;: &quot;2011-01-22T10:10:00&quot;,
1706 &quot;friendIds&quot;: [
1707 1,
1708 4
1709 ],
1710 &quot;name&quot;: &quot;IsbelDull&quot;,
1711 &quot;nickname&quot;: &quot;Izzy&quot;,
1712 &quot;alias&quot;: &quot;Isbel&quot;,
1713 &quot;id&quot;: 2,
1714 &quot;employment&quot;: [
1715 {
1716 &quot;organizationName&quot;: &quot;Hexviafind&quot;,
1717 &quot;startDate&quot;: &quot;2010-04-27&quot;
1718 }
1719 ]
1720 },
1721 &quot;m&quot;: {
1722 &quot;senderLocation&quot;: [
1723 48.09,
1724 81.01
1725 ],
1726 &quot;inResponseTo&quot;: 4,
1727 &quot;messageId&quot;: 3,
1728 &quot;authorId&quot;: 2,
1729 &quot;message&quot;: &quot; like samsung the plan is amazing&quot;
1730 }
1731} ]
1732</pre></div></div></div></div></div>
1733<div class="section">
1734<h3><a name="SELECT_DISTINCT"></a><a name="Select_distinct" id="Select_distinct">SELECT DISTINCT</a></h3>
1735<p>SQL++&#x2019;s <tt>DISTINCT</tt> keyword is used to eliminate duplicate items in results. The following example shows how it works.</p>
1736<div class="section">
1737<div class="section">
1738<h5><a name="Example"></a>Example</h5>
1739
1740<div class="source">
1741<div class="source">
1742<pre>SELECT DISTINCT * FROM [1, 2, 2, 3] AS foo;
1743</pre></div></div>
1744<p>This query returns:</p>
1745
1746<div class="source">
1747<div class="source">
1748<pre>[ {
1749 &quot;foo&quot;: 1
1750}, {
1751 &quot;foo&quot;: 2
1752}, {
1753 &quot;foo&quot;: 3
1754} ]
1755</pre></div></div></div>
1756<div class="section">
1757<h5><a name="Example"></a>Example</h5>
1758
1759<div class="source">
1760<div class="source">
1761<pre>SELECT DISTINCT VALUE foo FROM [1, 2, 2, 3] AS foo;
1762</pre></div></div>
1763<p>This version of the query returns:</p>
1764
1765<div class="source">
1766<div class="source">
1767<pre>[ 1
1768, 2
1769, 3
1770 ]
1771</pre></div></div></div></div></div>
1772<div class="section">
1773<h3><a name="Unnamed_projections" id="Unnamed_projections">Unnamed projections</a></h3>
1774<p>Similar to standard SQL, SQL++ supports unnamed projections (a.k.a, unnamed <tt>SELECT</tt> clause items), for which names are generated. Name generation has three cases:</p>
1775
1776<ul>
1777
1778<li>If a projection expression is a variable reference expression, its generated name is the name of the variable.</li>
1779
1780<li>If a projection expression is a field access expression, its generated name is the last identifier in the expression.</li>
1781
1782<li>For all other cases, the query processor will generate a unique name.</li>
1783</ul>
1784<div class="section">
1785<div class="section">
1786<h5><a name="Example"></a>Example</h5>
1787
1788<div class="source">
1789<div class="source">
1790<pre>SELECT substr(user.name, 10), user.alias
1791FROM GleambookUsers user
1792WHERE user.id = 1;
1793</pre></div></div>
1794<p>This query outputs:</p>
1795
1796<div class="source">
1797<div class="source">
1798<pre>[ {
1799 &quot;alias&quot;: &quot;Margarita&quot;,
1800 &quot;$1&quot;: &quot;Stoddard&quot;
1801} ]
1802</pre></div></div>
1803<p>In the result, <tt>$1</tt> is the generated name for <tt>substr(user.name, 10)</tt>, while <tt>alias</tt> is the generated name for <tt>user.alias</tt>.</p></div></div></div>
1804<div class="section">
1805<h3><a name="Abbreviated_Field_Access_Expressions"></a><a name="Abbreviatory_field_access_expressions" id="Abbreviatory_field_access_expressions">Abbreviated Field Access Expressions</a></h3>
1806<p>As in standard SQL, SQL++ field access expressions can be abbreviated (not recommended) when there is no ambiguity. In the next example, the variable <tt>user</tt> is the only possible variable reference for fields <tt>id</tt>, <tt>name</tt> and <tt>alias</tt> and thus could be omitted in the query.</p>
1807<div class="section">
1808<div class="section">
1809<h5><a name="Example"></a>Example</h5>
1810
1811<div class="source">
1812<div class="source">
1813<pre>SELECT substr(name, 10) AS lname, alias
1814FROM GleambookUsers user
1815WHERE id = 1;
1816</pre></div></div>
1817<p>Outputs:</p>
1818
1819<div class="source">
1820<div class="source">
1821<pre>[ {
1822 &quot;lname&quot;: &quot;Stoddard&quot;,
1823 &quot;alias&quot;: &quot;Margarita&quot;
1824} ]
1825</pre></div></div></div></div></div></div>
1826<div class="section">
1827<h2><a name="UNNEST_Clause"></a><a name="Unnest_clauses" id="Unnest_clauses">UNNEST Clause</a></h2>
1828<p>For each of its input tuples, the <tt>UNNEST</tt> clause flattens a collection-valued expression into individual items, producing multiple tuples, each of which is one of the expression&#x2019;s original input tuples augmented with a flattened item from its collection.</p>
1829<div class="section">
1830<h3><a name="Inner_UNNEST"></a><a name="Inner_unnests" id="Inner_unnests">Inner UNNEST</a></h3>
1831<p>The following example is a query that retrieves the names of the organizations that a selected user has worked for. It uses the <tt>UNNEST</tt> clause to unnest the nested collection <tt>employment</tt> in the user&#x2019;s object.</p>
1832<div class="section">
1833<div class="section">
1834<h5><a name="Example"></a>Example</h5>
1835
1836<div class="source">
1837<div class="source">
1838<pre>SELECT u.id AS userId, e.organizationName AS orgName
1839FROM GleambookUsers u
1840UNNEST u.employment e
1841WHERE u.id = 1;
1842</pre></div></div>
1843<p>This query returns:</p>
1844
1845<div class="source">
1846<div class="source">
1847<pre>[ {
1848 &quot;orgName&quot;: &quot;Codetechno&quot;,
1849 &quot;userId&quot;: 1
1850}, {
1851 &quot;orgName&quot;: &quot;geomedia&quot;,
1852 &quot;userId&quot;: 1
1853} ]
1854</pre></div></div>
1855<p>Note that <tt>UNNEST</tt> has SQL&#x2019;s inner join semantics &#x2014; that is, if a user has no employment history, no tuple corresponding to that user will be emitted in the result.</p></div></div></div>
1856<div class="section">
1857<h3><a name="Left_outer_UNNEST"></a><a name="Left_outer_unnests" id="Left_outer_unnests">Left outer UNNEST</a></h3>
1858<p>As an alternative, the <tt>LEFT OUTER UNNEST</tt> clause offers SQL&#x2019;s left outer join semantics. For example, no collection-valued field named <tt>hobbies</tt> exists in the object for the user whose id is 1, but the following query&#x2019;s result still includes user 1.</p>
1859<div class="section">
1860<div class="section">
1861<h5><a name="Example"></a>Example</h5>
1862
1863<div class="source">
1864<div class="source">
1865<pre>SELECT u.id AS userId, h.hobbyName AS hobby
1866FROM GleambookUsers u
1867LEFT OUTER UNNEST u.hobbies h
1868WHERE u.id = 1;
1869</pre></div></div>
1870<p>Returns:</p>
1871
1872<div class="source">
1873<div class="source">
1874<pre>[ {
1875 &quot;userId&quot;: 1
1876} ]
1877</pre></div></div>
1878<p>Note that if <tt>u.hobbies</tt> is an empty collection or leads to a <tt>MISSING</tt> (as above) or <tt>NULL</tt> value for a given input tuple, there is no corresponding binding value for variable <tt>h</tt> for that input tuple. A <tt>MISSING</tt> value will be generated for <tt>h</tt> so that the input tuple can still be propagated.</p></div></div></div>
1879<div class="section">
1880<h3><a name="Expressing_joins_using_UNNEST"></a><a name="Expressing_joins_using_unnests" id="Expressing_joins_using_unnests">Expressing joins using UNNEST</a></h3>
1881<p>The SQL++ <tt>UNNEST</tt> clause is similar to SQL&#x2019;s <tt>JOIN</tt> clause except that it allows its right argument to be correlated to its left argument, as in the examples above &#x2014; i.e., think &#x201c;correlated cross-product&#x201d;. The next example shows this via a query that joins two data sets, GleambookUsers and GleambookMessages, returning user/message pairs. The results contain one object per pair, with result objects containing the user&#x2019;s name and an entire message. The query can be thought of as saying &#x201c;for each Gleambook user, unnest the <tt>GleambookMessages</tt> collection and filter the output with the condition <tt>message.authorId = user.id</tt>&#x201d;.</p>
1882<div class="section">
1883<div class="section">
1884<h5><a name="Example"></a>Example</h5>
1885
1886<div class="source">
1887<div class="source">
1888<pre>SELECT u.name AS uname, m.message AS message
1889FROM GleambookUsers u
1890UNNEST GleambookMessages m
1891WHERE m.authorId = u.id;
1892</pre></div></div>
1893<p>This returns:</p>
1894
1895<div class="source">
1896<div class="source">
1897<pre>[ {
1898 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
1899 &quot;message&quot;: &quot; can't stand at&amp;t its plan is terrible&quot;
1900}, {
1901 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
1902 &quot;message&quot;: &quot; dislike iphone its touch-screen is horrible&quot;
1903}, {
1904 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
1905 &quot;message&quot;: &quot; can't stand at&amp;t the network is horrible:(&quot;
1906}, {
1907 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
1908 &quot;message&quot;: &quot; like verizon the 3G is awesome:)&quot;
1909}, {
1910 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
1911 &quot;message&quot;: &quot; can't stand motorola the touch-screen is terrible&quot;
1912}, {
1913 &quot;uname&quot;: &quot;IsbelDull&quot;,
1914 &quot;message&quot;: &quot; like t-mobile its platform is mind-blowing&quot;
1915}, {
1916 &quot;uname&quot;: &quot;IsbelDull&quot;,
1917 &quot;message&quot;: &quot; like samsung the plan is amazing&quot;
1918} ]
1919</pre></div></div>
1920<p>Similarly, the above query can also be expressed as the <tt>UNNEST</tt>ing of a correlated SQL++ subquery:</p></div>
1921<div class="section">
1922<h5><a name="Example"></a>Example</h5>
1923
1924<div class="source">
1925<div class="source">
1926<pre>SELECT u.name AS uname, m.message AS message
1927FROM GleambookUsers u
1928UNNEST (
1929 SELECT VALUE msg
1930 FROM GleambookMessages msg
1931 WHERE msg.authorId = u.id
1932) AS m;
1933</pre></div></div></div></div></div></div>
1934<div class="section">
1935<h2><a name="FROM_clauses"></a><a name="From_clauses" id="From_clauses">FROM clauses</a></h2>
1936<p>A <tt>FROM</tt> clause is used for enumerating (i.e., conceptually iterating over) the contents of collections, as in SQL.</p>
1937<div class="section">
1938<h3><a name="Binding_expressions" id="Binding_expressions">Binding expressions</a></h3>
1939<p>In SQL++, in addition to stored collections, a <tt>FROM</tt> clause can iterate over any intermediate collection returned by a valid SQL++ expression. In the tuple stream generated by a <tt>FROM</tt> clause, the ordering of the input tuples is not guaranteed to be preserved.</p>
1940<div class="section">
1941<div class="section">
1942<h5><a name="Example"></a>Example</h5>
1943
1944<div class="source">
1945<div class="source">
1946<pre>SELECT VALUE foo
1947FROM [1, 2, 2, 3] AS foo
1948WHERE foo &gt; 2;
1949</pre></div></div>
1950<p>Returns:</p>
1951
1952<div class="source">
1953<div class="source">
1954<pre>[
1955 3
1956]
1957</pre></div></div></div></div></div>
1958<div class="section">
1959<h3><a name="Multiple_FROM_terms"></a><a name="Multiple_from_terms" id="Multiple_from_terms">Multiple FROM terms</a></h3>
1960<p>SQL++ permits correlations among <tt>FROM</tt> terms. Specifically, a <tt>FROM</tt> binding expression can refer to variables defined to its left in the given <tt>FROM</tt> clause. Thus, the first unnesting example above could also be expressed as follows:</p>
1961<div class="section">
1962<div class="section">
1963<h5><a name="Example"></a>Example</h5>
1964
1965<div class="source">
1966<div class="source">
1967<pre>SELECT u.id AS userId, e.organizationName AS orgName
1968FROM GleambookUsers u, u.employment e
1969WHERE u.id = 1;
1970</pre></div></div></div></div></div>
1971<div class="section">
1972<h3><a name="Expressing_joins_using_FROM_terms"></a><a name="Expressing_joins_using_from_terms" id="Expressing_joins_using_from_terms">Expressing joins using FROM terms</a></h3>
1973<p>Similarly, the join intentions of the other <tt>UNNEST</tt>-based join examples above could be expressed as:</p>
1974<div class="section">
1975<div class="section">
1976<h5><a name="Example"></a>Example</h5>
1977
1978<div class="source">
1979<div class="source">
1980<pre>SELECT u.name AS uname, m.message AS message
1981FROM GleambookUsers u, GleambookMessages m
1982WHERE m.authorId = u.id;
1983</pre></div></div></div>
1984<div class="section">
1985<h5><a name="Example"></a>Example</h5>
1986
1987<div class="source">
1988<div class="source">
1989<pre>SELECT u.name AS uname, m.message AS message
1990FROM GleambookUsers u,
1991 (
1992 SELECT VALUE msg
1993 FROM GleambookMessages msg
1994 WHERE msg.authorId = u.id
1995 ) AS m;
1996</pre></div></div>
1997<p>Note that the first alternative is one of the SQL-92 approaches to expressing a join.</p></div></div></div>
1998<div class="section">
1999<h3><a name="Implicit_binding_variables" id="Implicit_binding_variables">Implicit binding variables</a></h3>
2000<p>Similar to standard SQL, SQL++ supports implicit <tt>FROM</tt> binding variables (i.e., aliases), for which a binding variable is generated. SQL++ variable generation falls into three cases:</p>
2001
2002<ul>
2003
2004<li>If the binding expression is a variable reference expression, the generated variable&#x2019;s name will be the name of the referenced variable itself.</li>
2005
2006<li>If the binding expression is a field access expression (or a fully qualified name for a dataset), the generated variable&#x2019;s name will be the last identifier (or the dataset name) in the expression.</li>
2007
2008<li>For all other cases, a compilation error will be raised.</li>
2009</ul>
2010<p>The next two examples show queries that do not provide binding variables in their <tt>FROM</tt> clauses.</p>
2011<div class="section">
2012<div class="section">
2013<h5><a name="Example"></a>Example</h5>
2014
2015<div class="source">
2016<div class="source">
2017<pre>SELECT GleambookUsers.name, GleambookMessages.message
2018FROM GleambookUsers, GleambookMessages
2019WHERE GleambookMessages.authorId = GleambookUsers.id;
2020</pre></div></div>
2021<p>Returns:</p>
2022
2023<div class="source">
2024<div class="source">
2025<pre>[ {
2026 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2027 &quot;message&quot;: &quot; like verizon the 3G is awesome:)&quot;
2028}, {
2029 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2030 &quot;message&quot;: &quot; can't stand motorola the touch-screen is terrible&quot;
2031}, {
2032 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2033 &quot;message&quot;: &quot; can't stand at&amp;t its plan is terrible&quot;
2034}, {
2035 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2036 &quot;message&quot;: &quot; dislike iphone its touch-screen is horrible&quot;
2037}, {
2038 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2039 &quot;message&quot;: &quot; can't stand at&amp;t the network is horrible:(&quot;
2040}, {
2041 &quot;name&quot;: &quot;IsbelDull&quot;,
2042 &quot;message&quot;: &quot; like samsung the plan is amazing&quot;
2043}, {
2044 &quot;name&quot;: &quot;IsbelDull&quot;,
2045 &quot;message&quot;: &quot; like t-mobile its platform is mind-blowing&quot;
2046} ]
2047</pre></div></div></div>
2048<div class="section">
2049<h5><a name="Example"></a>Example</h5>
2050
2051<div class="source">
2052<div class="source">
2053<pre>SELECT GleambookUsers.name, GleambookMessages.message
2054FROM GleambookUsers,
2055 (
2056 SELECT VALUE GleambookMessages
2057 FROM GleambookMessages
2058 WHERE GleambookMessages.authorId = GleambookUsers.id
2059 );
2060</pre></div></div>
2061<p>Returns:</p>
2062
2063<div class="source">
2064<div class="source">
2065<pre>Error: &quot;Syntax error: Need an alias for the enclosed expression:\n(select element GleambookMessages\n from GleambookMessages as GleambookMessages\n where (GleambookMessages.authorId = GleambookUsers.id)\n )&quot;,
2066 &quot;query_from_user&quot;: &quot;use TinySocial;\n\nSELECT GleambookUsers.name, GleambookMessages.message\n FROM GleambookUsers,\n (\n SELECT VALUE GleambookMessages\n FROM GleambookMessages\n WHERE GleambookMessages.authorId = GleambookUsers.id\n );&quot;
2067</pre></div></div></div></div></div></div>
2068<div class="section">
2069<h2><a name="JOIN_clauses"></a><a name="Join_clauses" id="Join_clauses">JOIN clauses</a></h2>
2070<p>The join clause in SQL++ supports both inner joins and left outer joins from standard SQL.</p>
2071<div class="section">
2072<h3><a name="Inner_joins" id="Inner_joins">Inner joins</a></h3>
2073<p>Using a <tt>JOIN</tt> clause, the inner join intent from the preceding examples can also be expressed as follows:</p>
2074<div class="section">
2075<div class="section">
2076<h5><a name="Example"></a>Example</h5>
2077
2078<div class="source">
2079<div class="source">
2080<pre>SELECT u.name AS uname, m.message AS message
2081FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id;
2082</pre></div></div></div></div></div>
2083<div class="section">
2084<h3><a name="Left_outer_joins" id="Left_outer_joins">Left outer joins</a></h3>
2085<p>SQL++ supports SQL&#x2019;s notion of left outer join. The following query is an example:</p>
2086
2087<div class="source">
2088<div class="source">
2089<pre>SELECT u.name AS uname, m.message AS message
2090FROM GleambookUsers u LEFT OUTER JOIN GleambookMessages m ON m.authorId = u.id;
2091</pre></div></div>
2092<p>Returns:</p>
2093
2094<div class="source">
2095<div class="source">
2096<pre>[ {
2097 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2098 &quot;message&quot;: &quot; like verizon the 3G is awesome:)&quot;
2099}, {
2100 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2101 &quot;message&quot;: &quot; can't stand motorola the touch-screen is terrible&quot;
2102}, {
2103 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2104 &quot;message&quot;: &quot; can't stand at&amp;t its plan is terrible&quot;
2105}, {
2106 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2107 &quot;message&quot;: &quot; dislike iphone its touch-screen is horrible&quot;
2108}, {
2109 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2110 &quot;message&quot;: &quot; can't stand at&amp;t the network is horrible:(&quot;
2111}, {
2112 &quot;uname&quot;: &quot;IsbelDull&quot;,
2113 &quot;message&quot;: &quot; like samsung the plan is amazing&quot;
2114}, {
2115 &quot;uname&quot;: &quot;IsbelDull&quot;,
2116 &quot;message&quot;: &quot; like t-mobile its platform is mind-blowing&quot;
2117}, {
2118 &quot;uname&quot;: &quot;EmoryUnk&quot;
2119} ]
2120</pre></div></div>
2121<p>For non-matching left-side tuples, SQL++ produces <tt>MISSING</tt> values for the right-side binding variables; that is why the last object in the above result doesn&#x2019;t have a <tt>message</tt> field. Note that this is slightly different from standard SQL, which instead would fill in <tt>NULL</tt> values for the right-side fields. The reason for this difference is that, for non-matches in its join results, SQL++ views fields from the right-side as being &#x201c;not there&#x201d; (a.k.a. <tt>MISSING</tt>) instead of as being &#x201c;there but unknown&#x201d; (i.e., <tt>NULL</tt>).</p>
2122<p>The left-outer join query can also be expressed using <tt>LEFT OUTER UNNEST</tt>:</p>
2123
2124<div class="source">
2125<div class="source">
2126<pre>SELECT u.name AS uname, m.message AS message
2127FROM GleambookUsers u
2128LEFT OUTER UNNEST (
2129 SELECT VALUE message
2130 FROM GleambookMessages message
2131 WHERE message.authorId = u.id
2132 ) m;
2133</pre></div></div>
2134<p>In general, in SQL++, SQL-style join queries can also be expressed by <tt>UNNEST</tt> clauses and left outer join queries can be expressed by <tt>LEFT OUTER UNNEST</tt> clauses.</p></div></div>
2135<div class="section">
2136<h2><a name="GROUP_BY_clauses"></a><a name="Group_By_clauses" id="Group_By_clauses">GROUP BY clauses</a></h2>
2137<p>The SQL++ <tt>GROUP BY</tt> clause generalizes standard SQL&#x2019;s grouping and aggregation semantics, but it also retains backward compatibility with the standard (relational) SQL <tt>GROUP BY</tt> and aggregation features.</p>
2138<div class="section">
2139<h3><a name="Group_variables" id="Group_variables">Group variables</a></h3>
2140<p>In a <tt>GROUP BY</tt> clause, in addition to the binding variable(s) defined for the grouping key(s), SQL++ allows a user to define a <i>group variable</i> by using the clause&#x2019;s <tt>GROUP AS</tt> extension to denote the resulting group. After grouping, then, the query&#x2019;s in-scope variables include the grouping key&#x2019;s binding variables as well as this group variable which will be bound to one collection value for each group. This per-group collection (i.e., multiset) value will be a set of nested objects in which each field of the object is the result of a renamed variable defined in parentheses following the group variable&#x2019;s name. The <tt>GROUP AS</tt> syntax is as follows:</p>
2141
2142<div class="source">
2143<div class="source">
2144<pre>&lt;GROUP&gt; &lt;AS&gt; Variable (&quot;(&quot; Variable &lt;AS&gt; VariableReference (&quot;,&quot; Variable &lt;AS&gt; VariableReference )* &quot;)&quot;)?
2145</pre></div></div>
2146<div class="section">
2147<div class="section">
2148<h5><a name="Example"></a>Example</h5>
2149
2150<div class="source">
2151<div class="source">
2152<pre>SELECT *
2153FROM GleambookMessages message
2154GROUP BY message.authorId AS uid GROUP AS msgs(message AS msg);
2155</pre></div></div>
2156<p>This first example query returns:</p>
2157
2158<div class="source">
2159<div class="source">
2160<pre>[ {
2161 &quot;msgs&quot;: [
2162 {
2163 &quot;msg&quot;: {
2164 &quot;senderLocation&quot;: [
2165 38.97,
2166 77.49
2167 ],
2168 &quot;inResponseTo&quot;: 1,
2169 &quot;messageId&quot;: 11,
2170 &quot;authorId&quot;: 1,
2171 &quot;message&quot;: &quot; can't stand at&amp;t its plan is terrible&quot;
2172 }
2173 },
2174 {
2175 &quot;msg&quot;: {
2176 &quot;senderLocation&quot;: [
2177 41.66,
2178 80.87
2179 ],
2180 &quot;inResponseTo&quot;: 4,
2181 &quot;messageId&quot;: 2,
2182 &quot;authorId&quot;: 1,
2183 &quot;message&quot;: &quot; dislike iphone its touch-screen is horrible&quot;
2184 }
2185 },
2186 {
2187 &quot;msg&quot;: {
2188 &quot;senderLocation&quot;: [
2189 37.73,
2190 97.04
2191 ],
2192 &quot;inResponseTo&quot;: 2,
2193 &quot;messageId&quot;: 4,
2194 &quot;authorId&quot;: 1,
2195 &quot;message&quot;: &quot; can't stand at&amp;t the network is horrible:(&quot;
2196 }
2197 },
2198 {
2199 &quot;msg&quot;: {
2200 &quot;senderLocation&quot;: [
2201 40.33,
2202 80.87
2203 ],
2204 &quot;inResponseTo&quot;: 11,
2205 &quot;messageId&quot;: 8,
2206 &quot;authorId&quot;: 1,
2207 &quot;message&quot;: &quot; like verizon the 3G is awesome:)&quot;
2208 }
2209 },
2210 {
2211 &quot;msg&quot;: {
2212 &quot;senderLocation&quot;: [
2213 42.5,
2214 70.01
2215 ],
2216 &quot;inResponseTo&quot;: 12,
2217 &quot;messageId&quot;: 10,
2218 &quot;authorId&quot;: 1,
2219 &quot;message&quot;: &quot; can't stand motorola the touch-screen is terrible&quot;
2220 }
2221 }
2222 ],
2223 &quot;uid&quot;: 1
2224}, {
2225 &quot;msgs&quot;: [
2226 {
2227 &quot;msg&quot;: {
2228 &quot;senderLocation&quot;: [
2229 31.5,
2230 75.56
2231 ],
2232 &quot;inResponseTo&quot;: 1,
2233 &quot;messageId&quot;: 6,
2234 &quot;authorId&quot;: 2,
2235 &quot;message&quot;: &quot; like t-mobile its platform is mind-blowing&quot;
2236 }
2237 },
2238 {
2239 &quot;msg&quot;: {
2240 &quot;senderLocation&quot;: [
2241 48.09,
2242 81.01
2243 ],
2244 &quot;inResponseTo&quot;: 4,
2245 &quot;messageId&quot;: 3,
2246 &quot;authorId&quot;: 2,
2247 &quot;message&quot;: &quot; like samsung the plan is amazing&quot;
2248 }
2249 }
2250 ],
2251 &quot;uid&quot;: 2
2252} ]
2253</pre></div></div>
2254<p>As we can see from the above query result, each group in the example query&#x2019;s output has an associated group variable value called <tt>msgs</tt> that appears in the <tt>SELECT *</tt>&#x2019;s result. This variable contains a collection of objects associated with the group; each of the group&#x2019;s <tt>message</tt> values appears in the <tt>msg</tt> field of the objects in the <tt>msgs</tt> collection.</p>
2255<p>The group variable in SQL++ makes more complex, composable, nested subqueries over a group possible, which is important given the more complex data model of SQL++ (relative to SQL). As a simple example of this, as we really just want the messages associated with each user, we might wish to avoid the &#x201c;extra wrapping&#x201d; of each message as the <tt>msg</tt> field of an object. (That wrapping is useful in more complex cases, but is essentially just in the way here.) We can use a subquery in the <tt>SELECT</tt> clause to tunnel through the extra nesting and produce the desired result.</p></div>
2256<div class="section">
2257<h5><a name="Example"></a>Example</h5>
2258
2259<div class="source">
2260<div class="source">
2261<pre>SELECT uid, (SELECT VALUE g.msg FROM g) AS msgs
2262FROM GleambookMessages gbm
2263GROUP BY gbm.authorId AS uid
2264GROUP AS g(gbm as msg);
2265</pre></div></div>
2266<p>This variant of the example query returns:</p>
2267
2268<div class="source">
2269<div class="source">
2270<pre> [ {
2271 &quot;msgs&quot;: [
2272 {
2273 &quot;senderLocation&quot;: [
2274 38.97,
2275 77.49
2276 ],
2277 &quot;inResponseTo&quot;: 1,
2278 &quot;messageId&quot;: 11,
2279 &quot;authorId&quot;: 1,
2280 &quot;message&quot;: &quot; can't stand at&amp;t its plan is terrible&quot;
2281 },
2282 {
2283 &quot;senderLocation&quot;: [
2284 41.66,
2285 80.87
2286 ],
2287 &quot;inResponseTo&quot;: 4,
2288 &quot;messageId&quot;: 2,
2289 &quot;authorId&quot;: 1,
2290 &quot;message&quot;: &quot; dislike iphone its touch-screen is horrible&quot;
2291 },
2292 {
2293 &quot;senderLocation&quot;: [
2294 37.73,
2295 97.04
2296 ],
2297 &quot;inResponseTo&quot;: 2,
2298 &quot;messageId&quot;: 4,
2299 &quot;authorId&quot;: 1,
2300 &quot;message&quot;: &quot; can't stand at&amp;t the network is horrible:(&quot;
2301 },
2302 {
2303 &quot;senderLocation&quot;: [
2304 40.33,
2305 80.87
2306 ],
2307 &quot;inResponseTo&quot;: 11,
2308 &quot;messageId&quot;: 8,
2309 &quot;authorId&quot;: 1,
2310 &quot;message&quot;: &quot; like verizon the 3G is awesome:)&quot;
2311 },
2312 {
2313 &quot;senderLocation&quot;: [
2314 42.5,
2315 70.01
2316 ],
2317 &quot;inResponseTo&quot;: 12,
2318 &quot;messageId&quot;: 10,
2319 &quot;authorId&quot;: 1,
2320 &quot;message&quot;: &quot; can't stand motorola the touch-screen is terrible&quot;
2321 }
2322 ],
2323 &quot;uid&quot;: 1
2324 }, {
2325 &quot;msgs&quot;: [
2326 {
2327 &quot;senderLocation&quot;: [
2328 31.5,
2329 75.56
2330 ],
2331 &quot;inResponseTo&quot;: 1,
2332 &quot;messageId&quot;: 6,
2333 &quot;authorId&quot;: 2,
2334 &quot;message&quot;: &quot; like t-mobile its platform is mind-blowing&quot;
2335 },
2336 {
2337 &quot;senderLocation&quot;: [
2338 48.09,
2339 81.01
2340 ],
2341 &quot;inResponseTo&quot;: 4,
2342 &quot;messageId&quot;: 3,
2343 &quot;authorId&quot;: 2,
2344 &quot;message&quot;: &quot; like samsung the plan is amazing&quot;
2345 }
2346 ],
2347 &quot;uid&quot;: 2
2348 } ]
2349</pre></div></div>
2350<p>Because this is a fairly common case, a third variant with output identical to the second variant is also possible:</p></div>
2351<div class="section">
2352<h5><a name="Example"></a>Example</h5>
2353
2354<div class="source">
2355<div class="source">
2356<pre>SELECT uid, msg AS msgs
2357FROM GleambookMessages gbm
2358GROUP BY gbm.authorId AS uid
2359GROUP AS g(gbm as msg);
2360</pre></div></div>
2361<p>This variant of the query exploits a bit of SQL-style &#x201c;syntactic sugar&#x201d; that SQL++ offers to shorten some user queries. In particular, in the <tt>SELECT</tt> list, the reference to the <tt>GROUP</tt> variable field <tt>msg</tt> &#x2013; because it references a field of the group variable &#x2013; is allowed but is &#x201c;pluralized&#x201d;. As a result, the <tt>msg</tt> reference in the <tt>SELECT</tt> list is implicitly rewritten into the second variant&#x2019;s <tt>SELECT VALUE</tt> subquery.</p>
2362<p>The next example shows a more interesting case involving the use of a subquery in the <tt>SELECT</tt> list. Here the subquery further processes the groups.</p></div>
2363<div class="section">
2364<h5><a name="Example"></a>Example</h5>
2365
2366<div class="source">
2367<div class="source">
2368<pre>SELECT uid,
2369 (SELECT VALUE g.msg
2370 FROM g
2371 WHERE g.msg.message LIKE '% like%'
2372 ORDER BY g.msg.messageId
2373 LIMIT 2) AS msgs
2374FROM GleambookMessages gbm
2375GROUP BY gbm.authorId AS uid
2376GROUP AS g(gbm as msg);
2377</pre></div></div>
2378<p>This example query returns:</p>
2379
2380<div class="source">
2381<div class="source">
2382<pre>[ {
2383 &quot;msgs&quot;: [
2384 {
2385 &quot;senderLocation&quot;: [
2386 40.33,
2387 80.87
2388 ],
2389 &quot;inResponseTo&quot;: 11,
2390 &quot;messageId&quot;: 8,
2391 &quot;authorId&quot;: 1,
2392 &quot;message&quot;: &quot; like verizon the 3G is awesome:)&quot;
2393 }
2394 ],
2395 &quot;uid&quot;: 1
2396}, {
2397 &quot;msgs&quot;: [
2398 {
2399 &quot;senderLocation&quot;: [
2400 48.09,
2401 81.01
2402 ],
2403 &quot;inResponseTo&quot;: 4,
2404 &quot;messageId&quot;: 3,
2405 &quot;authorId&quot;: 2,
2406 &quot;message&quot;: &quot; like samsung the plan is amazing&quot;
2407 },
2408 {
2409 &quot;senderLocation&quot;: [
2410 31.5,
2411 75.56
2412 ],
2413 &quot;inResponseTo&quot;: 1,
2414 &quot;messageId&quot;: 6,
2415 &quot;authorId&quot;: 2,
2416 &quot;message&quot;: &quot; like t-mobile its platform is mind-blowing&quot;
2417 }
2418 ],
2419 &quot;uid&quot;: 2
2420} ]
2421</pre></div></div></div></div></div>
2422<div class="section">
2423<h3><a name="Implicit_grouping_key_variables"></a><a name="Implicit_group_key_variables" id="Implicit_group_key_variables">Implicit grouping key variables</a></h3>
2424<p>In the SQL++ syntax, providing named binding variables for <tt>GROUP BY</tt> key expressions is optional. If a grouping key is missing a user-provided binding variable, the underlying compiler will generate one. Automatic grouping key variable naming falls into three cases in SQL++, much like the treatment of unnamed projections:</p>
2425
2426<ul>
2427
2428<li>If the grouping key expression is a variable reference expression, the generated variable gets the same name as the referred variable;</li>
2429
2430<li>If the grouping key expression is a field access expression, the generated variable gets the same name as the last identifier in the expression;</li>
2431
2432<li>For all other cases, the compiler generates a unique variable (but the user query is unable to refer to this generated variable).</li>
2433</ul>
2434<p>The next example illustrates a query that doesn&#x2019;t provide binding variables for its grouping key expressions.</p>
2435<div class="section">
2436<div class="section">
2437<h5><a name="Example"></a>Example</h5>
2438
2439<div class="source">
2440<div class="source">
2441<pre>SELECT authorId,
2442 (SELECT VALUE g.msg
2443 FROM g
2444 WHERE g.msg.message LIKE '% like%'
2445 ORDER BY g.msg.messageId
2446 LIMIT 2) AS msgs
2447FROM GleambookMessages gbm
2448GROUP BY gbm.authorId
2449GROUP AS g(gbm as msg);
2450</pre></div></div>
2451<p>This query returns:</p>
2452
2453<div class="source">
2454<div class="source">
2455<pre> [ {
2456 &quot;msgs&quot;: [
2457 {
2458 &quot;senderLocation&quot;: [
2459 40.33,
2460 80.87
2461 ],
2462 &quot;inResponseTo&quot;: 11,
2463 &quot;messageId&quot;: 8,
2464 &quot;authorId&quot;: 1,
2465 &quot;message&quot;: &quot; like verizon the 3G is awesome:)&quot;
2466 }
2467 ],
2468 &quot;authorId&quot;: 1
2469}, {
2470 &quot;msgs&quot;: [
2471 {
2472 &quot;senderLocation&quot;: [
2473 48.09,
2474 81.01
2475 ],
2476 &quot;inResponseTo&quot;: 4,
2477 &quot;messageId&quot;: 3,
2478 &quot;authorId&quot;: 2,
2479 &quot;message&quot;: &quot; like samsung the plan is amazing&quot;
2480 },
2481 {
2482 &quot;senderLocation&quot;: [
2483 31.5,
2484 75.56
2485 ],
2486 &quot;inResponseTo&quot;: 1,
2487 &quot;messageId&quot;: 6,
2488 &quot;authorId&quot;: 2,
2489 &quot;message&quot;: &quot; like t-mobile its platform is mind-blowing&quot;
2490 }
2491 ],
2492 &quot;authorId&quot;: 2
2493} ]
2494</pre></div></div>
2495<p>Based on the three variable generation rules, the generated variable for the grouping key expression <tt>gbm.authorId</tt> is <tt>authorId</tt> (which is how it is referred to in the example&#x2019;s <tt>SELECT</tt> clause).</p></div></div></div>
2496<div class="section">
2497<h3><a name="Implicit_group_variables" id="Implicit_group_variables">Implicit group variables</a></h3>
2498<p>The group variable itself is also optional in SQL++&#x2019;s <tt>GROUP BY</tt> syntax. If a user&#x2019;s query does not declare the name and structure of the group variable using <tt>GROUP AS</tt>, the query compiler will generate a unique group variable whose fields include all of the binding variables defined in the <tt>FROM</tt> clause of the current enclosing <tt>SELECT</tt> statement. (In this case the user&#x2019;s query will not be able to refer to the generated group variable.)</p>
2499<div class="section">
2500<div class="section">
2501<h5><a name="Example"></a>Example</h5>
2502
2503<div class="source">
2504<div class="source">
2505<pre>SELECT uid,
2506 (SELECT m.message
2507 FROM message m
2508 WHERE m.message LIKE '% like%'
2509 ORDER BY m.messageId
2510 LIMIT 2) AS msgs
2511FROM GleambookMessages message
2512GROUP BY message.authorId AS uid;
2513</pre></div></div>
2514<p>This query returns:</p>
2515
2516<div class="source">
2517<div class="source">
2518<pre>[ {
2519 &quot;msgs&quot;: [
2520 {
2521 &quot;message&quot;: &quot; like verizon the 3G is awesome:)&quot;
2522 }
2523 ],
2524 &quot;uid&quot;: 1
2525}, {
2526 &quot;msgs&quot;: [
2527 {
2528 &quot;message&quot;: &quot; like samsung the plan is amazing&quot;
2529 },
2530 {
2531 &quot;message&quot;: &quot; like t-mobile its platform is mind-blowing&quot;
2532 }
2533 ],
2534 &quot;uid&quot;: 2
2535} ]
2536</pre></div></div>
2537<p>Note that in the query above, in principle, <tt>message</tt> is not an in-scope variable in the <tt>SELECT</tt> clause. However, the query above is a syntactically-sugared simplification of the following query and it is thus legal, executable, and returns the same result:</p>
2538
2539<div class="source">
2540<div class="source">
2541<pre>SELECT uid,
2542 (SELECT g.msg.message
2543 FROM g
2544 WHERE g.msg.message LIKE '% like%'
2545 ORDER BY g.msg.messageId
2546 LIMIT 2) AS msgs
2547FROM GleambookMessages gbm
2548GROUP BY gbm.authorId AS uid GROUP AS g(gbm as msg);
2549</pre></div></div></div></div></div>
2550<div class="section">
2551<h3><a name="Aggregation_functions" id="Aggregation_functions">Aggregation functions</a></h3>
2552<p>In traditional SQL, which doesn&#x2019;t support nested data, grouping always also involves the use of aggregation to compute properties of the groups (e.g., the average number of messages per user rather than the actual set of messages per user). Each aggregation function in SQL++ takes a collection (e.g., the group of messages) as its input and produces a scalar value as its output. These aggregation functions, being truly functional in nature (unlike in SQL), can be used anywhere in a query where an expression is allowed. The following table catalogs the SQL++ built-in aggregation functions and also indicates how each one handles <tt>NULL</tt>/<tt>MISSING</tt> values in the input collection or a completely empty input collection:</p>
2553
2554<table border="0" class="table table-striped">
2555 <thead>
2556
2557<tr class="a">
2558
2559<th>Function </th>
2560
2561<th>NULL </th>
2562
2563<th>MISSING </th>
2564
2565<th>Empty Collection </th>
2566 </tr>
2567 </thead>
2568 <tbody>
2569
2570<tr class="b">
2571
2572<td>COLL_COUNT </td>
2573
2574<td>counted </td>
2575
2576<td>counted </td>
2577
2578<td>0 </td>
2579 </tr>
2580
2581<tr class="a">
2582
2583<td>COLL_SUM </td>
2584
2585<td>returns NULL </td>
2586
2587<td>returns NULL </td>
2588
2589<td>returns NULL </td>
2590 </tr>
2591
2592<tr class="b">
2593
2594<td>COLL_MAX </td>
2595
2596<td>returns NULL </td>
2597
2598<td>returns NULL </td>
2599
2600<td>returns NULL </td>
2601 </tr>
2602
2603<tr class="a">
2604
2605<td>COLL_MIN </td>
2606
2607<td>returns NULL </td>
2608
2609<td>returns NULL </td>
2610
2611<td>returns NULL </td>
2612 </tr>
2613
2614<tr class="b">
2615
2616<td>COLL_AVG </td>
2617
2618<td>returns NULL </td>
2619
2620<td>returns NULL </td>
2621
2622<td>returns NULL </td>
2623 </tr>
2624
2625<tr class="a">
2626
2627<td>ARRAY_COUNT </td>
2628
2629<td>not counted </td>
2630
2631<td>not counted </td>
2632
2633<td>0 </td>
2634 </tr>
2635
2636<tr class="b">
2637
2638<td>ARRAY_SUM </td>
2639
2640<td>ignores NULL </td>
2641
2642<td>ignores MISSING </td>
2643
2644<td>returns NULL </td>
2645 </tr>
2646
2647<tr class="a">
2648
2649<td>ARRAY_MAX </td>
2650
2651<td>ignores NULL </td>
2652
2653<td>ignores MISSING </td>
2654
2655<td>returns NULL </td>
2656 </tr>
2657
2658<tr class="b">
2659
2660<td>ARRAY_MIN </td>
2661
2662<td>ignores NULL </td>
2663
2664<td>ignores NULL </td>
2665
2666<td>returns NULL </td>
2667 </tr>
2668
2669<tr class="a">
2670
2671<td>ARRAY_AVG </td>
2672
2673<td>ignores NULL </td>
2674
2675<td>ignores NULL </td>
2676
2677<td>returns NULL </td>
2678 </tr>
2679 </tbody>
2680</table>
2681<p>Notice that SQL++ has twice as many functions listed above as there are aggregate functions in SQL-92. This is because SQL++ offers two versions of each &#x2013; one that handles <tt>UNKNOWN</tt> values in a semantically strict fashion, where unknown values in the input result in unknown values in the output &#x2013; and one that handles them in the ad hoc &#x201c;just ignore the unknown values&#x201d; fashion that the SQL standard chose to adopt.</p>
2682<div class="section">
2683<div class="section">
2684<h5><a name="Example"></a>Example</h5>
2685
2686<div class="source">
2687<div class="source">
2688<pre>ARRAY_AVG(
2689 (
2690 SELECT VALUE ARRAY_COUNT(friendIds) FROM GleambookUsers
2691 )
2692);
2693</pre></div></div>
2694<p>This example returns:</p>
2695
2696<div class="source">
2697<div class="source">
2698<pre>3.3333333333333335
2699</pre></div></div></div>
2700<div class="section">
2701<h5><a name="Example"></a>Example</h5>
2702
2703<div class="source">
2704<div class="source">
2705<pre>SELECT uid AS uid, ARRAY_COUNT(grp) AS msgCnt
2706FROM GleambookMessages message
2707GROUP BY message.authorId AS uid GROUP AS grp(message AS msg);
2708</pre></div></div>
2709<p>This query returns:</p>
2710
2711<div class="source">
2712<div class="source">
2713<pre>[ {
2714 &quot;uid&quot;: 1,
2715 &quot;msgCnt&quot;: 5
2716}, {
2717 &quot;uid&quot;: 2,
2718 &quot;msgCnt&quot;: 2
2719} ]
2720</pre></div></div>
2721<p>Notice how the query forms groups where each group involves a message author and their messages. (SQL cannot do this because the grouped intermediate result is non-1NF in nature.) The query then uses the collection aggregate function ARRAY_COUNT to get the cardinality of each group of messages.</p></div></div></div>
2722<div class="section">
2723<h3><a name="SQL-92_aggregation_functions" id="SQL-92_aggregation_functions">SQL-92 aggregation functions</a></h3>
2724<p>For compatibility with the traditional SQL aggregation functions, SQL++ also offers SQL-92&#x2019;s aggregation function symbols (<tt>COUNT</tt>, <tt>SUM</tt>, <tt>MAX</tt>, <tt>MIN</tt>, and <tt>AVG</tt>) as supported syntactic sugar. The SQL++ compiler rewrites queries that utilize these function symbols into SQL++ queries that only use the SQL++ collection aggregate functions. The following example uses the SQL-92 syntax approach to compute a result that is identical to that of the more explicit SQL++ example above:</p>
2725<div class="section">
2726<div class="section">
2727<h5><a name="Example"></a>Example</h5>
2728
2729<div class="source">
2730<div class="source">
2731<pre>SELECT uid, COUNT(*) AS msgCnt
2732FROM GleambookMessages msg
2733GROUP BY msg.authorId AS uid;
2734</pre></div></div>
2735<p>It is important to realize that <tt>COUNT</tt> is actually <b>not</b> a SQL++ built-in aggregation function. Rather, the <tt>COUNT</tt> query above is using a special &#x201c;sugared&#x201d; function symbol that the SQL++ compiler will rewrite as follows:</p>
2736
2737<div class="source">
2738<div class="source">
2739<pre>SELECT uid AS uid, ARRAY_COUNT( (SELECT VALUE 1 FROM `$1` as g) ) AS msgCnt
2740FROM GleambookMessages msg
2741GROUP BY msg.authorId AS uid GROUP AS `$1`(msg AS msg);
2742</pre></div></div>
2743<p>The same sort of rewritings apply to the function symbols <tt>SUM</tt>, <tt>MAX</tt>, <tt>MIN</tt>, and <tt>AVG</tt>. In contrast to the SQL++ collection aggregate functions, these special SQL-92 function symbols can only be used in the same way they are in standard SQL (i.e., with the same restrictions).</p></div></div></div>
2744<div class="section">
2745<h3><a name="SQL-92_compliant_GROUP_BY_aggregations"></a><a name="SQL-92_compliant_gby" id="SQL-92_compliant_gby">SQL-92 compliant GROUP BY aggregations</a></h3>
2746<p>SQL++ provides full support for SQL-92 <tt>GROUP BY</tt> aggregation queries. The following query is such an example:</p>
2747<div class="section">
2748<div class="section">
2749<h5><a name="Example"></a>Example</h5>
2750
2751<div class="source">
2752<div class="source">
2753<pre>SELECT msg.authorId, COUNT(msg)
2754FROM GleambookMessages msg
2755GROUP BY msg.authorId;
2756</pre></div></div>
2757<p>This query outputs:</p>
2758
2759<div class="source">
2760<div class="source">
2761<pre>[ {
2762 &quot;authorId&quot;: 1,
2763 &quot;$1&quot;: 5
2764}, {
2765 &quot;authorId&quot;: 2,
2766 &quot;$1&quot;: 2
2767} ]
2768</pre></div></div>
2769<p>In principle, a <tt>msg</tt> reference in the query&#x2019;s <tt>SELECT</tt> clause would be &#x201c;sugarized&#x201d; as a collection (as described in <a href="#Implicit_group_variables">Implicit group variables</a>). However, since the SELECT expression <tt>msg.authorId</tt> is syntactically identical to a GROUP BY key expression, it will be internally replaced by the generated group key variable. The following is the equivalent rewritten query that will be generated by the compiler for the query above:</p>
2770
2771<div class="source">
2772<div class="source">
2773<pre>SELECT authorId AS authorId, ARRAY_COUNT( (SELECT g.msg FROM `$1` AS g) )
2774FROM GleambookMessages msg
2775GROUP BY msg.authorId AS authorId GROUP AS `$1`(msg AS msg);
2776</pre></div></div></div></div></div>
2777<div class="section">
2778<h3><a name="Column_aliases" id="Column_aliases">Column aliases</a></h3>
2779<p>SQL++ also allows column aliases to be used as <tt>GROUP BY</tt> keys or <tt>ORDER BY</tt> keys.</p>
2780<div class="section">
2781<div class="section">
2782<h5><a name="Example"></a>Example</h5>
2783
2784<div class="source">
2785<div class="source">
2786<pre>SELECT msg.authorId AS aid, COUNT(msg)
2787FROM GleambookMessages msg
2788GROUP BY aid;
2789</pre></div></div>
2790<p>This query returns:</p>
2791
2792<div class="source">
2793<div class="source">
2794<pre>[ {
2795 &quot;$1&quot;: 5,
2796 &quot;aid&quot;: 1
2797}, {
2798 &quot;$1&quot;: 2,
2799 &quot;aid&quot;: 2
2800} ]
2801</pre></div></div></div></div></div></div>
2802<div class="section">
2803<h2><a name="WHERE_clauses_and_HAVING_clauses"></a><a name="Where_having_clauses" id="Where_having_clauses">WHERE clauses and HAVING clauses</a></h2>
2804<p>Both <tt>WHERE</tt> clauses and <tt>HAVING</tt> clauses are used to filter input data based on a condition expression. Only tuples for which the condition expression evaluates to <tt>TRUE</tt> are propagated. Note that if the condition expression evaluates to <tt>NULL</tt> or <tt>MISSING</tt>, the input tuple will be discarded.</p></div>
2805<div class="section">
2806<h2><a name="ORDER_BY_clauses"></a><a name="Order_By_clauses" id="Order_By_clauses">ORDER BY clauses</a></h2>
2807<p>The <tt>ORDER BY</tt> clause is used to globally sort data in either ascending order (i.e., <tt>ASC</tt>) or descending order (i.e., <tt>DESC</tt>). During ordering, <tt>MISSING</tt> and <tt>NULL</tt> are treated as being smaller than any other value if they are encountered in the ordering key(s). <tt>MISSING</tt> is treated as smaller than <tt>NULL</tt> if both occur in the data being sorted. The following example returns all <tt>GleambookUsers</tt> in descending order by their number of friends.</p>
2808<div class="section">
2809<div class="section">
2810<div class="section">
2811<h5><a name="Example"></a>Example</h5>
2812
2813<div class="source">
2814<div class="source">
2815<pre> SELECT VALUE user
2816 FROM GleambookUsers AS user
2817 ORDER BY ARRAY_COUNT(user.friendIds) DESC;
2818</pre></div></div>
2819<p>This query returns:</p>
2820
2821<div class="source">
2822<div class="source">
2823<pre> [ {
2824 &quot;userSince&quot;: &quot;2012-08-20T10:10:00.000Z&quot;,
2825 &quot;friendIds&quot;: [
2826 2,
2827 3,
2828 6,
2829 10
2830 ],
2831 &quot;gender&quot;: &quot;F&quot;,
2832 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2833 &quot;nickname&quot;: &quot;Mags&quot;,
2834 &quot;alias&quot;: &quot;Margarita&quot;,
2835 &quot;id&quot;: 1,
2836 &quot;employment&quot;: [
2837 {
2838 &quot;organizationName&quot;: &quot;Codetechno&quot;,
2839 &quot;start-date&quot;: &quot;2006-08-06&quot;
2840 },
2841 {
2842 &quot;end-date&quot;: &quot;2010-01-26&quot;,
2843 &quot;organizationName&quot;: &quot;geomedia&quot;,
2844 &quot;start-date&quot;: &quot;2010-06-17&quot;
2845 }
2846 ]
2847 }, {
2848 &quot;userSince&quot;: &quot;2012-07-10T10:10:00.000Z&quot;,
2849 &quot;friendIds&quot;: [
2850 1,
2851 5,
2852 8,
2853 9
2854 ],
2855 &quot;name&quot;: &quot;EmoryUnk&quot;,
2856 &quot;alias&quot;: &quot;Emory&quot;,
2857 &quot;id&quot;: 3,
2858 &quot;employment&quot;: [
2859 {
2860 &quot;organizationName&quot;: &quot;geomedia&quot;,
2861 &quot;endDate&quot;: &quot;2010-01-26&quot;,
2862 &quot;startDate&quot;: &quot;2010-06-17&quot;
2863 }
2864 ]
2865 }, {
2866 &quot;userSince&quot;: &quot;2011-01-22T10:10:00.000Z&quot;,
2867 &quot;friendIds&quot;: [
2868 1,
2869 4
2870 ],
2871 &quot;name&quot;: &quot;IsbelDull&quot;,
2872 &quot;nickname&quot;: &quot;Izzy&quot;,
2873 &quot;alias&quot;: &quot;Isbel&quot;,
2874 &quot;id&quot;: 2,
2875 &quot;employment&quot;: [
2876 {
2877 &quot;organizationName&quot;: &quot;Hexviafind&quot;,
2878 &quot;startDate&quot;: &quot;2010-04-27&quot;
2879 }
2880 ]
2881 } ]
2882</pre></div></div></div></div></div></div>
2883<div class="section">
2884<h2><a name="LIMIT_clauses"></a><a name="Limit_clauses" id="Limit_clauses">LIMIT clauses</a></h2>
2885<p>The <tt>LIMIT</tt> clause is used to limit the result set to a specified constant size. The use of the <tt>LIMIT</tt> clause is illustrated in the next example.</p>
2886<div class="section">
2887<div class="section">
2888<div class="section">
2889<h5><a name="Example"></a>Example</h5>
2890
2891<div class="source">
2892<div class="source">
2893<pre> SELECT VALUE user
2894 FROM GleambookUsers AS user
2895 ORDER BY len(user.friendIds) DESC
2896 LIMIT 1;
2897</pre></div></div>
2898<p>This query returns:</p>
2899
2900<div class="source">
2901<div class="source">
2902<pre> [ {
2903 &quot;userSince&quot;: &quot;2012-08-20T10:10:00.000Z&quot;,
2904 &quot;friendIds&quot;: [
2905 2,
2906 3,
2907 6,
2908 10
2909 ],
2910 &quot;gender&quot;: &quot;F&quot;,
2911 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2912 &quot;nickname&quot;: &quot;Mags&quot;,
2913 &quot;alias&quot;: &quot;Margarita&quot;,
2914 &quot;id&quot;: 1,
2915 &quot;employment&quot;: [
2916 {
2917 &quot;organizationName&quot;: &quot;Codetechno&quot;,
2918 &quot;start-date&quot;: &quot;2006-08-06&quot;
2919 },
2920 {
2921 &quot;end-date&quot;: &quot;2010-01-26&quot;,
2922 &quot;organizationName&quot;: &quot;geomedia&quot;,
2923 &quot;start-date&quot;: &quot;2010-06-17&quot;
2924 }
2925 ]
2926 } ]
2927</pre></div></div></div></div></div></div>
2928<div class="section">
2929<h2><a name="WITH_clauses"></a><a name="With_clauses" id="With_clauses">WITH clauses</a></h2>
2930<p>As in standard SQL, <tt>WITH</tt> clauses are available to improve the modularity of a query. The next query shows an example.</p>
2931<div class="section">
2932<div class="section">
2933<div class="section">
2934<h5><a name="Example"></a>Example</h5>
2935
2936<div class="source">
2937<div class="source">
2938<pre>WITH avgFriendCount AS (
2939 SELECT VALUE AVG(ARRAY_COUNT(user.friendIds))
2940 FROM GleambookUsers AS user
2941)[0]
2942SELECT VALUE user
2943FROM GleambookUsers user
2944WHERE ARRAY_COUNT(user.friendIds) &gt; avgFriendCount;
2945</pre></div></div>
2946<p>This query returns:</p>
2947
2948<div class="source">
2949<div class="source">
2950<pre>[ {
2951 &quot;userSince&quot;: &quot;2012-08-20T10:10:00.000Z&quot;,
2952 &quot;friendIds&quot;: [
2953 2,
2954 3,
2955 6,
2956 10
2957 ],
2958 &quot;gender&quot;: &quot;F&quot;,
2959 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2960 &quot;nickname&quot;: &quot;Mags&quot;,
2961 &quot;alias&quot;: &quot;Margarita&quot;,
2962 &quot;id&quot;: 1,
2963 &quot;employment&quot;: [
2964 {
2965 &quot;organizationName&quot;: &quot;Codetechno&quot;,
2966 &quot;start-date&quot;: &quot;2006-08-06&quot;
2967 },
2968 {
2969 &quot;end-date&quot;: &quot;2010-01-26&quot;,
2970 &quot;organizationName&quot;: &quot;geomedia&quot;,
2971 &quot;start-date&quot;: &quot;2010-06-17&quot;
2972 }
2973 ]
2974}, {
2975 &quot;userSince&quot;: &quot;2012-07-10T10:10:00.000Z&quot;,
2976 &quot;friendIds&quot;: [
2977 1,
2978 5,
2979 8,
2980 9
2981 ],
2982 &quot;name&quot;: &quot;EmoryUnk&quot;,
2983 &quot;alias&quot;: &quot;Emory&quot;,
2984 &quot;id&quot;: 3,
2985 &quot;employment&quot;: [
2986 {
2987 &quot;organizationName&quot;: &quot;geomedia&quot;,
2988 &quot;endDate&quot;: &quot;2010-01-26&quot;,
2989 &quot;startDate&quot;: &quot;2010-06-17&quot;
2990 }
2991 ]
2992} ]
2993</pre></div></div>
2994<p>The query is equivalent to the following, more complex, inlined form of the query:</p>
2995
2996<div class="source">
2997<div class="source">
2998<pre>SELECT *
2999FROM GleambookUsers user
3000WHERE ARRAY_COUNT(user.friendIds) &gt;
3001 ( SELECT VALUE AVG(ARRAY_COUNT(user.friendIds))
3002 FROM GleambookUsers AS user
3003 ) [0];
3004</pre></div></div>
3005<p>WITH can be particularly useful when a value needs to be used several times in a query.</p>
3006<p>Before proceeding further, notice that both the WITH query and its equivalent inlined variant include the syntax &#x201c;[0]&#x201d; &#x2013; this is due to a noteworthy difference between SQL++ and SQL-92. In SQL-92, whenever a scalar value is expected and it is being produced by a query expression, the SQL-92 query processor will evaluate the expression, check that there is only one row and column in the result at runtime, and then coerce the one-row/one-column tabular result into a scalar value. SQL++, being designed to deal with nested data and schema-less data, does not (and should not) do this. Collection-valued data is perfectly legal in most SQL++ contexts, and its data is schema-less, so a query processor rarely knows exactly what to expect where and such automatic conversion is often not desirable. Thus, in the queries above, the use of &#x201c;[0]&#x201d; extracts the first (i.e., 0th) element of an array-valued query expression&#x2019;s result; this is needed above, even though the result is an array of one element, to extract the only element in the singleton array and obtain the desired scalar for the comparison.</p></div></div></div></div>
3007<div class="section">
3008<h2><a name="LET_clauses"></a><a name="Let_clauses" id="Let_clauses">LET clauses</a></h2>
3009<p>Similar to <tt>WITH</tt> clauses, <tt>LET</tt> clauses can be useful when a (complex) expression is used several times within a query, allowing it to be written once to make the query more concise. The next query shows an example.</p>
3010<div class="section">
3011<div class="section">
3012<div class="section">
3013<h5><a name="Example"></a>Example</h5>
3014
3015<div class="source">
3016<div class="source">
3017<pre>SELECT u.name AS uname, messages AS messages
3018FROM GleambookUsers u
3019LET messages = (SELECT VALUE m
3020 FROM GleambookMessages m
3021 WHERE m.authorId = u.id)
3022WHERE EXISTS messages;
3023</pre></div></div>
3024<p>This query lists <tt>GleambookUsers</tt> that have posted <tt>GleambookMessages</tt> and shows all authored messages for each listed user. It returns:</p>
3025
3026<div class="source">
3027<div class="source">
3028<pre>[ {
3029 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
3030 &quot;messages&quot;: [
3031 {
3032 &quot;senderLocation&quot;: [
3033 38.97,
3034 77.49
3035 ],
3036 &quot;inResponseTo&quot;: 1,
3037 &quot;messageId&quot;: 11,
3038 &quot;authorId&quot;: 1,
3039 &quot;message&quot;: &quot; can't stand at&amp;t its plan is terrible&quot;
3040 },
3041 {
3042 &quot;senderLocation&quot;: [
3043 41.66,
3044 80.87
3045 ],
3046 &quot;inResponseTo&quot;: 4,
3047 &quot;messageId&quot;: 2,
3048 &quot;authorId&quot;: 1,
3049 &quot;message&quot;: &quot; dislike iphone its touch-screen is horrible&quot;
3050 },
3051 {
3052 &quot;senderLocation&quot;: [
3053 37.73,
3054 97.04
3055 ],
3056 &quot;inResponseTo&quot;: 2,
3057 &quot;messageId&quot;: 4,
3058 &quot;authorId&quot;: 1,
3059 &quot;message&quot;: &quot; can't stand at&amp;t the network is horrible:(&quot;
3060 },
3061 {
3062 &quot;senderLocation&quot;: [
3063 40.33,
3064 80.87
3065 ],
3066 &quot;inResponseTo&quot;: 11,
3067 &quot;messageId&quot;: 8,
3068 &quot;authorId&quot;: 1,
3069 &quot;message&quot;: &quot; like verizon the 3G is awesome:)&quot;
3070 },
3071 {
3072 &quot;senderLocation&quot;: [
3073 42.5,
3074 70.01
3075 ],
3076 &quot;inResponseTo&quot;: 12,
3077 &quot;messageId&quot;: 10,
3078 &quot;authorId&quot;: 1,
3079 &quot;message&quot;: &quot; can't stand motorola the touch-screen is terrible&quot;
3080 }
3081 ]
3082}, {
3083 &quot;uname&quot;: &quot;IsbelDull&quot;,
3084 &quot;messages&quot;: [
3085 {
3086 &quot;senderLocation&quot;: [
3087 31.5,
3088 75.56
3089 ],
3090 &quot;inResponseTo&quot;: 1,
3091 &quot;messageId&quot;: 6,
3092 &quot;authorId&quot;: 2,
3093 &quot;message&quot;: &quot; like t-mobile its platform is mind-blowing&quot;
3094 },
3095 {
3096 &quot;senderLocation&quot;: [
3097 48.09,
3098 81.01
3099 ],
3100 &quot;inResponseTo&quot;: 4,
3101 &quot;messageId&quot;: 3,
3102 &quot;authorId&quot;: 2,
3103 &quot;message&quot;: &quot; like samsung the plan is amazing&quot;
3104 }
3105 ]
3106} ]
3107</pre></div></div>
3108<p>This query is equivalent to the following query that does not use the <tt>LET</tt> clause:</p>
3109
3110<div class="source">
3111<div class="source">
3112<pre>SELECT u.name AS uname, ( SELECT VALUE m
3113 FROM GleambookMessages m
3114 WHERE m.authorId = u.id
3115 ) AS messages
3116FROM GleambookUsers u
3117WHERE EXISTS ( SELECT VALUE m
3118 FROM GleambookMessages m
3119 WHERE m.authorId = u.id
3120 );
3121</pre></div></div></div></div></div></div>
3122<div class="section">
3123<h2><a name="UNION_ALL"></a><a name="Union_all" id="Union_all">UNION ALL</a></h2>
3124<p>UNION ALL can be used to combine two input streams into one. As in SQL, there is no ordering guarantee on the contents of the output stream. However, unlike SQL, SQL++ does not constrain what the data looks like on the input streams; in particular, it allows heterogeneity on the input and output streams. A type error will be raised if one of the inputs is not a collection. The following odd but legal query is an example:</p>
3125<div class="section">
3126<div class="section">
3127<div class="section">
3128<h5><a name="Example"></a>Example</h5>
3129
3130<div class="source">
3131<div class="source">
3132<pre>SELECT u.name AS uname
3133FROM GleambookUsers u
3134WHERE u.id = 2
3135 UNION ALL
3136SELECT VALUE m.message
3137FROM GleambookMessages m
3138WHERE authorId=2;
3139</pre></div></div>
3140<p>This query returns:</p>
3141
3142<div class="source">
3143<div class="source">
3144<pre>[
3145 &quot; like t-mobile its platform is mind-blowing&quot;
3146 , {
3147 &quot;uname&quot;: &quot;IsbelDull&quot;
3148}, &quot; like samsung the plan is amazing&quot;
3149 ]
3150</pre></div></div></div></div></div></div>
3151<div class="section">
3152<h2><a name="Subqueries" id="Subqueries">Subqueries</a></h2>
3153<p>In SQL++, an arbitrary subquery can appear anywhere that an expression can appear. Unlike SQL-92, as was just alluded to, the subqueries in a SELECT list or a boolean predicate need not return singleton, single-column relations. Instead, they may return arbitrary collections. For example, the following query is a variant of the prior group-by query examples; it retrieves an array of up to two &#x201c;dislike&#x201d; messages per user.</p>
3154<div class="section">
3155<div class="section">
3156<div class="section">
3157<h5><a name="Example"></a>Example</h5>
3158
3159<div class="source">
3160<div class="source">
3161<pre>SELECT uid,
3162 (SELECT VALUE m.msg
3163 FROM msgs m
3164 WHERE m.msg.message LIKE '%dislike%'
3165 ORDER BY m.msg.messageId
3166 LIMIT 2) AS msgs
3167FROM GleambookMessages message
3168GROUP BY message.authorId AS uid GROUP AS msgs(message AS msg);
3169</pre></div></div>
3170<p>For our sample data set, this query returns:</p>
3171
3172<div class="source">
3173<div class="source">
3174<pre>[ {
3175 &quot;msgs&quot;: [
3176 {
3177 &quot;senderLocation&quot;: [
3178 41.66,
3179 80.87
3180 ],
3181 &quot;inResponseTo&quot;: 4,
3182 &quot;messageId&quot;: 2,
3183 &quot;authorId&quot;: 1,
3184 &quot;message&quot;: &quot; dislike iphone its touch-screen is horrible&quot;
3185 }
3186 ],
3187 &quot;uid&quot;: 1
3188}, {
3189 &quot;msgs&quot;: [
3190
3191 ],
3192 &quot;uid&quot;: 2
3193} ]
3194</pre></div></div>
3195<p>Note that a subquery, like a top-level <tt>SELECT</tt> statement, always returns a collection &#x2013; regardless of where within a query the subquery occurs &#x2013; and again, its result is never automatically cast into a scalar.</p></div></div></div></div>
3196<div class="section">
3197<h2><a name="SQL_vs._SQL-92"></a><a name="Vs_SQL-92" id="Vs_SQL-92">SQL++ vs. SQL-92</a></h2>
3198<p>SQL++ offers the following additional features beyond SQL-92 (hence the &#x201c;++&#x201d; in its name):</p>
3199
3200<ul>
3201
3202<li>Fully composable and functional: A subquery can iterate over any intermediate collection and can appear anywhere in a query.</li>
3203
3204<li>Schema-free: The query language does not assume the existence of a static schema for any data that it processes.</li>
3205
3206<li>Correlated FROM terms: A right-side FROM term expression can refer to variables defined by FROM terms on its left.</li>
3207
3208<li>Powerful GROUP BY: In addition to a set of aggregate functions as in standard SQL, the groups created by the <tt>GROUP BY</tt> clause are directly usable in nested queries and/or to obtain nested results.</li>
3209
3210<li>Generalized SELECT clause: A SELECT clause can return any type of collection, while in SQL-92, a <tt>SELECT</tt> clause has to return a (homogeneous) collection of objects.</li>
3211</ul>
3212<p>The following matrix is a quick &#x201c;SQL-92 compatibility cheat sheet&#x201d; for SQL++.</p>
3213
3214<table border="0" class="table table-striped">
3215 <thead>
3216
3217<tr class="a">
3218
3219<th>Feature </th>
3220
3221<th>SQL++ </th>
3222
3223<th>SQL-92 </th>
3224
3225<th>Why different? </th>
3226 </tr>
3227 </thead>
3228 <tbody>
3229
3230<tr class="b">
3231
3232<td>SELECT * </td>
3233
3234<td>Returns nested objects </td>
3235
3236<td>Returns flattened concatenated objects </td>
3237
3238<td>Nested collections are 1st class citizens </td>
3239 </tr>
3240
3241<tr class="a">
3242
3243<td>SELECT list </td>
3244
3245<td>order not preserved </td>
3246
3247<td>order preserved </td>
3248
3249<td>Fields in a JSON object are not ordered </td>
3250 </tr>
3251
3252<tr class="b">
3253
3254<td>Subquery </td>
3255
3256<td>Returns a collection </td>
3257
3258<td>The returned collection is cast into a scalar value if the subquery appears in a SELECT list or on one side of a comparison or as input to a function </td>
3259
3260<td>Nested collections are 1st class citizens </td>
3261 </tr>
3262
3263<tr class="a">
3264
3265<td>LEFT OUTER JOIN </td>
3266
3267<td>Fills in <tt>MISSING</tt>(s) for non-matches </td>
3268
3269<td>Fills in <tt>NULL</tt>(s) for non-matches </td>
3270
3271<td>&#x201c;Absence&#x201d; is more appropriate than &#x201c;unknown&#x201d; here. </td>
3272 </tr>
3273
3274<tr class="b">
3275
3276<td>UNION ALL </td>
3277
3278<td>Allows heterogeneous inputs and output </td>
3279
3280<td>Input streams must be UNION-compatible and output field names are drawn from the first input stream </td>
3281
3282<td>Heterogeneity and nested collections are common </td>
3283 </tr>
3284
3285<tr class="a">
3286
3287<td>IN constant_expr </td>
3288
3289<td>The constant expression has to be an array or multiset, i.e., [..,..,&#x2026;] </td>
3290
3291<td>The constant collection can be represented as comma-separated items in a paren pair </td>
3292
3293<td>Nested collections are 1st class citizens </td>
3294 </tr>
3295
3296<tr class="b">
3297
3298<td>String literal </td>
3299
3300<td>Double quotes or single quotes </td>
3301
3302<td>Single quotes only </td>
3303
3304<td>Double quoted strings are pervasive </td>
3305 </tr>
3306
3307<tr class="a">
3308
3309<td>Delimited identifiers </td>
3310
3311<td>Backticks </td>
3312
3313<td>Double quotes </td>
3314
3315<td>Double quoted strings are pervasive </td>
3316 </tr>
3317 </tbody>
3318</table>
3319<p>The following SQL-92 features are not implemented yet. However, SQL++ does not conflict with those features:</p>
3320
3321<ul>
3322
3323<li>CROSS JOIN, NATURAL JOIN, UNION JOIN</li>
3324
3325<li>RIGHT and FULL OUTER JOIN</li>
3326
3327<li>INTERSECT, EXCEPT, UNION with set semantics</li>
3328
3329<li>CAST expression</li>
3330
3331<li>NULLIF expression</li>
3332
3333<li>COALESCE expression</li>
3334
3335<li>ALL and SOME predicates for linking to subqueries</li>
3336
3337<li>UNIQUE predicate (tests a collection for duplicates)</li>
3338
3339<li>MATCH predicate (tests for referential integrity)</li>
3340
3341<li>Row and Table constructors</li>
3342
3343<li>DISTINCT aggregates</li>
3344
3345<li>Preserved order for expressions in a SELECT list</li>
3346</ul>
3347<h1><a name="Errors" id="Errors">4. Errors</a></h1>
3348<p>A SQL++ query can potentially result in one of the following errors:</p>
3349
3350<ul>
3351
3352<li>syntax error,</li>
3353
3354<li>identifier resolution error,</li>
3355
3356<li>type error,</li>
3357
3358<li>resource error.</li>
3359</ul>
3360<p>If the query processor runs into any error, it will terminate the ongoing processing of the query and immediately return an error message to the client.</p></div>
3361<div class="section">
3362<h2><a name="Syntax_Errors"></a><a name="Syntax_errors" id="Syntax_errors">Syntax Errors</a></h2>
3363<p>A valid SQL++ query must satisfy the SQL++ grammar rules. Otherwise, a syntax error will be raised.</p>
3364<div class="section">
3365<div class="section">
3366<div class="section">
3367<h5><a name="Example"></a>Example</h5>
3368
3369<div class="source">
3370<div class="source">
3371<pre>SELECT *
3372FROM GleambookUsers user
3373</pre></div></div>
3374<p>Since the ending semi-colon is mandatory for any SQL++ query, we will get a syntax error as follows:</p>
3375
3376<div class="source">
3377<div class="source">
3378<pre>Error: Syntax error: In line 2 &gt;&gt;FROM GleambookUsers user&lt;&lt; Encountered &lt;EOF&gt; at column 24.
3379==&gt; FROM GleambookUsers user
3380</pre></div></div></div>
3381<div class="section">
3382<h5><a name="Example"></a>Example</h5>
3383
3384<div class="source">
3385<div class="source">
3386<pre>SELECT *
3387FROM GleambookUsers user
3388WHERE type=&quot;advertiser&quot;;
3389</pre></div></div>
3390<p>Since &#x201c;type&#x201d; is a <a href="#Reserved_keywords">reserved keyword</a> in the SQL++ parser, we will get a syntax error as follows:</p>
3391
3392<div class="source">
3393<div class="source">
3394<pre>Error: Syntax error: In line 3 &gt;&gt;WHERE type=&quot;advertiser&quot;;&lt;&lt; Encountered 'type' &quot;type&quot; at column 7.
3395==&gt; WHERE type=&quot;advertiser&quot;;
3396</pre></div></div></div></div></div></div>
3397<div class="section">
3398<h2><a name="Identifier_Resolution_Errors"></a><a name="Identifier_resolution_errors" id="Identifier_resolution_errors">Identifier Resolution Errors</a></h2>
3399<p>Referring to an undefined identifier can cause an error if the identifier cannot be successfully resolved as a valid field access.</p>
3400<div class="section">
3401<div class="section">
3402<div class="section">
3403<h5><a name="Example"></a>Example</h5>
3404
3405<div class="source">
3406<div class="source">
3407<pre>SELECT *
3408FROM GleambookUser user;
3409</pre></div></div>
3410<p>Assuming we have a typo in &#x201c;GleambookUser&#x201d;, which is missing the ending &#x201c;s&#x201d;, we will get an identifier resolution error as follows:</p>
3411
3412<div class="source">
3413<div class="source">
3414<pre>Error: Cannot find dataset GleambookUser in dataverse Default nor an alias with name GleambookUser!
3415</pre></div></div></div>
3416<div class="section">
3417<h5><a name="Example"></a>Example</h5>
3418
3419<div class="source">
3420<div class="source">
3421<pre>SELECT name, message
3422FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id;
3423</pre></div></div>
3424<p>If the compiler cannot figure out all possible fields in <tt>GleambookUsers</tt> and <tt>GleambookMessages</tt>, we will get an identifier resolution error as follows:</p>
3425
3426<div class="source">
3427<div class="source">
3428<pre>Error: Cannot resolve ambiguous alias reference for undefined identifier name
3429</pre></div></div></div></div></div></div>
3430<div class="section">
3431<h2><a name="Type_Errors"></a><a name="Type_errors" id="Type_errors">Type Errors</a></h2>
3432<p>The SQL++ compiler does type checks based on its available type information. In addition, the SQL++ runtime also reports type errors if a data model instance it processes does not satisfy the type requirement.</p>
3433<div class="section">
3434<div class="section">
3435<div class="section">
3436<h5><a name="Example"></a>Example</h5>
3437
3438<div class="source">
3439<div class="source">
3440<pre>abs(&quot;123&quot;);
3441</pre></div></div>
3442<p>Since function <tt>abs</tt> can only process numeric input values, we will get a type error as follows:</p>
3443
3444<div class="source">
3445<div class="source">
3446<pre>Error: Arithmetic operations are not implemented for string
3447</pre></div></div></div></div></div></div>
3448<div class="section">
3449<h2><a name="Resource_Errors"></a><a name="Resource_errors" id="Resource_errors">Resource Errors</a></h2>
3450<p>A query can potentially exhaust system resources, such as the number of open files and disk space. For instance, the following two resource errors could potentially be seen when running the system:</p>
3451
3452<div class="source">
3453<div class="source">
3454<pre>Error: no space left on device
3455Error: too many open files
3456</pre></div></div>
3457<p>The &#x201c;no space left on device&#x201d; issue usually can be fixed by cleaning up disk space and reserving more disk space for the system. The &#x201c;too many open files&#x201d; issue usually can be fixed by a system administrator, following the instructions <a class="externalLink" href="https://easyengine.io/tutorials/linux/increase-open-files-limit/">here</a>.</p>
3458<!-- ! Licensed to the Apache Software Foundation (ASF) under one
3459 ! or more contributor license agreements. See the NOTICE file
3460 ! distributed with this work for additional information
3461 ! regarding copyright ownership. The ASF licenses this file
3462 ! to you under the Apache License, Version 2.0 (the
3463 ! "License"); you may not use this file except in compliance
3464 ! with the License. You may obtain a copy of the License at
3465 !
3466 ! http://www.apache.org/licenses/LICENSE-2.0
3467 !
3468 ! Unless required by applicable law or agreed to in writing,
3469 ! software distributed under the License is distributed on an
3470 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3471 ! KIND, either express or implied. See the License for the
3472 ! specific language governing permissions and limitations
3473 ! under the License.
3474 ! -->
3475<h1><a name="DDL_and_DML_statements" id="DDL_and_DML_statements">5. DDL and DML statements</a></h1>
3476
3477<div class="source">
3478<div class="source">
3479<pre>Statement ::= ( SingleStatement ( &quot;;&quot; )? )* &lt;EOF&gt;
3480SingleStatement ::= DatabaseDeclaration
3481 | FunctionDeclaration
3482 | CreateStatement
3483 | DropStatement
3484 | LoadStatement
3485 | SetStatement
3486 | InsertStatement
3487 | DeleteStatement
3488 | Query &quot;;&quot;
3489</pre></div></div>
3490<p>In addition to queries, an implementation of SQL++ needs to support statements for data definition and manipulation purposes as well as controlling the context to be used in evaluating SQL++ expressions. This section details the DDL and DML statements supported in the SQL++ language as realized today in Apache AsterixDB.</p></div>
3491<div class="section">
3492<h2><a name="Declarations" id="Declarations">Declarations</a></h2>
3493
3494<div class="source">
3495<div class="source">
3496<pre>DatabaseDeclaration ::= &quot;USE&quot; Identifier
3497</pre></div></div>
3498<p>At the uppermost level, the world of data is organized into data namespaces called <b>dataverses</b>. To set the default dataverse for a series of statements, the USE statement is provided in SQL++.</p>
3499<p>As an example, the following statement sets the default dataverse to be &#x201c;TinySocial&#x201d;.</p>
3500<div class="section">
3501<div class="section">
3502<div class="section">
3503<h5><a name="Example"></a>Example</h5>
3504
3505<div class="source">
3506<div class="source">
3507<pre>USE TinySocial;
3508</pre></div></div>
3509<p>When writing a complex SQL++ query, it can sometimes be helpful to define one or more auxiliary functions that each address a sub-piece of the overall query. The declare function statement supports the creation of such helper functions. In general, the function body (expression) can be any legal SQL++ query expression.</p>
3510
3511<div class="source">
3512<div class="source">
3513<pre>FunctionDeclaration ::= &quot;DECLARE&quot; &quot;FUNCTION&quot; Identifier ParameterList &quot;{&quot; Expression &quot;}&quot;
3514ParameterList ::= &quot;(&quot; ( &lt;VARIABLE&gt; ( &quot;,&quot; &lt;VARIABLE&gt; )* )? &quot;)&quot;
3515</pre></div></div>
3516<p>The following is a simple example of a temporary SQL++ function definition and its use.</p></div>
3517<div class="section">
3518<h5><a name="Example"></a>Example</h5>
3519
3520<div class="source">
3521<div class="source">
3522<pre>DECLARE FUNCTION friendInfo(userId) {
3523 (SELECT u.id, u.name, len(u.friendIds) AS friendCount
3524 FROM GleambookUsers u
3525 WHERE u.id = userId)[0]
3526 };
3527
3528SELECT VALUE friendInfo(2);
3529</pre></div></div>
3530<p>For our sample data set, this returns:</p>
3531
3532<div class="source">
3533<div class="source">
3534<pre>[
3535 { &quot;id&quot;: 2, &quot;name&quot;: &quot;IsbelDull&quot;, &quot;friendCount&quot;: 2 }
3536]
3537</pre></div></div></div></div></div></div>
3538<div class="section">
3539<h2><a name="Lifecycle_management_statements" id="Lifecycle_management_statements">Lifecycle management statements</a></h2>
3540
3541<div class="source">
3542<div class="source">
3543<pre>CreateStatement ::= &quot;CREATE&quot; ( DatabaseSpecification
3544 | TypeSpecification
3545 | DatasetSpecification
3546 | IndexSpecification
3547 | FunctionSpecification )
3548
3549QualifiedName ::= Identifier ( &quot;.&quot; Identifier )?
3550DoubleQualifiedName ::= Identifier &quot;.&quot; Identifier ( &quot;.&quot; Identifier )?
3551</pre></div></div>
3552<p>The CREATE statement in SQL++ is used for creating dataverses as well as other persistent artifacts in a dataverse. It can be used to create new dataverses, datatypes, datasets, indexes, and user-defined SQL++ functions.</p>
3553<div class="section">
3554<h3><a name="Dataverses" id="Dataverses"> Dataverses</a></h3>
3555
3556<div class="source">
3557<div class="source">
3558<pre>DatabaseSpecification ::= &quot;DATAVERSE&quot; Identifier IfNotExists
3559</pre></div></div>
3560<p>The CREATE DATAVERSE statement is used to create new dataverses. To ease the authoring of reusable SQL++ scripts, an optional IF NOT EXISTS clause is included to allow creation to be requested either unconditionally or only if the dataverse does not already exist. If this clause is absent, an error is returned if a dataverse with the indicated name already exists.</p>
3561<p>The following example creates a new dataverse named TinySocial if one does not already exist.</p>
3562<div class="section">
3563<div class="section">
3564<h5><a name="Example"></a>Example</h5>
3565
3566<div class="source">
3567<div class="source">
3568<pre>CREATE DATAVERSE TinySocial IF NOT EXISTS;
3569</pre></div></div></div></div></div>
3570<div class="section">
3571<h3><a name="Types" id="Types"> Types</a></h3>
3572
3573<div class="source">
3574<div class="source">
3575<pre>TypeSpecification ::= &quot;TYPE&quot; FunctionOrTypeName IfNotExists &quot;AS&quot; ObjectTypeDef
3576FunctionOrTypeName ::= QualifiedName
3577IfNotExists ::= ( &lt;IF&gt; &lt;NOT&gt; &lt;EXISTS&gt; )?
3578TypeExpr ::= ObjectTypeDef | TypeReference | ArrayTypeDef | MultisetTypeDef
3579ObjectTypeDef ::= ( &lt;CLOSED&gt; | &lt;OPEN&gt; )? &quot;{&quot; ( ObjectField ( &quot;,&quot; ObjectField )* )? &quot;}&quot;
3580ObjectField ::= Identifier &quot;:&quot; ( TypeExpr ) ( &quot;?&quot; )?
3581NestedField ::= Identifier ( &quot;.&quot; Identifier )*
3582IndexField ::= NestedField ( &quot;:&quot; TypeReference )?
3583TypeReference ::= Identifier
3584ArrayTypeDef ::= &quot;[&quot; ( TypeExpr ) &quot;]&quot;
3585MultisetTypeDef ::= &quot;{{&quot; ( TypeExpr ) &quot;}}&quot;
3586</pre></div></div>
3587<p>The CREATE TYPE statement is used to create a new named datatype. This type can then be used to create stored collections or utilized when defining one or more other datatypes. Much more information about the data model is available in the <a href="datamodel.html">data model reference guide</a>. A new type can be an object type, a renaming of another type, an array type, or a multiset type. An object type can be defined as being either open or closed. Instances of a closed object type are not permitted to contain fields other than those specified in the create type statement. Instances of an open object type may carry additional fields, and open is the default for new types if neither option is specified.</p>
3588<p>The following example creates a new object type called GleambookUserType. Since it is defined as (defaulting to) being an open type, instances will be permitted to contain more than what is specified in the type definition. The first four fields are essentially traditional typed name/value pairs (much like SQL fields). The friendIds field is a multiset of integers. The employment field is an array of instances of another named object type, EmploymentType.</p>
3589<div class="section">
3590<div class="section">
3591<h5><a name="Example"></a>Example</h5>
3592
3593<div class="source">
3594<div class="source">
3595<pre>CREATE TYPE GleambookUserType AS {
3596 id: int,
3597 alias: string,
3598 name: string,
3599 userSince: datetime,
3600 friendIds: {{ int }},
3601 employment: [ EmploymentType ]
3602};
3603</pre></div></div>
3604<p>The next example creates a new object type, closed this time, called MyUserTupleType. Instances of this closed type will not be permitted to have extra fields, although the alias field is marked as optional and may thus be NULL or MISSING in legal instances of the type. Note that the type of the id field in the example is UUID. This field type can be used if you want to have this field be an autogenerated-PK field. (Refer to the Datasets section later for more details on such fields.)</p></div>
3605<div class="section">
3606<h5><a name="Example"></a>Example</h5>
3607
3608<div class="source">
3609<div class="source">
3610<pre>CREATE TYPE MyUserTupleType AS CLOSED {
3611 id: uuid,
3612 alias: string?,
3613 name: string
3614};
3615</pre></div></div></div></div></div>
3616<div class="section">
3617<h3><a name="Datasets" id="Datasets"> Datasets</a></h3>
3618
3619<div class="source">
3620<div class="source">
3621<pre>DatasetSpecification ::= ( &lt;INTERNAL&gt; )? &lt;DATASET&gt; QualifiedName &quot;(&quot; QualifiedName &quot;)&quot; IfNotExists
3622 PrimaryKey ( &lt;ON&gt; Identifier )? ( &lt;HINTS&gt; Properties )?
3623 ( &quot;USING&quot; &quot;COMPACTION&quot; &quot;POLICY&quot; CompactionPolicy ( Configuration )? )?
3624 ( &lt;WITH&gt; &lt;FILTER&gt; &lt;ON&gt; Identifier )?
3625 |
3626 &lt;EXTERNAL&gt; &lt;DATASET&gt; QualifiedName &quot;(&quot; QualifiedName &quot;)&quot; IfNotExists &lt;USING&gt; AdapterName
3627 Configuration ( &lt;HINTS&gt; Properties )?
3628 ( &lt;USING&gt; &lt;COMPACTION&gt; &lt;POLICY&gt; CompactionPolicy ( Configuration )? )?
3629AdapterName ::= Identifier
3630Configuration ::= &quot;(&quot; ( KeyValuePair ( &quot;,&quot; KeyValuePair )* )? &quot;)&quot;
3631KeyValuePair ::= &quot;(&quot; StringLiteral &quot;=&quot; StringLiteral &quot;)&quot;
3632Properties ::= ( &quot;(&quot; Property ( &quot;,&quot; Property )* &quot;)&quot; )?
3633Property ::= Identifier &quot;=&quot; ( StringLiteral | IntegerLiteral )
3634FunctionSignature ::= FunctionOrTypeName &quot;@&quot; IntegerLiteral
3635PrimaryKey ::= &lt;PRIMARY&gt; &lt;KEY&gt; NestedField ( &quot;,&quot; NestedField )* ( &lt;AUTOGENERATED&gt; )?
3636CompactionPolicy ::= Identifier
3637</pre></div></div>
3638<p>The CREATE DATASET statement is used to create a new dataset. Datasets are named, multisets of object type instances; they are where data lives persistently and are the usual targets for SQL++ queries. Datasets are typed, and the system ensures that their contents conform to their type definitions. An Internal dataset (the default kind) is a dataset whose content lives within and is managed by the system. It is required to have a specified unique primary key field which uniquely identifies the contained objects. (The primary key is also used in secondary indexes to identify the indexed primary data objects.)</p>
3639<p>Internal datasets contain several advanced options that can be specified when appropriate. One such option is that random primary key (UUID) values can be auto-generated by declaring the field to be UUID and putting &#x201c;AUTOGENERATED&#x201d; after the &#x201c;PRIMARY KEY&#x201d; identifier. In this case, unlike other non-optional fields, a value for the auto-generated PK field should not be provided at insertion time by the user since each object&#x2019;s primary key field value will be auto-generated by the system.</p>
3640<p>Another advanced option, when creating an Internal dataset, is to specify the merge policy to control which of the underlying LSM storage components are to be merged. (The system supports Log-Structured Merge tree based physical storage for Internal datasets.) Currently the system supports four different component merging policies that can be chosen per dataset: no-merge, constant, prefix, and correlated-prefix. The no-merge policy simply never merges disk components. The constant policy merges disk components when the number of components reaches a constant number k that can be configured by the user. The prefix policy relies on both component sizes and the number of components to decide which components to merge. It works by first trying to identify the smallest ordered (oldest to newest) sequence of components such that the sequence does not contain a single component that exceeds some threshold size M and that either the sum of the components&#x2019; sizes exceeds M or the number of components in the sequence exceeds another threshold C. If such a sequence exists, the components in the sequence are merged together to form a single component. Finally, the correlated-prefix policy is similar to the prefix policy, but it delegates the decision of merging the disk components of all the indexes in a dataset to the primary index. When the correlated-prefix policy decides that the primary index needs to be merged (using the same decision criteria as for the prefix policy), then it will issue successive merge requests on behalf of all other indexes associated with the same dataset. The system&#x2019;s default policy is the prefix policy except when there is a filter on a dataset, where the preferred policy for filters is the correlated-prefix.</p>
3641<p>Another advanced option shown in the syntax above, related to performance and mentioned above, is that a <b>filter</b> can optionally be created on a field to further optimize range queries with predicates on the filter&#x2019;s field. Filters allow some range queries to avoid searching all LSM components when the query conditions match the filter. (Refer to <a href="filters.html">Filter-Based LSM Index Acceleration</a> for more information about filters.)</p>
3642<p>An External dataset, in contrast to an Internal dataset, has data stored outside of the system&#x2019;s control. Files living in HDFS or in the local filesystem(s) of a cluster&#x2019;s nodes are currently supported. External dataset support allows SQL++ queries to treat foreign data as though it were stored in the system, making it possible to query &#x201c;legacy&#x201d; file data (e.g., Hive data) without having to physically import it. When defining an External dataset, an appropriate adapter type must be selected for the desired external data. (See the <a href="externaldata.html">Guide to External Data</a> for more information on the available adapters.)</p>
3643<p>The following example creates an Internal dataset for storing GleambookUserType objects. It specifies that their id field is their primary key.</p>
3644<div class="section">
3645<h4><a name="Example"></a>Example</h4>
3646
3647<div class="source">
3648<div class="source">
3649<pre>CREATE INTERNAL DATASET GleambookUsers(GleambookUserType) PRIMARY KEY id;
3650</pre></div></div>
3651<p>The next example creates another Internal dataset (the default kind when no dataset kind is specified) for storing MyUserTupleType objects. It specifies that the id field should be used as the primary key for the dataset. It also specifies that the id field is an auto-generated field, meaning that a randomly generated UUID value should be assigned to each incoming object by the system. (A user should therefore not attempt to provide a value for this field.) Note that the id field&#x2019;s declared type must be UUID in this case.</p></div>
3652<div class="section">
3653<h4><a name="Example"></a>Example</h4>
3654
3655<div class="source">
3656<div class="source">
3657<pre>CREATE DATASET MyUsers(MyUserTupleType) PRIMARY KEY id AUTOGENERATED;
3658</pre></div></div>
3659<p>The next example creates an External dataset for querying LineItemType objects. The choice of the <tt>hdfs</tt> adapter means that this dataset&#x2019;s data actually resides in HDFS. The example CREATE statement also provides parameters used by the hdfs adapter: the URL and path needed to locate the data in HDFS and a description of the data format.</p></div>
3660<div class="section">
3661<h4><a name="Example"></a>Example</h4>
3662
3663<div class="source">
3664<div class="source">
3665<pre>CREATE EXTERNAL DATASET LineItem(LineItemType) USING hdfs (
3666 (&quot;hdfs&quot;=&quot;hdfs://HOST:PORT&quot;),
3667 (&quot;path&quot;=&quot;HDFS_PATH&quot;),
3668 (&quot;input-format&quot;=&quot;text-input-format&quot;),
3669 (&quot;format&quot;=&quot;delimited-text&quot;),
3670 (&quot;delimiter&quot;=&quot;|&quot;));
3671</pre></div></div></div>
3672<div class="section">
3673<h4><a name="Indices"></a>Indices</h4>
3674
3675<div class="source">
3676<div class="source">
3677<pre>IndexSpecification ::= &lt;INDEX&gt; Identifier IfNotExists &lt;ON&gt; QualifiedName
3678 &quot;(&quot; ( IndexField ) ( &quot;,&quot; IndexField )* &quot;)&quot; ( &quot;type&quot; IndexType &quot;?&quot;)?
3679 ( &lt;ENFORCED&gt; )?
3680IndexType ::= &lt;BTREE&gt; | &lt;RTREE&gt; | &lt;KEYWORD&gt; | &lt;NGRAM&gt; &quot;(&quot; IntegerLiteral &quot;)&quot;
3681</pre></div></div>
3682<p>The CREATE INDEX statement creates a secondary index on one or more fields of a specified dataset. Supported index types include <tt>BTREE</tt> for totally ordered datatypes, <tt>RTREE</tt> for spatial data, and <tt>KEYWORD</tt> and <tt>NGRAM</tt> for textual (string) data. An index can be created on a nested field (or fields) by providing a valid path expression as an index field identifier.</p>
3683<p>An indexed field is not required to be part of the datatype associated with a dataset if the dataset&#x2019;s datatype is declared as open <b>and</b> if the field&#x2019;s type is provided along with its name and if the <tt>ENFORCED</tt> keyword is specified at the end of the index definition. <tt>ENFORCING</tt> an open field introduces a check that makes sure that the actual type of the indexed field (if the optional field exists in the object) always matches this specified (open) field type.</p>
3684<p>The following example creates a btree index called gbAuthorIdx on the authorId field of the GleambookMessages dataset. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the authorId field.</p></div>
3685<div class="section">
3686<h4><a name="Example"></a>Example</h4>
3687
3688<div class="source">
3689<div class="source">
3690<pre>CREATE INDEX gbAuthorIdx ON GleambookMessages(authorId) TYPE BTREE;
3691</pre></div></div>
3692<p>The following example creates an open btree index called gbSendTimeIdx on the (non-predeclared) sendTime field of the GleambookMessages dataset having datetime type. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the sendTime field.</p></div>
3693<div class="section">
3694<h4><a name="Example"></a>Example</h4>
3695
3696<div class="source">
3697<div class="source">
3698<pre>CREATE INDEX gbSendTimeIdx ON GleambookMessages(sendTime: datetime?) TYPE BTREE ENFORCED;
3699</pre></div></div>
3700<p>The following example creates a btree index called crpUserScrNameIdx on screenName, a nested field residing within an object-valued user field in the ChirpMessages dataset. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the nested screenName field. Such nested fields must be singular, i.e., one cannot index through (or on) an array-valued field.</p></div>
3701<div class="section">
3702<h4><a name="Example"></a>Example</h4>
3703
3704<div class="source">
3705<div class="source">
3706<pre>CREATE INDEX crpUserScrNameIdx ON ChirpMessages(user.screenName) TYPE BTREE;
3707</pre></div></div>
3708<p>The following example creates an rtree index called gbSenderLocIndex on the sender-location field of the GleambookMessages dataset. This index can be useful for accelerating queries that use the <a href="functions.html#spatial-intersect"><tt>spatial-intersect</tt> function</a> in a predicate involving the sender-location field.</p></div>
3709<div class="section">
3710<h4><a name="Example"></a>Example</h4>
3711
3712<div class="source">
3713<div class="source">
3714<pre>CREATE INDEX gbSenderLocIndex ON GleambookMessages(&quot;sender-location&quot;) TYPE RTREE;
3715</pre></div></div>
3716<p>The following example creates a 3-gram index called fbUserIdx on the name field of the GleambookUsers dataset. This index can be used to accelerate some similarity or substring matching queries on the name field. For details refer to the document on <a href="similarity.html#NGram_Index">similarity queries</a>.</p></div>
3717<div class="section">
3718<h4><a name="Example"></a>Example</h4>
3719
3720<div class="source">
3721<div class="source">
3722<pre>CREATE INDEX fbUserIdx ON GleambookUsers(name) TYPE NGRAM(3);
3723</pre></div></div>
3724<p>The following example creates a keyword index called fbMessageIdx on the message field of the GleambookMessages dataset. This keyword index can be used to optimize queries with token-based similarity predicates on the message field. For details refer to the document on <a href="similarity.html#Keyword_Index">similarity queries</a>.</p></div>
3725<div class="section">
3726<h4><a name="Example"></a>Example</h4>
3727
3728<div class="source">
3729<div class="source">
3730<pre>CREATE INDEX fbMessageIdx ON GleambookMessages(message) TYPE KEYWORD;
3731</pre></div></div></div></div>
3732<div class="section">
3733<h3><a name="Functions" id="Functions"> Functions</a></h3>
3734<p>The create function statement creates a <b>named</b> function that can then be used and reused in SQL++ queries. The body of a function can be any SQL++ expression involving the function&#x2019;s parameters.</p>
3735
3736<div class="source">
3737<div class="source">
3738<pre>FunctionSpecification ::= &quot;FUNCTION&quot; FunctionOrTypeName IfNotExists ParameterList &quot;{&quot; Expression &quot;}&quot;
3739</pre></div></div>
3740<p>The following is an example of a CREATE FUNCTION statement which is similar to our earlier DECLARE FUNCTION example. It differs from that example in that it results in a function that is persistently registered by name in the specified dataverse (the current dataverse being used, if not otherwise specified).</p>
3741<div class="section">
3742<div class="section">
3743<h5><a name="Example"></a>Example</h5>
3744
3745<div class="source">
3746<div class="source">
3747<pre>CREATE FUNCTION friendInfo(userId) {
3748 (SELECT u.id, u.name, len(u.friendIds) AS friendCount
3749 FROM GleambookUsers u
3750 WHERE u.id = userId)[0]
3751 };
3752</pre></div></div></div></div>
3753<div class="section">
3754<h4><a name="Removal"></a>Removal</h4>
3755
3756<div class="source">
3757<div class="source">
3758<pre>DropStatement ::= &quot;DROP&quot; ( &quot;DATAVERSE&quot; Identifier IfExists
3759 | &quot;TYPE&quot; FunctionOrTypeName IfExists
3760 | &quot;DATASET&quot; QualifiedName IfExists
3761 | &quot;INDEX&quot; DoubleQualifiedName IfExists
3762 | &quot;FUNCTION&quot; FunctionSignature IfExists )
3763IfExists ::= ( &quot;IF&quot; &quot;EXISTS&quot; )?
3764</pre></div></div>
3765<p>The DROP statement in SQL++ is the inverse of the CREATE statement. It can be used to drop dataverses, datatypes, datasets, indexes, and functions.</p>
3766<p>The following examples illustrate some uses of the DROP statement.</p>
3767<div class="section">
3768<h5><a name="Example"></a>Example</h5>
3769
3770<div class="source">
3771<div class="source">
3772<pre>DROP DATASET GleambookUsers IF EXISTS;
3773
3774DROP INDEX GleambookMessages.gbSenderLocIndex;
3775
3776DROP TYPE TinySocial2.GleambookUserType;
3777
3778DROP FUNCTION friendInfo@1;
3779
3780DROP DATAVERSE TinySocial;
3781</pre></div></div>
3782<p>When an artifact is dropped, it will be dropped from the current dataverse if none is specified (see the DROP DATASET example above) or from the specified dataverse (see the DROP TYPE example above) if one is specified by fully qualifying the artifact name in the DROP statement. When specifying an index to drop, the index name must be qualified by the dataset that it indexes. When specifying a function to drop, since SQL++ allows functions to be overloaded by their number of arguments, the identifying name of the function to be dropped must explicitly include that information. (<tt>friendInfo@1</tt> above denotes the 1-argument function named friendInfo in the current dataverse.)</p></div></div></div>
3783<div class="section">
3784<h3><a name="ImportExport_Statements"></a>Import/Export Statements</h3>
3785
3786<div class="source">
3787<div class="source">
3788<pre>LoadStatement ::= &lt;LOAD&gt; &lt;DATASET&gt; QualifiedName &lt;USING&gt; AdapterName Configuration ( &lt;PRE-SORTED&gt; )?
3789</pre></div></div>
3790<p>The LOAD statement is used to initially populate a dataset via bulk loading of data from an external file. An appropriate adapter must be selected to handle the nature of the desired external data. The LOAD statement accepts the same adapters and the same parameters as discussed earlier for External datasets. (See the <a href="externaldata.html">guide to external data</a> for more information on the available adapters.) If a dataset has an auto-generated primary key field, the file to be imported should not include that field in it.</p>
3791<p>The following example shows how to bulk load the GleambookUsers dataset from an external file containing data that has been prepared in ADM (Asterix Data Model) format.</p>
3792<div class="section">
3793<div class="section">
3794<h5><a name="Example"></a>Example</h5>
3795
3796<div class="source">
3797<div class="source">
3798<pre> LOAD DATASET GleambookUsers USING localfs
3799 ((&quot;path&quot;=&quot;127.0.0.1:///Users/bignosqlfan/tinysocialnew/gbu.adm&quot;),(&quot;format&quot;=&quot;adm&quot;));
3800</pre></div></div></div></div></div></div>
3801<div class="section">
3802<h2><a name="Modification_statements" id="Modification_statements">Modification statements</a></h2>
3803<div class="section">
3804<h3><a name="INSERTs"></a><a name="Inserts" id="Inserts">INSERTs</a></h3>
3805
3806<div class="source">
3807<div class="source">
3808<pre>InsertStatement ::= &lt;INSERT&gt; &lt;INTO&gt; QualifiedName Query
3809</pre></div></div>
3810<p>The SQL++ INSERT statement is used to insert new data into a dataset. The data to be inserted comes from a SQL++ query expression. This expression can be as simple as a constant expression, or in general it can be any legal SQL++ query. If the target dataset has an auto-generated primary key field, the insert statement should not include a value for that field in it. (The system will automatically extend the provided object with this additional field and a corresponding value.) Insertion will fail if the dataset already has data with the primary key value(s) being inserted.</p>
3811<p>Inserts are processed transactionally by the system. The transactional scope of each insert transaction is the insertion of a single object plus its affiliated secondary index entries (if any). If the query part of an insert returns a single object, then the INSERT statement will be a single, atomic transaction. If the query part returns multiple objects, each object being inserted will be treated as a separate transaction. The following example illustrates a query-based insertion.</p>
3812<div class="section">
3813<div class="section">
3814<h5><a name="Example"></a>Example</h5>
3815
3816<div class="source">
3817<div class="source">
3818<pre>INSERT INTO UsersCopy (SELECT VALUE user FROM GleambookUsers user)
3819</pre></div></div></div></div></div>
3820<div class="section">
3821<h3><a name="UPSERTs"></a><a name="Upserts" id="Upserts">UPSERTs</a></h3>
3822
3823<div class="source">
3824<div class="source">
3825<pre>UpsertStatement ::= &lt;UPSERT&gt; &lt;INTO&gt; QualifiedName Query
3826</pre></div></div>
3827<p>The SQL++ UPSERT statement syntactically mirrors the INSERT statement discussed above. The difference lies in its semantics, which for UPSERT are &#x201c;add or replace&#x201d; instead of the INSERT &#x201c;add if not present, else error&#x201d; semantics. Whereas an INSERT can fail if another object already exists with the specified key, the analogous UPSERT will replace the previous object&#x2019;s value with that of the new object in such cases.</p>
3828<p>The following example illustrates a query-based upsert operation.</p>
3829<div class="section">
3830<div class="section">
3831<h5><a name="Example"></a>Example</h5>
3832
3833<div class="source">
3834<div class="source">
3835<pre>UPSERT INTO UsersCopy (SELECT VALUE user FROM GleambookUsers user)
3836</pre></div></div>
3837<p>*Editor&#x2019;s note: Upserts currently work in AQL but are not yet enabled (at the moment) in SQL++.</p></div></div></div>
3838<div class="section">
3839<h3><a name="DELETEs"></a><a name="Deletes" id="Deletes">DELETEs</a></h3>
3840
3841<div class="source">
3842<div class="source">
3843<pre>DeleteStatement ::= &lt;DELETE&gt; &lt;FROM&gt; QualifiedName ( ( &lt;AS&gt; )? Variable )? ( &lt;WHERE&gt; Expression )?
3844</pre></div></div>
3845<p>The SQL++ DELETE statement is used to delete data from a target dataset. The data to be deleted is identified by a boolean expression involving the variable bound to the target dataset in the DELETE statement.</p>
3846<p>Deletes are processed transactionally by the system. The transactional scope of each delete transaction is the deletion of a single object plus its affiliated secondary index entries (if any). If the boolean expression for a delete identifies a single object, then the DELETE statement itself will be a single, atomic transaction. If the expression identifies multiple objects, then each object deleted will be handled as a separate transaction.</p>
3847<p>The following examples illustrate single-object deletions.</p>
3848<div class="section">
3849<div class="section">
3850<h5><a name="Example"></a>Example</h5>
3851
3852<div class="source">
3853<div class="source">
3854<pre>DELETE FROM GleambookUsers user WHERE user.id = 8;
3855</pre></div></div></div>
3856<div class="section">
3857<h5><a name="Example"></a>Example</h5>
3858
3859<div class="source">
3860<div class="source">
3861<pre>DELETE FROM GleambookUsers WHERE id = 5;
3862</pre></div></div>
3863<h1><a name="Reserved_keywords" id="Reserved_keywords">Appendix 1. Reserved keywords</a></h1>
3864<p>All reserved keywords are listed in the following table:</p>
3865
3866<table border="0" class="table table-striped">
3867 <thead>
3868
3869<tr class="a">
3870
3871<th> </th>
3872
3873<th> </th>
3874
3875<th> </th>
3876
3877<th> </th>
3878
3879<th> </th>
3880
3881<th> </th>
3882 </tr>
3883 </thead>
3884 <tbody>
3885
3886<tr class="b">
3887
3888<td>AND </td>
3889
3890<td>ANY </td>
3891
3892<td>APPLY </td>
3893
3894<td>AS </td>
3895
3896<td>ASC </td>
3897
3898<td>AT </td>
3899 </tr>
3900
3901<tr class="a">
3902
3903<td>AUTOGENERATED </td>
3904
3905<td>BETWEEN </td>
3906
3907<td>BTREE </td>
3908
3909<td>BY </td>
3910
3911<td>CASE </td>
3912
3913<td>CLOSED </td>
3914 </tr>
3915
3916<tr class="b">
3917
3918<td>CREATE </td>
3919
3920<td>COMPACTION </td>
3921
3922<td>COMPACT </td>
3923
3924<td>CONNECT </td>
3925
3926<td>CORRELATE </td>
3927
3928<td>DATASET </td>
3929 </tr>
3930
3931<tr class="a">
3932
3933<td>COLLECTION </td>
3934
3935<td>DATAVERSE </td>
3936
3937<td>DECLARE </td>
3938
3939<td>DEFINITION </td>
3940
3941<td>DECLARE </td>
3942
3943<td>DEFINITION </td>
3944 </tr>
3945
3946<tr class="b">
3947
3948<td>DELETE </td>
3949
3950<td>DESC </td>
3951
3952<td>DISCONNECT </td>
3953
3954<td>DISTINCT </td>
3955
3956<td>DROP </td>
3957
3958<td>ELEMENT </td>
3959 </tr>
3960
3961<tr class="a">
3962
3963<td>ELEMENT </td>
3964
3965<td>EXPLAIN </td>
3966
3967<td>ELSE </td>
3968
3969<td>ENFORCED </td>
3970
3971<td>END </td>
3972
3973<td>EVERY </td>
3974 </tr>
3975
3976<tr class="b">
3977
3978<td>EXCEPT </td>
3979
3980<td>EXIST </td>
3981
3982<td>EXTERNAL </td>
3983
3984<td>FEED </td>
3985
3986<td>FILTER </td>
3987
3988<td>FLATTEN </td>
3989 </tr>
3990
3991<tr class="a">
3992
3993<td>FOR </td>
3994
3995<td>FROM </td>
3996
3997<td>FULL </td>
3998
3999<td>FUNCTION </td>
4000
4001<td>GROUP </td>
4002
4003<td>HAVING </td>
4004 </tr>
4005
4006<tr class="b">
4007
4008<td>HINTS </td>
4009
4010<td>IF </td>
4011
4012<td>INTO </td>
4013
4014<td>IN </td>
4015
4016<td>INDEX </td>
4017
4018<td>INGESTION </td>
4019 </tr>
4020
4021<tr class="a">
4022
4023<td>INNER </td>
4024
4025<td>INSERT </td>
4026
4027<td>INTERNAL </td>
4028
4029<td>INTERSECT </td>
4030
4031<td>IS </td>
4032
4033<td>JOIN </td>
4034 </tr>
4035
4036<tr class="b">
4037
4038<td>KEYWORD </td>
4039
4040<td>LEFT </td>
4041
4042<td>LETTING </td>
4043
4044<td>LET </td>
4045
4046<td>LIKE </td>
4047
4048<td>LIMIT </td>
4049 </tr>
4050
4051<tr class="a">
4052
4053<td>LOAD </td>
4054
4055<td>NODEGROUP </td>
4056
4057<td>NGRAM </td>
4058
4059<td>NOT </td>
4060
4061<td>OFFSET </td>
4062
4063<td>ON </td>
4064 </tr>
4065
4066<tr class="b">
4067
4068<td>OPEN </td>
4069
4070<td>OR </td>
4071
4072<td>ORDER </td>
4073
4074<td>OUTER </td>
4075
4076<td>OUTPUT </td>
4077
4078<td>PATH </td>
4079 </tr>
4080
4081<tr class="a">
4082
4083<td>POLICY </td>
4084
4085<td>PRE-SORTED </td>
4086
4087<td>PRIMARY </td>
4088
4089<td>RAW </td>
4090
4091<td>REFRESH </td>
4092
4093<td>RETURN </td>
4094 </tr>
4095
4096<tr class="b">
4097
4098<td>RTREE </td>
4099
4100<td>RUN </td>
4101
4102<td>SATISFIES </td>
4103
4104<td>SECONDARY </td>
4105
4106<td>SELECT </td>
4107
4108<td>SET </td>
4109 </tr>
4110
4111<tr class="a">
4112
4113<td>SOME </td>
4114
4115<td>TEMPORARY </td>
4116
4117<td>THEN </td>
4118
4119<td>TYPE </td>
4120
4121<td>UNKNOWN </td>
4122
4123<td>UNNEST </td>
4124 </tr>
4125
4126<tr class="b">
4127
4128<td>UPDATE </td>
4129
4130<td>USE </td>
4131
4132<td>USING </td>
4133
4134<td>VALUE </td>
4135
4136<td>WHEN </td>
4137
4138<td>WHERE </td>
4139 </tr>
4140
4141<tr class="a">
4142
4143<td>WITH </td>
4144
4145<td>WRITE </td>
4146
4147<td> </td>
4148
4149<td> </td>
4150
4151<td> </td>
4152
4153<td> </td>
4154 </tr>
4155 </tbody>
4156</table></div></div></div></div>
4157 </div>
4158 </div>
4159 </div>
4160
4161 <hr/>
4162
4163 <footer>
4164 <div class="container-fluid">
4165 <div class="row span12">Copyright &copy; 2017
4166 <a href="https://www.apache.org/">The Apache Software Foundation</a>.
4167 All Rights Reserved.
4168
4169 </div>
4170
4171 <?xml version="1.0" encoding="UTF-8"?>
4172<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
4173 feather logo, and the Apache AsterixDB project logo are either
4174 registered trademarks or trademarks of The Apache Software
4175 Foundation in the United States and other countries.
4176 All other marks mentioned may be trademarks or registered
4177 trademarks of their respective owners.</div>
4178
4179
4180 </div>
4181 </footer>
4182 </body>
4183</html>