blob: a2499d9aca5dc41efe1a1fffbfeeb561c2d924c8 [file] [log] [blame]
Ian Maxonf9dfc032017-09-14 21:37:24 +02001<!DOCTYPE html>
2<!--
3 | Generated by Apache Maven Doxia at 2017-09-14
4 | Rendered using Apache Maven Fluido Skin 1.3.0
5-->
6<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7 <head>
8 <meta charset="UTF-8" />
9 <meta name="viewport" content="width=device-width, initial-scale=1.0" />
10 <meta name="Date-Revision-yyyymmdd" content="20170914" />
11 <meta http-equiv="Content-Language" content="en" />
12 <title>AsterixDB &#x2013; The SQL++ Query Language</title>
13 <link rel="stylesheet" href="../css/apache-maven-fluido-1.3.0.min.css" />
14 <link rel="stylesheet" href="../css/site.css" />
15 <link rel="stylesheet" href="../css/print.css" media="print" />
16
17
18 <script type="text/javascript" src="../js/apache-maven-fluido-1.3.0.min.js"></script>
19
20
21
22<script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
23 (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
24 m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
25 })(window,document,'script','//www.google-analytics.com/analytics.js','ga');
26
27 ga('create', 'UA-41536543-1', 'uci.edu');
28 ga('send', 'pageview');</script>
29
30 </head>
31 <body class="topBarDisabled">
32
33
34
35
36 <div class="container-fluid">
37 <div id="banner">
38 <div class="pull-left">
39 <a href=".././" id="bannerLeft">
40 <img src="../images/asterixlogo.png" alt="AsterixDB"/>
41 </a>
42 </div>
43 <div class="pull-right"> </div>
44 <div class="clear"><hr/></div>
45 </div>
46
47 <div id="breadcrumbs">
48 <ul class="breadcrumb">
49
50
51 <li id="publishDate">Last Published: 2017-09-14</li>
52
53
54
55 <li id="projectVersion" class="pull-right">Version: 0.9.2</li>
56
57 <li class="divider pull-right">|</li>
58
59 <li class="pull-right"> <a href="../index.html" title="Documentation Home">
60 Documentation Home</a>
61 </li>
62
63 </ul>
64 </div>
65
66
67 <div class="row-fluid">
68 <div id="leftColumn" class="span3">
69 <div class="well sidebar-nav">
70
71
72 <ul class="nav nav-list">
73 <li class="nav-header">Get Started - Installation</li>
74
75 <li>
76
77 <a href="../ncservice.html" title="Option 1: using NCService">
78 <i class="none"></i>
79 Option 1: using NCService</a>
80 </li>
81
82 <li>
83
84 <a href="../ansible.html" title="Option 2: using Ansible">
85 <i class="none"></i>
86 Option 2: using Ansible</a>
87 </li>
88
89 <li>
90
91 <a href="../aws.html" title="Option 3: using Amazon Web Services">
92 <i class="none"></i>
93 Option 3: using Amazon Web Services</a>
94 </li>
95
96 <li>
97
98 <a href="../yarn.html" title="Option 4: using YARN">
99 <i class="none"></i>
100 Option 4: using YARN</a>
101 </li>
102
103 <li>
104
105 <a href="../install.html" title="Option 5: using Managix (deprecated)">
106 <i class="none"></i>
107 Option 5: using Managix (deprecated)</a>
108 </li>
109 <li class="nav-header">AsterixDB Primer</li>
110
111 <li>
112
113 <a href="../sqlpp/primer-sqlpp.html" title="Option 1: using SQL++">
114 <i class="none"></i>
115 Option 1: using SQL++</a>
116 </li>
117
118 <li>
119
120 <a href="../aql/primer.html" title="Option 2: using AQL">
121 <i class="none"></i>
122 Option 2: using AQL</a>
123 </li>
124 <li class="nav-header">Data Model</li>
125
126 <li>
127
128 <a href="../datamodel.html" title="The Asterix Data Model">
129 <i class="none"></i>
130 The Asterix Data Model</a>
131 </li>
132 <li class="nav-header">Queries - SQL++</li>
133
134 <li class="active">
135
136 <a href="#"><i class="none"></i>The SQL++ Query Language</a>
137 </li>
138
139 <li>
140
141 <a href="../sqlpp/builtins.html" title="Builtin Functions">
142 <i class="none"></i>
143 Builtin Functions</a>
144 </li>
145 <li class="nav-header">Queries - AQL</li>
146
147 <li>
148
149 <a href="../aql/manual.html" title="The Asterix Query Language (AQL)">
150 <i class="none"></i>
151 The Asterix Query Language (AQL)</a>
152 </li>
153
154 <li>
155
156 <a href="../aql/builtins.html" title="Builtin Functions">
157 <i class="none"></i>
158 Builtin Functions</a>
159 </li>
160 <li class="nav-header">API/SDK</li>
161
162 <li>
163
164 <a href="../api.html" title="HTTP API">
165 <i class="none"></i>
166 HTTP API</a>
167 </li>
168
169 <li>
170
171 <a href="../csv.html" title="CSV Output">
172 <i class="none"></i>
173 CSV Output</a>
174 </li>
175 <li class="nav-header">Advanced Features</li>
176
177 <li>
178
179 <a href="../aql/fulltext.html" title="Support of Full-text Queries">
180 <i class="none"></i>
181 Support of Full-text Queries</a>
182 </li>
183
184 <li>
185
186 <a href="../aql/externaldata.html" title="Accessing External Data">
187 <i class="none"></i>
188 Accessing External Data</a>
189 </li>
190
191 <li>
192
193 <a href="../feeds/tutorial.html" title="Support for Data Ingestion">
194 <i class="none"></i>
195 Support for Data Ingestion</a>
196 </li>
197
198 <li>
199
200 <a href="../udf.html" title="User Defined Functions">
201 <i class="none"></i>
202 User Defined Functions</a>
203 </li>
204
205 <li>
206
207 <a href="../aql/filters.html" title="Filter-Based LSM Index Acceleration">
208 <i class="none"></i>
209 Filter-Based LSM Index Acceleration</a>
210 </li>
211
212 <li>
213
214 <a href="../aql/similarity.html" title="Support of Similarity Queries">
215 <i class="none"></i>
216 Support of Similarity Queries</a>
217 </li>
218 </ul>
219
220
221
222 <hr class="divider" />
223
224 <div id="poweredBy">
225 <div class="clear"></div>
226 <div class="clear"></div>
227 <div class="clear"></div>
228 <a href=".././" title="AsterixDB" class="builtBy">
229 <img class="builtBy" alt="AsterixDB" src="../images/asterixlogo.png" />
230 </a>
231 </div>
232 </div>
233 </div>
234
235
236 <div id="bodyColumn" class="span9" >
237
238 <!-- ! Licensed to the Apache Software Foundation (ASF) under one
239 ! or more contributor license agreements. See the NOTICE file
240 ! distributed with this work for additional information
241 ! regarding copyright ownership. The ASF licenses this file
242 ! to you under the Apache License, Version 2.0 (the
243 ! "License"); you may not use this file except in compliance
244 ! with the License. You may obtain a copy of the License at
245 !
246 ! http://www.apache.org/licenses/LICENSE-2.0
247 !
248 ! Unless required by applicable law or agreed to in writing,
249 ! software distributed under the License is distributed on an
250 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
251 ! KIND, either express or implied. See the License for the
252 ! specific language governing permissions and limitations
253 ! under the License.
254 ! --><h1>The SQL++ Query Language</h1>
255
256<ul>
257
258<li><a href="#Introduction">1. Introduction</a></li>
259
260<li><a href="#Expressions">2. Expressions</a>
261
262<ul>
263
264<li><a href="#Operator_expressions">Operator Expressions</a>
265
266<ul>
267
268<li><a href="#Arithmetic_operators">Arithmetic Operators</a></li>
269
270<li><a href="#Collection_operators">Collection Operators</a></li>
271
272<li><a href="#Comparison_operators">Comparison Operators</a></li>
273
274<li><a href="#Logical_operators">Logical Operators</a></li>
275 </ul></li>
276
277<li><a href="#Case_expressions">Case Expressions</a></li>
278
279<li><a href="#Quantified_expressions">Quantified Expressions</a></li>
280
281<li><a href="#Path_expressions">Path Expressions</a></li>
282
283<li><a href="#Primary_expressions">Primary Expressions</a>
284
285<ul>
286
287<li><a href="#Literals">Literals</a></li>
288
289<li><a href="#Variable_references">Variable References</a></li>
290
291<li><a href="#Parenthesized_expressions">Parenthesized Expressions</a></li>
292
293<li><a href="#Function_call_expressions">Function call Expressions</a></li>
294
295<li><a href="#Constructors">Constructors</a></li>
296 </ul></li>
297 </ul></li>
298
299<li><a href="#Queries">3. Queries</a>
300
301<ul>
302
303<li><a href="#Declarations">Declarations</a></li>
304
305<li><a href="#SELECT_statements">SELECT Statements</a></li>
306
307<li><a href="#Select_clauses">SELECT Clauses</a>
308
309<ul>
310
311<li><a href="#Select_element">Select Element/Value/Raw</a></li>
312
313<li><a href="#SQL_select">SQL-style Select</a></li>
314
315<li><a href="#Select_star">Select *</a></li>
316
317<li><a href="#Select_distinct">Select Distinct</a></li>
318
319<li><a href="#Unnamed_projections">Unnamed Projections</a></li>
320
321<li><a href="#Abbreviated_field_access_expressions">Abbreviated Field Access Expressions</a></li>
322 </ul></li>
323
324<li><a href="#Unnest_clauses">UNNEST Clauses</a>
325
326<ul>
327
328<li><a href="#Inner_unnests">Inner Unnests</a></li>
329
330<li><a href="#Left_outer_unnests">Left Outer Unnests</a></li>
331
332<li><a href="#Expressing_joins_using_unnests">Expressing Joins Using Unnests</a></li>
333 </ul></li>
334
335<li><a href="#From_clauses">FROM clauses</a>
336
337<ul>
338
339<li><a href="#Binding_expressions">Binding Expressions</a></li>
340
341<li><a href="#Multiple_from_terms">Multiple From Terms</a></li>
342
343<li><a href="#Expressing_joins_using_from_terms">Expressing Joins Using From Terms</a></li>
344
345<li><a href="#Implicit_binding_variables">Implicit Binding Variables</a></li>
346 </ul></li>
347
348<li><a href="#Join_clauses">JOIN Clauses</a>
349
350<ul>
351
352<li><a href="#Inner_joins">Inner Joins</a></li>
353
354<li><a href="#Left_outer_joins">Left Outer Joins</a></li>
355 </ul></li>
356
357<li><a href="#Group_By_clauses">GROUP BY Clauses</a>
358
359<ul>
360
361<li><a href="#Group_variables">Group Variables</a></li>
362
363<li><a href="#Implicit_group_key_variables">Implicit Group Key Variables</a></li>
364
365<li><a href="#Implicit_group_variables">Implicit Group Variables</a></li>
366
367<li><a href="#Aggregation_functions">Aggregation Functions</a></li>
368
369<li><a href="#SQL-92_aggregation_functions">SQL-92 Aggregation Functions</a></li>
370
371<li><a href="#SQL-92_compliant_gby">SQL-92 Compliant GROUP BY Aggregations</a></li>
372
373<li><a href="#Column_aliases">Column Aliases</a></li>
374 </ul></li>
375
376<li><a href="#Where_having_clauses">WHERE Clauses and HAVING Clauses</a></li>
377
378<li><a href="#Order_By_clauses">ORDER BY Clauses</a></li>
379
380<li><a href="#Limit_clauses">LIMIT Clauses</a></li>
381
382<li><a href="#With_clauses">WITH Clauses</a></li>
383
384<li><a href="#Let_clauses">LET Clauses</a></li>
385
386<li><a href="#Union_all">UNION ALL</a></li>
387
388<li><a href="#Vs_SQL-92">SQL++ Vs. SQL-92</a></li>
389 </ul></li>
390
391<li><a href="#Errors">4. Errors</a>
392
393<ul>
394
395<li><a href="#Syntax_errors">Syntax Errors</a></li>
396
397<li><a href="#Identifier_resolution_errors">Identifier Resolution Errors</a></li>
398
399<li><a href="#Type_errors">Type Errors</a></li>
400
401<li><a href="#Resource_errors">Resource Errors</a></li>
402 </ul></li>
403
404<li><a href="#DDL_and_DML_statements">5. DDL and DML Statements</a>
405
406<ul>
407
408<li><a href="#Lifecycle_management_statements">Lifecycle Management Statements</a>
409
410<ul>
411
412<li><a href="#Dataverses">Dataverses</a></li>
413
414<li><a href="#Types">Types</a></li>
415
416<li><a href="#Datasets">Datasets</a></li>
417
418<li><a href="#Indices">Indices</a></li>
419
420<li><a href="#Functions">Functions</a></li>
421
422<li><a href="#Removal">Removal</a></li>
423
424<li><a href="#Load_statement">Load Statement</a></li>
425 </ul></li>
426
427<li><a href="#Modification_statements">Modification Statements</a>
428
429<ul>
430
431<li><a href="#Inserts">Inserts</a></li>
432
433<li><a href="#Upserts">Upserts</a></li>
434
435<li><a href="#Deletes">Deletes</a></li>
436 </ul></li>
437 </ul></li>
438
439<li><a href="#Reserved_keywords">Appendix 1. Reserved Keywords</a></li>
440
441<li><a href="#Performance_tuning">Appendix 2. Performance Tuning</a>
442
443<ul>
444
445<li><a href="#Parallelism_parameter">Parallelism Parameter</a></li>
446
447<li><a href="#Memory_parameters">Memory Parameters</a></li>
448 </ul></li>
449</ul>
450<!-- ! Licensed to the Apache Software Foundation (ASF) under one
451 ! or more contributor license agreements. See the NOTICE file
452 ! distributed with this work for additional information
453 ! regarding copyright ownership. The ASF licenses this file
454 ! to you under the Apache License, Version 2.0 (the
455 ! "License"); you may not use this file except in compliance
456 ! with the License. You may obtain a copy of the License at
457 !
458 ! http://www.apache.org/licenses/LICENSE-2.0
459 !
460 ! Unless required by applicable law or agreed to in writing,
461 ! software distributed under the License is distributed on an
462 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
463 ! KIND, either express or implied. See the License for the
464 ! specific language governing permissions and limitations
465 ! under the License.
466 ! -->
467<h1><a name="Introduction" id="Introduction">1. Introduction</a><font size="3" /></h1>
468<p>This document is intended as a reference guide to the full syntax and semantics of the SQL++ Query Language, a SQL-inspired language for working with semistructured data. SQL++ has much in common with SQL, but some differences do exist due to the different data models that the two languages were designed to serve. SQL was designed in the 1970&#x2019;s for interacting with the flat, schema-ified world of relational databases, while SQL++ is much newer and targets the nested, schema-optional (or even schema-less) world of modern NoSQL systems.</p>
469<p>In the context of Apache AsterixDB, SQL++ is intended for working with the Asterix Data Model (<a href="../datamodel.html">ADM</a>), a data model based on a superset of JSON with an enriched and flexible type system. New AsterixDB users are encouraged to read and work through the (much friendlier) guide &#x201c;<a href="primer-sqlpp.html">AsterixDB 101: An ADM and SQL++ Primer</a>&#x201d; before attempting to make use of this document. In addition, readers are advised to read through the <a href="../datamodel.html">Asterix Data Model (ADM) reference guide</a> first as well, as an understanding of the data model is a prerequisite to understanding SQL++.</p>
470<p>In what follows, we detail the features of the SQL++ language in a grammar-guided manner. We list and briefly explain each of the productions in the SQL++ grammar, offering examples (and results) for clarity.</p>
471<!-- ! Licensed to the Apache Software Foundation (ASF) under one
472 ! or more contributor license agreements. See the NOTICE file
473 ! distributed with this work for additional information
474 ! regarding copyright ownership. The ASF licenses this file
475 ! to you under the Apache License, Version 2.0 (the
476 ! "License"); you may not use this file except in compliance
477 ! with the License. You may obtain a copy of the License at
478 !
479 ! http://www.apache.org/licenses/LICENSE-2.0
480 !
481 ! Unless required by applicable law or agreed to in writing,
482 ! software distributed under the License is distributed on an
483 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
484 ! KIND, either express or implied. See the License for the
485 ! specific language governing permissions and limitations
486 ! under the License.
487 ! -->
488<h1><a name="Expressions" id="Expressions">2. Expressions</a></h1>
489<!-- ! Licensed to the Apache Software Foundation (ASF) under one
490 ! or more contributor license agreements. See the NOTICE file
491 ! distributed with this work for additional information
492 ! regarding copyright ownership. The ASF licenses this file
493 ! to you under the Apache License, Version 2.0 (the
494 ! "License"); you may not use this file except in compliance
495 ! with the License. You may obtain a copy of the License at
496 !
497 ! http://www.apache.org/licenses/LICENSE-2.0
498 !
499 ! Unless required by applicable law or agreed to in writing,
500 ! software distributed under the License is distributed on an
501 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
502 ! KIND, either express or implied. See the License for the
503 ! specific language governing permissions and limitations
504 ! under the License.
505 ! -->
506<p>SQL++ is a highly composable expression language. Each SQL++ expression returns zero or more data model instances. There are three major kinds of expressions in SQL++. At the topmost level, a SQL++ expression can be an OperatorExpression (similar to a mathematical expression), a CaseExpression (to choose between alternative values), or a QuantifiedExpression (which yields a boolean value). Each will be detailed as we explore the full SQL++ grammar.</p>
507
508<div class="source">
509<div class="source">
510<pre>Expression ::= OperatorExpression | CaseExpression | QuantifiedExpression
511</pre></div></div>
512<p>Note that in the following text, words enclosed in angle brackets denote keywords that are not case-sensitive.</p>
513<div class="section">
514<h2><a name="Operator_Expressions"></a><a name="Operator_expressions" id="Operator_expressions">Operator Expressions</a></h2>
515<p>Operators perform a specific operation on the input values or expressions. The syntax of an operator expression is as follows:</p>
516
517<div class="source">
518<div class="source">
519<pre>OperatorExpression ::= PathExpression
520 | Operator OperatorExpression
521 | OperatorExpression Operator (OperatorExpression)?
522 | OperatorExpression &lt;BETWEEN&gt; OperatorExpression &lt;AND&gt; OperatorExpression
523</pre></div></div>
524<p>SQL++ provides a full set of operators that you can use within its statements. Here are the categories of operators:</p>
525
526<ul>
527
528<li><a href="#Arithmetic_operators">Arithmetic Operators</a>, to perform basic mathematical operations;</li>
529
530<li><a href="#Collection_operators">Collection Operators</a>, to evaluate expressions on collections or objects;</li>
531
532<li><a href="#Comparison_operators">Comparison Operators</a>, to compare two expressions;</li>
533
534<li><a href="#Logical_operators">Logical Operators</a>, to combine operators using Boolean logic.</li>
535</ul>
536<p>The following table summarizes the precedence order (from higher to lower) of the major unary and binary operators:</p>
537
538<table border="0" class="table table-striped">
539 <thead>
540
541<tr class="a">
542
543<th>Operator </th>
544
545<th>Operation </th>
546 </tr>
547 </thead>
548 <tbody>
549
550<tr class="b">
551
552<td>EXISTS, NOT EXISTS </td>
553
554<td>Collection emptiness testing </td>
555 </tr>
556
557<tr class="a">
558
559<td>^ </td>
560
561<td>Exponentiation </td>
562 </tr>
563
564<tr class="b">
565
566<td>*, /, % </td>
567
568<td>Multiplication, division, modulo </td>
569 </tr>
570
571<tr class="a">
572
573<td>+, - </td>
574
575<td>Addition, subtraction </td>
576 </tr>
577
578<tr class="b">
579
580<td>|| </td>
581
582<td>String concatenation </td>
583 </tr>
584
585<tr class="a">
586
587<td>IS NULL, IS NOT NULL, IS MISSING, IS NOT MISSING, <br />IS UNKNOWN, IS NOT UNKNOWN</td>
588
589<td>Unknown value comparison </td>
590 </tr>
591
592<tr class="b">
593
594<td>BETWEEN, NOT BETWEEN </td>
595
596<td>Range comparison (inclusive on both sides) </td>
597 </tr>
598
599<tr class="a">
600
601<td>=, !=, &lt;&gt;, &lt;, &gt;, &lt;=, &gt;=, LIKE, NOT LIKE, IN, NOT IN </td>
602
603<td>Comparison </td>
604 </tr>
605
606<tr class="b">
607
608<td>NOT </td>
609
610<td>Logical negation </td>
611 </tr>
612
613<tr class="a">
614
615<td>AND </td>
616
617<td>Conjunction </td>
618 </tr>
619
620<tr class="b">
621
622<td>OR </td>
623
624<td>Disjunction </td>
625 </tr>
626 </tbody>
627</table>
628<p>In general, if any operand evaluates to a <tt>MISSING</tt> value, the enclosing operator will return <tt>MISSING</tt>; if none of the operands evaluates to a <tt>MISSING</tt> value but there is an operand that evaluates to a <tt>NULL</tt> value, the enclosing operator will return <tt>NULL</tt>. However, there are a few exceptions listed in <a href="#Comparison_operators">comparison operators</a> and <a href="#Logical_operators">logical operators</a>.</p>
629<div class="section">
630<h3><a name="Arithmetic_Operators"></a><a name="Arithmetic_operators" id="Arithmetic_operators">Arithmetic Operators</a></h3>
631<p>Arithmetic operators are used to exponentiate, add, subtract, multiply, and divide numeric values, or concatenate string values.</p>
632
633<table border="0" class="table table-striped">
634 <thead>
635
636<tr class="a">
637
638<th>Operator </th>
639
640<th>Purpose </th>
641
642<th>Example </th>
643 </tr>
644 </thead>
645 <tbody>
646
647<tr class="b">
648
649<td>+, - </td>
650
651<td>As unary operators, they denote a <br />positive or negative expression </td>
652
653<td>SELECT VALUE -1; </td>
654 </tr>
655
656<tr class="a">
657
658<td>+, - </td>
659
660<td>As binary operators, they add or subtract </td>
661
662<td>SELECT VALUE 1 + 2; </td>
663 </tr>
664
665<tr class="b">
666
667<td>*, /, % </td>
668
669<td>Multiply, divide, modulo </td>
670
671<td>SELECT VALUE 4 / 2.0; </td>
672 </tr>
673
674<tr class="a">
675
676<td>^ </td>
677
678<td>Exponentiation </td>
679
680<td>SELECT VALUE 2^3; </td>
681 </tr>
682
683<tr class="b">
684
685<td>|| </td>
686
687<td>String concatenation </td>
688
689<td>SELECT VALUE &#x201c;ab&#x201d;||&#x201c;c&#x201d;||&#x201c;d&#x201d;; </td>
690 </tr>
691 </tbody>
692</table></div>
693<div class="section">
694<h3><a name="Collection_Operators"></a><a name="Collection_operators" id="Collection_operators">Collection Operators</a></h3>
695<p>Collection operators are used for membership tests (IN, NOT IN) or empty collection tests (EXISTS, NOT EXISTS).</p>
696
697<table border="0" class="table table-striped">
698 <thead>
699
700<tr class="a">
701
702<th>Operator </th>
703
704<th>Purpose </th>
705
706<th>Example </th>
707 </tr>
708 </thead>
709 <tbody>
710
711<tr class="b">
712
713<td>IN </td>
714
715<td>Membership test </td>
716
717<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.lang IN [&#x201c;en&#x201d;, &#x201c;de&#x201d;]; </td>
718 </tr>
719
720<tr class="a">
721
722<td>NOT IN </td>
723
724<td>Non-membership test </td>
725
726<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.lang NOT IN [&#x201c;en&#x201d;]; </td>
727 </tr>
728
729<tr class="b">
730
731<td>EXISTS </td>
732
733<td>Check whether a collection is not empty </td>
734
735<td>SELECT * FROM ChirpMessages cm <br />WHERE EXISTS cm.referredTopics; </td>
736 </tr>
737
738<tr class="a">
739
740<td>NOT EXISTS </td>
741
742<td>Check whether a collection is empty </td>
743
744<td>SELECT * FROM ChirpMessages cm <br />WHERE NOT EXISTS cm.referredTopics; </td>
745 </tr>
746 </tbody>
747</table></div>
748<div class="section">
749<h3><a name="Comparison_Operators"></a><a name="Comparison_operators" id="Comparison_operators">Comparison Operators</a></h3>
750<p>Comparison operators are used to compare values. The comparison operators fall into one of two sub-categories: missing value comparisons and regular value comparisons. SQL++ (and JSON) has two ways of representing missing information in an object - the presence of the field with a NULL for its value (as in SQL), and the absence of the field (which JSON permits). For example, the first of the following objects represents Jack, whose friend is Jill. In the other examples, Jake is friendless a la SQL, with a friend field that is NULL, while Joe is friendless in a more natural (for JSON) way, i.e., by not having a friend field.</p>
751<div class="section">
752<div class="section">
753<h5><a name="Examples"></a>Examples</h5>
754<p>{&#x201c;name&#x201d;: &#x201c;Jack&#x201d;, &#x201c;friend&#x201d;: &#x201c;Jill&#x201d;}</p>
755<p>{&#x201c;name&#x201d;: &#x201c;Jake&#x201d;, &#x201c;friend&#x201d;: NULL}</p>
756<p>{&#x201c;name&#x201d;: &#x201c;Joe&#x201d;}</p>
757<p>The following table enumerates all of SQL++&#x2019;s comparison operators.</p>
758
759<table border="0" class="table table-striped">
760 <thead>
761
762<tr class="a">
763
764<th>Operator </th>
765
766<th>Purpose </th>
767
768<th>Example </th>
769 </tr>
770 </thead>
771 <tbody>
772
773<tr class="b">
774
775<td>IS NULL </td>
776
777<td>Test if a value is NULL </td>
778
779<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS NULL; </td>
780 </tr>
781
782<tr class="a">
783
784<td>IS NOT NULL </td>
785
786<td>Test if a value is not NULL </td>
787
788<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS NOT NULL; </td>
789 </tr>
790
791<tr class="b">
792
793<td>IS MISSING </td>
794
795<td>Test if a value is MISSING </td>
796
797<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS MISSING; </td>
798 </tr>
799
800<tr class="a">
801
802<td>IS NOT MISSING </td>
803
804<td>Test if a value is not MISSING </td>
805
806<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS NOT MISSING;</td>
807 </tr>
808
809<tr class="b">
810
811<td>IS UNKNOWN </td>
812
813<td>Test if a value is NULL or MISSING </td>
814
815<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS UNKNOWN; </td>
816 </tr>
817
818<tr class="a">
819
820<td>IS NOT UNKNOWN </td>
821
822<td>Test if a value is neither NULL nor MISSING </td>
823
824<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name IS NOT UNKNOWN;</td>
825 </tr>
826
827<tr class="b">
828
829<td>BETWEEN </td>
830
831<td>Test if a value is between a start value and <br />an end value. The comparison is inclusive <br />of both start and end values. </td>
832
833<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId BETWEEN 10 AND 20;</td>
834 </tr>
835
836<tr class="a">
837
838<td>= </td>
839
840<td>Equality test </td>
841
842<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId=10; </td>
843 </tr>
844
845<tr class="b">
846
847<td>!= </td>
848
849<td>Inequality test </td>
850
851<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId!=10;</td>
852 </tr>
853
854<tr class="a">
855
856<td>&lt;&gt; </td>
857
858<td>Inequality test </td>
859
860<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId&lt;&gt;10;</td>
861 </tr>
862
863<tr class="b">
864
865<td>&lt; </td>
866
867<td>Less than </td>
868
869<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId&lt;10; </td>
870 </tr>
871
872<tr class="a">
873
874<td>&gt; </td>
875
876<td>Greater than </td>
877
878<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId&gt;10; </td>
879 </tr>
880
881<tr class="b">
882
883<td>&lt;= </td>
884
885<td>Less than or equal to </td>
886
887<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId&lt;=10; </td>
888 </tr>
889
890<tr class="a">
891
892<td>&gt;= </td>
893
894<td>Greater than or equal to </td>
895
896<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.chirpId&gt;=10; </td>
897 </tr>
898
899<tr class="b">
900
901<td>LIKE </td>
902
903<td>Test if the left side matches a<br /> pattern defined on the right<br /> side; in the pattern, &#x201c;%&#x201d; matches <br />any string while &#x201c;_&#x201d; matches <br /> any single character. </td>
904
905<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name LIKE &#x201c;%Giesen%&#x201d;;</td>
906 </tr>
907
908<tr class="a">
909
910<td>NOT LIKE </td>
911
912<td>Test if the left side does not <br />match a pattern defined on the right<br /> side; in the pattern, &#x201c;%&#x201d; matches <br />any string while &#x201c;_&#x201d; matches <br /> any single character. </td>
913
914<td>SELECT * FROM ChirpMessages cm <br />WHERE cm.user.name NOT LIKE &#x201c;%Giesen%&#x201d;;</td>
915 </tr>
916 </tbody>
917</table>
918<p>The following table summarizes how the missing value comparison operators work.</p>
919
920<table border="0" class="table table-striped">
921 <thead>
922
923<tr class="a">
924
925<th>Operator </th>
926
927<th>Non-NULL/Non-MISSING value </th>
928
929<th>NULL </th>
930
931<th>MISSING </th>
932 </tr>
933 </thead>
934 <tbody>
935
936<tr class="b">
937
938<td>IS NULL </td>
939
940<td>FALSE </td>
941
942<td>TRUE </td>
943
944<td>MISSING </td>
945 </tr>
946
947<tr class="a">
948
949<td>IS NOT NULL </td>
950
951<td>TRUE </td>
952
953<td>FALSE </td>
954
955<td>MISSING </td>
956 </tr>
957
958<tr class="b">
959
960<td>IS MISSING </td>
961
962<td>FALSE </td>
963
964<td>FALSE </td>
965
966<td>TRUE </td>
967 </tr>
968
969<tr class="a">
970
971<td>IS NOT MISSING </td>
972
973<td>TRUE </td>
974
975<td>TRUE </td>
976
977<td>FALSE </td>
978 </tr>
979
980<tr class="b">
981
982<td>IS UNKNOWN </td>
983
984<td>FALSE </td>
985
986<td>TRUE </td>
987
988<td>TRUE </td>
989 </tr>
990
991<tr class="a">
992
993<td>IS NOT UNKNOWN </td>
994
995<td>TRUE </td>
996
997<td>FALSE </td>
998
999<td>FALSE</td>
1000 </tr>
1001 </tbody>
1002</table></div></div></div>
1003<div class="section">
1004<h3><a name="Logical_Operators"></a><a name="Logical_operators" id="Logical_operators">Logical Operators</a></h3>
1005<p>Logical operators perform logical <tt>NOT</tt>, <tt>AND</tt>, and <tt>OR</tt> operations over Boolean values (<tt>TRUE</tt> and <tt>FALSE</tt>) plus <tt>NULL</tt> and <tt>MISSING</tt>.</p>
1006
1007<table border="0" class="table table-striped">
1008 <thead>
1009
1010<tr class="a">
1011
1012<th>Operator </th>
1013
1014<th>Purpose </th>
1015
1016<th>Example </th>
1017 </tr>
1018 </thead>
1019 <tbody>
1020
1021<tr class="b">
1022
1023<td>NOT </td>
1024
1025<td>Returns true if the following condition is false, otherwise returns false </td>
1026
1027<td>SELECT VALUE NOT TRUE; </td>
1028 </tr>
1029
1030<tr class="a">
1031
1032<td>AND </td>
1033
1034<td>Returns true if both branches are true, otherwise returns false </td>
1035
1036<td>SELECT VALUE TRUE AND FALSE; </td>
1037 </tr>
1038
1039<tr class="b">
1040
1041<td>OR </td>
1042
1043<td>Returns true if one branch is true, otherwise returns false </td>
1044
1045<td>SELECT VALUE FALSE OR FALSE; </td>
1046 </tr>
1047 </tbody>
1048</table>
1049<p>The following table is the truth table for <tt>AND</tt> and <tt>OR</tt>.</p>
1050
1051<table border="0" class="table table-striped">
1052 <thead>
1053
1054<tr class="a">
1055
1056<th>A </th>
1057
1058<th>B </th>
1059
1060<th>A AND B </th>
1061
1062<th>A OR B </th>
1063 </tr>
1064 </thead>
1065 <tbody>
1066
1067<tr class="b">
1068
1069<td>TRUE </td>
1070
1071<td>TRUE </td>
1072
1073<td>TRUE </td>
1074
1075<td>TRUE </td>
1076 </tr>
1077
1078<tr class="a">
1079
1080<td>TRUE </td>
1081
1082<td>FALSE </td>
1083
1084<td>FALSE </td>
1085
1086<td>TRUE </td>
1087 </tr>
1088
1089<tr class="b">
1090
1091<td>TRUE </td>
1092
1093<td>NULL </td>
1094
1095<td>NULL </td>
1096
1097<td>TRUE </td>
1098 </tr>
1099
1100<tr class="a">
1101
1102<td>TRUE </td>
1103
1104<td>MISSING </td>
1105
1106<td>MISSING </td>
1107
1108<td>TRUE </td>
1109 </tr>
1110
1111<tr class="b">
1112
1113<td>FALSE </td>
1114
1115<td>FALSE </td>
1116
1117<td>FALSE </td>
1118
1119<td>FALSE </td>
1120 </tr>
1121
1122<tr class="a">
1123
1124<td>FALSE </td>
1125
1126<td>NULL </td>
1127
1128<td>FALSE </td>
1129
1130<td>NULL </td>
1131 </tr>
1132
1133<tr class="b">
1134
1135<td>FALSE </td>
1136
1137<td>MISSING </td>
1138
1139<td>FALSE </td>
1140
1141<td>MISSING </td>
1142 </tr>
1143
1144<tr class="a">
1145
1146<td>NULL </td>
1147
1148<td>NULL </td>
1149
1150<td>NULL </td>
1151
1152<td>NULL </td>
1153 </tr>
1154
1155<tr class="b">
1156
1157<td>NULL </td>
1158
1159<td>MISSING </td>
1160
1161<td>MISSING </td>
1162
1163<td>NULL </td>
1164 </tr>
1165
1166<tr class="a">
1167
1168<td>MISSING </td>
1169
1170<td>MISSING </td>
1171
1172<td>MISSING </td>
1173
1174<td>MISSING </td>
1175 </tr>
1176 </tbody>
1177</table>
1178<p>The following table demonstrates the results of <tt>NOT</tt> on all possible inputs.</p>
1179
1180<table border="0" class="table table-striped">
1181 <thead>
1182
1183<tr class="a">
1184
1185<th>A </th>
1186
1187<th>NOT A </th>
1188 </tr>
1189 </thead>
1190 <tbody>
1191
1192<tr class="b">
1193
1194<td>TRUE </td>
1195
1196<td>FALSE </td>
1197 </tr>
1198
1199<tr class="a">
1200
1201<td>FALSE </td>
1202
1203<td>TRUE </td>
1204 </tr>
1205
1206<tr class="b">
1207
1208<td>NULL </td>
1209
1210<td>NULL </td>
1211 </tr>
1212
1213<tr class="a">
1214
1215<td>MISSING </td>
1216
1217<td>MISSING </td>
1218 </tr>
1219 </tbody>
1220</table></div></div>
1221<div class="section">
1222<h2><a name="Case_Expressions"></a><a name="Case_expressions" id="Case_expressions">Case Expressions</a></h2>
1223
1224<div class="source">
1225<div class="source">
1226<pre>CaseExpression ::= SimpleCaseExpression | SearchedCaseExpression
1227SimpleCaseExpression ::= &lt;CASE&gt; Expression ( &lt;WHEN&gt; Expression &lt;THEN&gt; Expression )+ ( &lt;ELSE&gt; Expression )? &lt;END&gt;
1228SearchedCaseExpression ::= &lt;CASE&gt; ( &lt;WHEN&gt; Expression &lt;THEN&gt; Expression )+ ( &lt;ELSE&gt; Expression )? &lt;END&gt;
1229</pre></div></div>
1230<p>In a simple <tt>CASE</tt> expression, the query evaluator searches for the first <tt>WHEN</tt> &#x2026; <tt>THEN</tt> pair in which the <tt>WHEN</tt> expression is equal to the expression following <tt>CASE</tt> and returns the expression following <tt>THEN</tt>. If none of the <tt>WHEN</tt> &#x2026; <tt>THEN</tt> pairs meet this condition, and an <tt>ELSE</tt> branch exists, it returns the <tt>ELSE</tt> expression. Otherwise, <tt>NULL</tt> is returned.</p>
1231<p>In a searched CASE expression, the query evaluator searches from left to right until it finds a <tt>WHEN</tt> expression that is evaluated to <tt>TRUE</tt>, and then returns its corresponding <tt>THEN</tt> expression. If no condition is found to be <tt>TRUE</tt>, and an <tt>ELSE</tt> branch exists, it returns the <tt>ELSE</tt> expression. Otherwise, it returns <tt>NULL</tt>.</p>
1232<p>The following example illustrates the form of a case expression.</p>
1233<div class="section">
1234<div class="section">
1235<div class="section">
1236<h5><a name="Example"></a>Example</h5>
1237
1238<div class="source">
1239<div class="source">
1240<pre>CASE (2 &lt; 3) WHEN true THEN &quot;yes&quot; ELSE &quot;no&quot; END
1241</pre></div></div></div></div></div></div>
1242<div class="section">
1243<h2><a name="Quantified_Expressions"></a><a name="Quantified_expressions" id="Quantified_expressions">Quantified Expressions</a></h2>
1244
1245<div class="source">
1246<div class="source">
1247<pre>QuantifiedExpression ::= ( (&lt;ANY&gt;|&lt;SOME&gt;) | &lt;EVERY&gt; ) Variable &lt;IN&gt; Expression ( &quot;,&quot; Variable &lt;IN&gt; Expression )*
1248 &lt;SATISFIES&gt; Expression (&lt;END&gt;)?
1249</pre></div></div>
1250<p>Quantified expressions are used for expressing existential or universal predicates involving the elements of a collection.</p>
1251<p>The following pair of examples illustrates the use of a quantified expression to test that every (or some) element in the set [1, 2, 3] of integers is less than three. The first example yields <tt>FALSE</tt> and the second example yields <tt>TRUE</tt>.</p>
1252<p>It is useful to note that if the set were instead the empty set, the first expression would yield <tt>TRUE</tt> (&#x201c;every&#x201d; value in an empty set satisfies the condition) while the second expression would yield <tt>FALSE</tt> (since there isn&#x2019;t &#x201c;some&#x201d; value, as there are no values in the set, that satisfies the condition).</p>
1253<p>A quantified expression will return a <tt>NULL</tt> (or <tt>MISSING</tt>) if the first expression in it evaluates to <tt>NULL</tt> (or <tt>MISSING</tt>). A type error will be raised if the first expression in a quantified expression does not return a collection.</p>
1254<div class="section">
1255<div class="section">
1256<div class="section">
1257<h5><a name="Examples"></a>Examples</h5>
1258
1259<div class="source">
1260<div class="source">
1261<pre>EVERY x IN [ 1, 2, 3 ] SATISFIES x &lt; 3
1262SOME x IN [ 1, 2, 3 ] SATISFIES x &lt; 3
1263</pre></div></div></div></div></div></div>
1264<div class="section">
1265<h2><a name="Path_Expressions"></a><a name="Path_expressions" id="Path_expressions">Path Expressions</a></h2>
1266
1267<div class="source">
1268<div class="source">
1269<pre>PathExpression ::= PrimaryExpression ( Field | Index )*
1270Field ::= &quot;.&quot; Identifier
1271Index ::= &quot;[&quot; ( Expression | &quot;?&quot; ) &quot;]&quot;
1272</pre></div></div>
1273<p>Components of complex types in the data model are accessed via path expressions. Path access can be applied to the result of a SQL++ expression that yields an instance of a complex type, for example, an object or array instance. For objects, path access is based on field names. For arrays, path access is based on (zero-based) array-style indexing. SQL++ also supports an &#x201c;I&#x2019;m feeling lucky&#x201d; style index accessor, [?], for selecting an arbitrary element from an array. Attempts to access non-existent fields or out-of-bound array elements produce the special value <tt>MISSING</tt>. Type errors will be raised for inappropriate use of a path expression, such as applying a field accessor to a numeric value.</p>
1274<p>The following examples illustrate field access for an object, index-based element access for an array, and also a composition thereof.</p>
1275<div class="section">
1276<div class="section">
1277<div class="section">
1278<h5><a name="Examples"></a>Examples</h5>
1279
1280<div class="source">
1281<div class="source">
1282<pre>({&quot;name&quot;: &quot;MyABCs&quot;, &quot;array&quot;: [ &quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}).array
1283
1284([&quot;a&quot;, &quot;b&quot;, &quot;c&quot;])[2]
1285
1286({&quot;name&quot;: &quot;MyABCs&quot;, &quot;array&quot;: [ &quot;a&quot;, &quot;b&quot;, &quot;c&quot;]}).array[2]
1287</pre></div></div></div></div></div></div>
1288<div class="section">
1289<h2><a name="Primary_Expressions"></a><a name="Primary_expressions" id="Primary_expressions">Primary Expressions</a></h2>
1290
1291<div class="source">
1292<div class="source">
1293<pre>PrimaryExpression ::= Literal
1294 | VariableReference
1295 | ParenthesizedExpression
1296 | FunctionCallExpression
1297 | Constructor
1298</pre></div></div>
1299<p>The most basic building block for any SQL++ expression is PrimaryExpression. This can be a simple literal (constant) value, a reference to a query variable that is in scope, a parenthesized expression, a function call, or a newly constructed instance of the data model (such as a newly constructed object, array, or multiset of data model instances).</p></div>
1300<div class="section">
1301<h2><a name="Literals" id="Literals">Literals</a></h2>
1302
1303<div class="source">
1304<div class="source">
1305<pre>Literal ::= StringLiteral
1306 | IntegerLiteral
1307 | FloatLiteral
1308 | DoubleLiteral
1309 | &lt;NULL&gt;
1310 | &lt;MISSING&gt;
1311 | &lt;TRUE&gt;
1312 | &lt;FALSE&gt;
1313StringLiteral ::= &quot;\&quot;&quot; (
1314 &lt;EscapeQuot&gt;
1315 | &lt;EscapeBslash&gt;
1316 | &lt;EscapeSlash&gt;
1317 | &lt;EscapeBspace&gt;
1318 | &lt;EscapeFormf&gt;
1319 | &lt;EscapeNl&gt;
1320 | &lt;EscapeCr&gt;
1321 | &lt;EscapeTab&gt;
1322 | ~[&quot;\&quot;&quot;,&quot;\\&quot;])*
1323 &quot;\&quot;&quot;
1324 | &quot;\'&quot;(
1325 &lt;EscapeApos&gt;
1326 | &lt;EscapeBslash&gt;
1327 | &lt;EscapeSlash&gt;
1328 | &lt;EscapeBspace&gt;
1329 | &lt;EscapeFormf&gt;
1330 | &lt;EscapeNl&gt;
1331 | &lt;EscapeCr&gt;
1332 | &lt;EscapeTab&gt;
1333 | ~[&quot;\'&quot;,&quot;\\&quot;])*
1334 &quot;\'&quot;
1335&lt;EscapeApos&gt; ::= &quot;\\\'&quot;
1336&lt;EscapeQuot&gt; ::= &quot;\\\&quot;&quot;
1337&lt;EscapeBslash&gt; ::= &quot;\\\\&quot;
1338&lt;EscapeSlash&gt; ::= &quot;\\/&quot;
1339&lt;EscapeBspace&gt; ::= &quot;\\b&quot;
1340&lt;EscapeFormf&gt; ::= &quot;\\f&quot;
1341&lt;EscapeNl&gt; ::= &quot;\\n&quot;
1342&lt;EscapeCr&gt; ::= &quot;\\r&quot;
1343&lt;EscapeTab&gt; ::= &quot;\\t&quot;
1344
1345IntegerLiteral ::= &lt;DIGITS&gt;
1346&lt;DIGITS&gt; ::= [&quot;0&quot; - &quot;9&quot;]+
1347FloatLiteral ::= &lt;DIGITS&gt; ( &quot;f&quot; | &quot;F&quot; )
1348 | &lt;DIGITS&gt; ( &quot;.&quot; &lt;DIGITS&gt; ( &quot;f&quot; | &quot;F&quot; ) )?
1349 | &quot;.&quot; &lt;DIGITS&gt; ( &quot;f&quot; | &quot;F&quot; )
1350DoubleLiteral ::= &lt;DIGITS&gt; &quot;.&quot; &lt;DIGITS&gt;
1351 | &quot;.&quot; &lt;DIGITS&gt;
1352</pre></div></div>
1353<p>Literals (constants) in SQL++ can be strings, integers, floating point values, double values, boolean constants, or special constant values like <tt>NULL</tt> and <tt>MISSING</tt>. The <tt>NULL</tt> value is like a <tt>NULL</tt> in SQL; it is used to represent an unknown field value. The special value <tt>MISSING</tt> is only meaningful in the context of SQL++ field accesses; it occurs when the accessed field simply does not exist at all in an object being accessed.</p>
1354<p>The following are some simple examples of SQL++ literals.</p>
1355<div class="section">
1356<div class="section">
1357<div class="section">
1358<h5><a name="Examples"></a>Examples</h5>
1359
1360<div class="source">
1361<div class="source">
1362<pre>'a string'
1363&quot;test string&quot;
136442
1365</pre></div></div>
1366<p>Different from standard SQL, double quotes play the same role as single quotes and may be used for string literals in SQL++.</p></div></div></div>
1367<div class="section">
1368<h3><a name="Variable_References"></a><a name="Variable_references" id="Variable_references">Variable References</a></h3>
1369
1370<div class="source">
1371<div class="source">
1372<pre>VariableReference ::= &lt;IDENTIFIER&gt;|&lt;DelimitedIdentifier&gt;
1373&lt;IDENTIFIER&gt; ::= &lt;LETTER&gt; (&lt;LETTER&gt; | &lt;DIGIT&gt; | &quot;_&quot; | &quot;$&quot;)*
1374&lt;LETTER&gt; ::= [&quot;A&quot; - &quot;Z&quot;, &quot;a&quot; - &quot;z&quot;]
1375DelimitedIdentifier ::= &quot;`&quot; (&lt;EscapeQuot&gt;
1376 | &lt;EscapeBslash&gt;
1377 | &lt;EscapeSlash&gt;
1378 | &lt;EscapeBspace&gt;
1379 | &lt;EscapeFormf&gt;
1380 | &lt;EscapeNl&gt;
1381 | &lt;EscapeCr&gt;
1382 | &lt;EscapeTab&gt;
1383 | ~[&quot;`&quot;,&quot;\\&quot;])*
1384 &quot;`&quot;
1385</pre></div></div>
1386<p>A variable in SQL++ can be bound to any legal data model value. A variable reference refers to the value to which an in-scope variable is bound. (E.g., a variable binding may originate from one of the <tt>FROM</tt>, <tt>WITH</tt> or <tt>LET</tt> clauses of a <tt>SELECT</tt> statement or from an input parameter in the context of a function body.) Backticks, for example, `id`, are used for delimited identifiers. Delimiting is needed when a variable&#x2019;s desired name clashes with a SQL++ keyword or includes characters not allowed in regular identifiers.</p>
1387<div class="section">
1388<div class="section">
1389<h5><a name="Examples"></a>Examples</h5>
1390
1391<div class="source">
1392<div class="source">
1393<pre>tweet
1394id
1395`SELECT`
1396`my-function`
1397</pre></div></div></div></div></div>
1398<div class="section">
1399<h3><a name="Parenthesized_Expressions"></a><a name="Parenthesized_expressions" id="Parenthesized_expressions">Parenthesized Expressions</a></h3>
1400
1401<div class="source">
1402<div class="source">
1403<pre>ParenthesizedExpression ::= &quot;(&quot; Expression &quot;)&quot; | Subquery
1404</pre></div></div>
1405<p>An expression can be parenthesized to control the precedence order or otherwise clarify a query. In SQL++, for composability, a subquery is also a parenthesized expression.</p>
1406<p>The following expression evaluates to the value 2.</p>
1407<div class="section">
1408<div class="section">
1409<h5><a name="Example"></a>Example</h5>
1410
1411<div class="source">
1412<div class="source">
1413<pre>( 1 + 1 )
1414</pre></div></div></div></div></div>
1415<div class="section">
1416<h3><a name="Function_Call_Expressions"></a><a name="Function_call_expressions" id="Function_call_expressions">Function Call Expressions</a></h3>
1417
1418<div class="source">
1419<div class="source">
1420<pre>FunctionCallExpression ::= FunctionName &quot;(&quot; ( Expression ( &quot;,&quot; Expression )* )? &quot;)&quot;
1421</pre></div></div>
1422<p>Functions are included in SQL++, like most languages, as a way to package useful functionality or to componentize complicated or reusable SQL++ computations. A function call is a legal SQL++ query expression that represents the value resulting from the evaluation of its body expression with the given parameter bindings; the parameter value bindings can themselves be any SQL++ expressions.</p>
1423<p>The following example is a (built-in) function call expression whose value is 8.</p>
1424<div class="section">
1425<div class="section">
1426<h5><a name="Example"></a>Example</h5>
1427
1428<div class="source">
1429<div class="source">
1430<pre>length('a string')
1431</pre></div></div></div></div></div>
1432<div class="section">
1433<h3><a name="Constructors" id="Constructors">Constructors</a></h3>
1434
1435<div class="source">
1436<div class="source">
1437<pre>Constructor ::= ArrayConstructor | MultisetConstructor | ObjectConstructor
1438ArrayConstructor ::= &quot;[&quot; ( Expression ( &quot;,&quot; Expression )* )? &quot;]&quot;
1439MultisetConstructor ::= &quot;{{&quot; ( Expression ( &quot;,&quot; Expression )* )? &quot;}}&quot;
1440ObjectConstructor ::= &quot;{&quot; ( FieldBinding ( &quot;,&quot; FieldBinding )* )? &quot;}&quot;
1441FieldBinding ::= Expression &quot;:&quot; Expression
1442</pre></div></div>
1443<p>A major feature of SQL++ is its ability to construct new data model instances. This is accomplished using its constructors for each of the model&#x2019;s complex object structures, namely arrays, multisets, and objects. Arrays are like JSON arrays, while multisets have bag semantics. Objects are built from fields that are field-name/field-value pairs, again like JSON.</p>
1444<p>The following examples illustrate how to construct a new array with 4 items and a new object with 2 fields respectively. Array elements can be homogeneous (as in the first example), which is the common case, or they may be heterogeneous (as in the second example). The data values and field name values used to construct arrays, multisets, and objects in constructors are all simply SQL++ expressions. Thus, the collection elements, field names, and field values used in constructors can be simple literals or they can come from query variable references or even arbitrarily complex SQL++ expressions (subqueries). Type errors will be raised if the field names in an object are not strings, and duplicate field errors will be raised if they are not distinct.</p>
1445<div class="section">
1446<div class="section">
1447<h5><a name="Examples"></a>Examples</h5>
1448
1449<div class="source">
1450<div class="source">
1451<pre>[ 'a', 'b', 'c', 'c' ]
1452
1453[ 42, &quot;forty-two!&quot;, { &quot;rank&quot; : &quot;Captain&quot;, &quot;name&quot;: &quot;America&quot; }, 3.14159 ]
1454
1455{
1456 'project name': 'Hyracks',
1457 'project members': [ 'vinayakb', 'dtabass', 'chenli', 'tsotras', 'tillw' ]
1458}
1459</pre></div></div>
1460<!-- ! Licensed to the Apache Software Foundation (ASF) under one
1461 ! or more contributor license agreements. See the NOTICE file
1462 ! distributed with this work for additional information
1463 ! regarding copyright ownership. The ASF licenses this file
1464 ! to you under the Apache License, Version 2.0 (the
1465 ! "License"); you may not use this file except in compliance
1466 ! with the License. You may obtain a copy of the License at
1467 !
1468 ! http://www.apache.org/licenses/LICENSE-2.0
1469 !
1470 ! Unless required by applicable law or agreed to in writing,
1471 ! software distributed under the License is distributed on an
1472 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1473 ! KIND, either express or implied. See the License for the
1474 ! specific language governing permissions and limitations
1475 ! under the License.
1476 ! -->
1477<h1><a name="Queries" id="Queries">3. Queries</a></h1>
1478<p>A SQL++ query can be any legal SQL++ expression or <tt>SELECT</tt> statement. A SQL++ query always ends with a semicolon.</p>
1479
1480<div class="source">
1481<div class="source">
1482<pre>Query ::= (Expression | SelectStatement) &quot;;&quot;
1483</pre></div></div>
1484<!-- ! Licensed to the Apache Software Foundation (ASF) under one
1485 ! or more contributor license agreements. See the NOTICE file
1486 ! distributed with this work for additional information
1487 ! regarding copyright ownership. The ASF licenses this file
1488 ! to you under the Apache License, Version 2.0 (the
1489 ! "License"); you may not use this file except in compliance
1490 ! with the License. You may obtain a copy of the License at
1491 !
1492 ! http://www.apache.org/licenses/LICENSE-2.0
1493 !
1494 ! Unless required by applicable law or agreed to in writing,
1495 ! software distributed under the License is distributed on an
1496 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1497 ! KIND, either express or implied. See the License for the
1498 ! specific language governing permissions and limitations
1499 ! under the License.
1500 ! --></div></div></div></div>
1501<div class="section">
1502<h2><a name="Declarations" id="Declarations">Declarations</a></h2>
1503
1504<div class="source">
1505<div class="source">
1506<pre>DatabaseDeclaration ::= &quot;USE&quot; Identifier
1507</pre></div></div>
1508<p>At the uppermost level, the world of data is organized into data namespaces called <b>dataverses</b>. To set the default dataverse for a series of statements, the USE statement is provided in SQL++.</p>
1509<p>As an example, the following statement sets the default dataverse to be &#x201c;TinySocial&#x201d;.</p>
1510<div class="section">
1511<div class="section">
1512<div class="section">
1513<h5><a name="Example"></a>Example</h5>
1514
1515<div class="source">
1516<div class="source">
1517<pre>USE TinySocial;
1518</pre></div></div>
1519<!-- ! Licensed to the Apache Software Foundation (ASF) under one
1520 ! or more contributor license agreements. See the NOTICE file
1521 ! distributed with this work for additional information
1522 ! regarding copyright ownership. The ASF licenses this file
1523 ! to you under the Apache License, Version 2.0 (the
1524 ! "License"); you may not use this file except in compliance
1525 ! with the License. You may obtain a copy of the License at
1526 !
1527 ! http://www.apache.org/licenses/LICENSE-2.0
1528 !
1529 ! Unless required by applicable law or agreed to in writing,
1530 ! software distributed under the License is distributed on an
1531 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1532 ! KIND, either express or implied. See the License for the
1533 ! specific language governing permissions and limitations
1534 ! under the License.
1535 ! -->
1536<p>When writing a complex SQL++ query, it can sometimes be helpful to define one or more auxiliary functions that each address a sub-piece of the overall query. The declare function statement supports the creation of such helper functions. In general, the function body (expression) can be any legal SQL++ query expression.</p>
1537
1538<div class="source">
1539<div class="source">
1540<pre>FunctionDeclaration ::= &quot;DECLARE&quot; &quot;FUNCTION&quot; Identifier ParameterList &quot;{&quot; Expression &quot;}&quot;
1541ParameterList ::= &quot;(&quot; ( &lt;VARIABLE&gt; ( &quot;,&quot; &lt;VARIABLE&gt; )* )? &quot;)&quot;
1542</pre></div></div>
1543<p>The following is a simple example of a temporary SQL++ function definition and its use.</p></div>
1544<div class="section">
1545<h5><a name="Example"></a>Example</h5>
1546
1547<div class="source">
1548<div class="source">
1549<pre>DECLARE FUNCTION friendInfo(userId) {
1550 (SELECT u.id, u.name, len(u.friendIds) AS friendCount
1551 FROM GleambookUsers u
1552 WHERE u.id = userId)[0]
1553 };
1554
1555SELECT VALUE friendInfo(2);
1556</pre></div></div>
1557<p>For our sample data set, this returns:</p>
1558
1559<div class="source">
1560<div class="source">
1561<pre>[
1562 { &quot;id&quot;: 2, &quot;name&quot;: &quot;IsbelDull&quot;, &quot;friendCount&quot;: 2 }
1563]
1564</pre></div></div>
1565<!-- ! Licensed to the Apache Software Foundation (ASF) under one
1566 ! or more contributor license agreements. See the NOTICE file
1567 ! distributed with this work for additional information
1568 ! regarding copyright ownership. The ASF licenses this file
1569 ! to you under the Apache License, Version 2.0 (the
1570 ! "License"); you may not use this file except in compliance
1571 ! with the License. You may obtain a copy of the License at
1572 !
1573 ! http://www.apache.org/licenses/LICENSE-2.0
1574 !
1575 ! Unless required by applicable law or agreed to in writing,
1576 ! software distributed under the License is distributed on an
1577 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1578 ! KIND, either express or implied. See the License for the
1579 ! specific language governing permissions and limitations
1580 ! under the License.
1581 ! --></div></div></div></div>
1582<div class="section">
1583<h2><a name="SELECT_Statements"></a><a name="SELECT_statements" id="SELECT_statements">SELECT Statements</a></h2>
1584<p>The following shows the (rich) grammar for the <tt>SELECT</tt> statement in SQL++.</p>
1585
1586<div class="source">
1587<div class="source">
1588<pre>SelectStatement ::= ( WithClause )?
1589 SelectSetOperation (OrderbyClause )? ( LimitClause )?
1590SelectSetOperation ::= SelectBlock (&lt;UNION&gt; &lt;ALL&gt; ( SelectBlock | Subquery ) )*
1591Subquery ::= &quot;(&quot; SelectStatement &quot;)&quot;
1592
1593SelectBlock ::= SelectClause
1594 ( FromClause ( LetClause )?)?
1595 ( WhereClause )?
1596 ( GroupbyClause ( LetClause )? ( HavingClause )? )?
1597 |
1598 FromClause ( LetClause )?
1599 ( WhereClause )?
1600 ( GroupbyClause ( LetClause )? ( HavingClause )? )?
1601 SelectClause
1602
1603SelectClause ::= &lt;SELECT&gt; ( &lt;ALL&gt; | &lt;DISTINCT&gt; )? ( SelectRegular | SelectValue )
1604SelectRegular ::= Projection ( &quot;,&quot; Projection )*
1605SelectValue ::= ( &lt;VALUE&gt; | &lt;ELEMENT&gt; | &lt;RAW&gt; ) Expression
1606Projection ::= ( Expression ( &lt;AS&gt; )? Identifier | &quot;*&quot; )
1607
1608FromClause ::= &lt;FROM&gt; FromTerm ( &quot;,&quot; FromTerm )*
1609FromTerm ::= Expression (( &lt;AS&gt; )? Variable)?
1610 ( ( JoinType )? ( JoinClause | UnnestClause ) )*
1611
1612JoinClause ::= &lt;JOIN&gt; Expression (( &lt;AS&gt; )? Variable)? &lt;ON&gt; Expression
1613UnnestClause ::= ( &lt;UNNEST&gt; | &lt;CORRELATE&gt; | &lt;FLATTEN&gt; ) Expression
1614 ( &lt;AS&gt; )? Variable ( &lt;AT&gt; Variable )?
1615JoinType ::= ( &lt;INNER&gt; | &lt;LEFT&gt; ( &lt;OUTER&gt; )? )
1616
1617WithClause ::= &lt;WITH&gt; WithElement ( &quot;,&quot; WithElement )*
1618LetClause ::= (&lt;LET&gt; | &lt;LETTING&gt;) LetElement ( &quot;,&quot; LetElement )*
1619LetElement ::= Variable &quot;=&quot; Expression
1620WithElement ::= Variable &lt;AS&gt; Expression
1621
1622WhereClause ::= &lt;WHERE&gt; Expression
1623
1624GroupbyClause ::= &lt;GROUP&gt; &lt;BY&gt; Expression ( (&lt;AS&gt;)? Variable )? ( &quot;,&quot; Expression ( (&lt;AS&gt;)? Variable )? )*
1625 ( &lt;GROUP&gt; &lt;AS&gt; Variable
1626 (&quot;(&quot; Variable &lt;AS&gt; VariableReference (&quot;,&quot; Variable &lt;AS&gt; VariableReference )* &quot;)&quot;)?
1627 )?
1628HavingClause ::= &lt;HAVING&gt; Expression
1629
1630OrderbyClause ::= &lt;ORDER&gt; &lt;BY&gt; Expression ( &lt;ASC&gt; | &lt;DESC&gt; )? ( &quot;,&quot; Expression ( &lt;ASC&gt; | &lt;DESC&gt; )? )*
1631LimitClause ::= &lt;LIMIT&gt; Expression ( &lt;OFFSET&gt; Expression )?
1632</pre></div></div>
1633<p>In this section, we will make use of two stored collections of objects (datasets), <tt>GleambookUsers</tt> and <tt>GleambookMessages</tt>, in a series of running examples to explain <tt>SELECT</tt> queries. The contents of the example collections are as follows:</p>
1634<p><tt>GleambookUsers</tt> collection (or, dataset):</p>
1635
1636<div class="source">
1637<div class="source">
1638<pre>[ {
1639 &quot;id&quot;:1,
1640 &quot;alias&quot;:&quot;Margarita&quot;,
1641 &quot;name&quot;:&quot;MargaritaStoddard&quot;,
1642 &quot;nickname&quot;:&quot;Mags&quot;,
1643 &quot;userSince&quot;:&quot;2012-08-20T10:10:00&quot;,
1644 &quot;friendIds&quot;:[2,3,6,10],
1645 &quot;employment&quot;:[{
1646 &quot;organizationName&quot;:&quot;Codetechno&quot;,
1647 &quot;start-date&quot;:&quot;2006-08-06&quot;
1648 },
1649 {
1650 &quot;organizationName&quot;:&quot;geomedia&quot;,
1651 &quot;start-date&quot;:&quot;2010-06-17&quot;,
1652 &quot;end-date&quot;:&quot;2010-01-26&quot;
1653 }],
1654 &quot;gender&quot;:&quot;F&quot;
1655},
1656{
1657 &quot;id&quot;:2,
1658 &quot;alias&quot;:&quot;Isbel&quot;,
1659 &quot;name&quot;:&quot;IsbelDull&quot;,
1660 &quot;nickname&quot;:&quot;Izzy&quot;,
1661 &quot;userSince&quot;:&quot;2011-01-22T10:10:00&quot;,
1662 &quot;friendIds&quot;:[1,4],
1663 &quot;employment&quot;:[{
1664 &quot;organizationName&quot;:&quot;Hexviafind&quot;,
1665 &quot;startDate&quot;:&quot;2010-04-27&quot;
1666 }]
1667},
1668{
1669 &quot;id&quot;:3,
1670 &quot;alias&quot;:&quot;Emory&quot;,
1671 &quot;name&quot;:&quot;EmoryUnk&quot;,
1672 &quot;userSince&quot;:&quot;2012-07-10T10:10:00&quot;,
1673 &quot;friendIds&quot;:[1,5,8,9],
1674 &quot;employment&quot;:[{
1675 &quot;organizationName&quot;:&quot;geomedia&quot;,
1676 &quot;startDate&quot;:&quot;2010-06-17&quot;,
1677 &quot;endDate&quot;:&quot;2010-01-26&quot;
1678 }]
1679} ]
1680</pre></div></div>
1681<p><tt>GleambookMessages</tt> collection (or, dataset):</p>
1682
1683<div class="source">
1684<div class="source">
1685<pre>[ {
1686 &quot;messageId&quot;:2,
1687 &quot;authorId&quot;:1,
1688 &quot;inResponseTo&quot;:4,
1689 &quot;senderLocation&quot;:[41.66,80.87],
1690 &quot;message&quot;:&quot; dislike x-phone its touch-screen is horrible&quot;
1691},
1692{
1693 &quot;messageId&quot;:3,
1694 &quot;authorId&quot;:2,
1695 &quot;inResponseTo&quot;:4,
1696 &quot;senderLocation&quot;:[48.09,81.01],
1697 &quot;message&quot;:&quot; like product-y the plan is amazing&quot;
1698},
1699{
1700 &quot;messageId&quot;:4,
1701 &quot;authorId&quot;:1,
1702 &quot;inResponseTo&quot;:2,
1703 &quot;senderLocation&quot;:[37.73,97.04],
1704 &quot;message&quot;:&quot; can't stand acast the network is horrible:(&quot;
1705},
1706{
1707 &quot;messageId&quot;:6,
1708 &quot;authorId&quot;:2,
1709 &quot;inResponseTo&quot;:1,
1710 &quot;senderLocation&quot;:[31.5,75.56],
1711 &quot;message&quot;:&quot; like product-z its platform is mind-blowing&quot;
1712},
1713{
1714 &quot;messageId&quot;:8,
1715 &quot;authorId&quot;:1,
1716 &quot;inResponseTo&quot;:11,
1717 &quot;senderLocation&quot;:[40.33,80.87],
1718 &quot;message&quot;:&quot; like ccast the 3G is awesome:)&quot;
1719},
1720{
1721 &quot;messageId&quot;:10,
1722 &quot;authorId&quot;:1,
1723 &quot;inResponseTo&quot;:12,
1724 &quot;senderLocation&quot;:[42.5,70.01],
1725 &quot;message&quot;:&quot; can't stand product-w the touch-screen is terrible&quot;
1726},
1727{
1728 &quot;messageId&quot;:11,
1729 &quot;authorId&quot;:1,
1730 &quot;inResponseTo&quot;:1,
1731 &quot;senderLocation&quot;:[38.97,77.49],
1732 &quot;message&quot;:&quot; can't stand acast its plan is terrible&quot;
1733} ]
1734</pre></div></div></div>
1735<div class="section">
1736<h2><a name="SELECT_Clause"></a><a name="Select_clauses" id="Select_clauses">SELECT Clause</a></h2>
1737<p>The SQL++ <tt>SELECT</tt> clause always returns a collection value as its result (even if the result is empty or a singleton).</p>
1738<div class="section">
1739<h3><a name="Select_ElementValueRaw"></a><a name="Select_element" id="Select_element">Select Element/Value/Raw</a></h3>
1740<p>The <tt>SELECT VALUE</tt> clause in SQL++ returns an array or multiset that contains the results of evaluating the <tt>VALUE</tt> expression, with one evaluation being performed per &#x201c;binding tuple&#x201d; (i.e., per <tt>FROM</tt> clause item) satisfying the statement&#x2019;s selection criteria. For historical reasons SQL++ also allows the keywords <tt>ELEMENT</tt> or <tt>RAW</tt> to be used in place of <tt>VALUE</tt> (not recommended).</p>
1741<p>If there is no FROM clause, the expression after <tt>VALUE</tt> is evaluated once with no binding tuples (except those inherited from an outer environment).</p>
1742<div class="section">
1743<div class="section">
1744<h5><a name="Example"></a>Example</h5>
1745
1746<div class="source">
1747<div class="source">
1748<pre>SELECT VALUE 1;
1749</pre></div></div>
1750<p>This query returns:</p>
1751
1752<div class="source">
1753<div class="source">
1754<pre>[
1755 1
1756]
1757</pre></div></div>
1758<p>The following example shows a query that selects one user from the GleambookUsers collection.</p></div>
1759<div class="section">
1760<h5><a name="Example"></a>Example</h5>
1761
1762<div class="source">
1763<div class="source">
1764<pre>SELECT VALUE user
1765FROM GleambookUsers user
1766WHERE user.id = 1;
1767</pre></div></div>
1768<p>This query returns:</p>
1769
1770<div class="source">
1771<div class="source">
1772<pre>[{
1773 &quot;userSince&quot;: &quot;2012-08-20T10:10:00.000Z&quot;,
1774 &quot;friendIds&quot;: [
1775 2,
1776 3,
1777 6,
1778 10
1779 ],
1780 &quot;gender&quot;: &quot;F&quot;,
1781 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
1782 &quot;nickname&quot;: &quot;Mags&quot;,
1783 &quot;alias&quot;: &quot;Margarita&quot;,
1784 &quot;id&quot;: 1,
1785 &quot;employment&quot;: [
1786 {
1787 &quot;organizationName&quot;: &quot;Codetechno&quot;,
1788 &quot;start-date&quot;: &quot;2006-08-06&quot;
1789 },
1790 {
1791 &quot;end-date&quot;: &quot;2010-01-26&quot;,
1792 &quot;organizationName&quot;: &quot;geomedia&quot;,
1793 &quot;start-date&quot;: &quot;2010-06-17&quot;
1794 }
1795 ]
1796} ]
1797</pre></div></div></div></div></div>
1798<div class="section">
1799<h3><a name="SQL-style_SELECT"></a><a name="SQL_select" id="SQL_select">SQL-style SELECT</a></h3>
1800<p>In SQL++, the traditional SQL-style <tt>SELECT</tt> syntax is also supported. This syntax can also be reformulated in a <tt>SELECT VALUE</tt> based manner in SQL++. (E.g., <tt>SELECT expA AS fldA, expB AS fldB</tt> is syntactic sugar for <tt>SELECT VALUE { 'fldA': expA, 'fldB': expB }</tt>.) Unlike in SQL, the result of a SQL++ query does not preserve the order of expressions in the <tt>SELECT</tt> clause.</p>
1801<div class="section">
1802<div class="section">
1803<h5><a name="Example"></a>Example</h5>
1804
1805<div class="source">
1806<div class="source">
1807<pre>SELECT user.alias user_alias, user.name user_name
1808FROM GleambookUsers user
1809WHERE user.id = 1;
1810</pre></div></div>
1811<p>Returns:</p>
1812
1813<div class="source">
1814<div class="source">
1815<pre>[ {
1816 &quot;user_name&quot;: &quot;MargaritaStoddard&quot;,
1817 &quot;user_alias&quot;: &quot;Margarita&quot;
1818} ]
1819</pre></div></div></div></div></div>
1820<div class="section">
1821<h3><a name="SELECT_"></a><a name="Select_star" id="Select_star">SELECT *</a></h3>
1822<p>In SQL++, <tt>SELECT *</tt> returns an object with a nested field for each input tuple. Each field has as its field name the name of a binding variable generated by either the <tt>FROM</tt> clause or <tt>GROUP BY</tt> clause in the current enclosing <tt>SELECT</tt> statement, and its field value is the value of that binding variable.</p>
1823<p>Note that the result of <tt>SELECT *</tt> is different from the result of a query that selects all the fields of an object.</p>
1824<div class="section">
1825<div class="section">
1826<h5><a name="Example"></a>Example</h5>
1827
1828<div class="source">
1829<div class="source">
1830<pre>SELECT *
1831FROM GleambookUsers user;
1832</pre></div></div>
1833<p>Since <tt>user</tt> is the only binding variable generated in the <tt>FROM</tt> clause, this query returns:</p>
1834
1835<div class="source">
1836<div class="source">
1837<pre>[ {
1838 &quot;user&quot;: {
1839 &quot;userSince&quot;: &quot;2012-08-20T10:10:00.000Z&quot;,
1840 &quot;friendIds&quot;: [
1841 2,
1842 3,
1843 6,
1844 10
1845 ],
1846 &quot;gender&quot;: &quot;F&quot;,
1847 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
1848 &quot;nickname&quot;: &quot;Mags&quot;,
1849 &quot;alias&quot;: &quot;Margarita&quot;,
1850 &quot;id&quot;: 1,
1851 &quot;employment&quot;: [
1852 {
1853 &quot;organizationName&quot;: &quot;Codetechno&quot;,
1854 &quot;start-date&quot;: &quot;2006-08-06&quot;
1855 },
1856 {
1857 &quot;end-date&quot;: &quot;2010-01-26&quot;,
1858 &quot;organizationName&quot;: &quot;geomedia&quot;,
1859 &quot;start-date&quot;: &quot;2010-06-17&quot;
1860 }
1861 ]
1862 }
1863}, {
1864 &quot;user&quot;: {
1865 &quot;userSince&quot;: &quot;2011-01-22T10:10:00.000Z&quot;,
1866 &quot;friendIds&quot;: [
1867 1,
1868 4
1869 ],
1870 &quot;name&quot;: &quot;IsbelDull&quot;,
1871 &quot;nickname&quot;: &quot;Izzy&quot;,
1872 &quot;alias&quot;: &quot;Isbel&quot;,
1873 &quot;id&quot;: 2,
1874 &quot;employment&quot;: [
1875 {
1876 &quot;organizationName&quot;: &quot;Hexviafind&quot;,
1877 &quot;startDate&quot;: &quot;2010-04-27&quot;
1878 }
1879 ]
1880 }
1881}, {
1882 &quot;user&quot;: {
1883 &quot;userSince&quot;: &quot;2012-07-10T10:10:00.000Z&quot;,
1884 &quot;friendIds&quot;: [
1885 1,
1886 5,
1887 8,
1888 9
1889 ],
1890 &quot;name&quot;: &quot;EmoryUnk&quot;,
1891 &quot;alias&quot;: &quot;Emory&quot;,
1892 &quot;id&quot;: 3,
1893 &quot;employment&quot;: [
1894 {
1895 &quot;organizationName&quot;: &quot;geomedia&quot;,
1896 &quot;endDate&quot;: &quot;2010-01-26&quot;,
1897 &quot;startDate&quot;: &quot;2010-06-17&quot;
1898 }
1899 ]
1900 }
1901} ]
1902</pre></div></div></div>
1903<div class="section">
1904<h5><a name="Example"></a>Example</h5>
1905
1906<div class="source">
1907<div class="source">
1908<pre>SELECT *
1909FROM GleambookUsers u, GleambookMessages m
1910WHERE m.authorId = u.id and u.id = 2;
1911</pre></div></div>
1912<p>This query does an inner join that we will discuss in <a href="#Multiple_from_terms">multiple from terms</a>. Since both <tt>u</tt> and <tt>m</tt> are binding variables generated in the <tt>FROM</tt> clause, this query returns:</p>
1913
1914<div class="source">
1915<div class="source">
1916<pre>[ {
1917 &quot;u&quot;: {
1918 &quot;userSince&quot;: &quot;2011-01-22T10:10:00&quot;,
1919 &quot;friendIds&quot;: [
1920 1,
1921 4
1922 ],
1923 &quot;name&quot;: &quot;IsbelDull&quot;,
1924 &quot;nickname&quot;: &quot;Izzy&quot;,
1925 &quot;alias&quot;: &quot;Isbel&quot;,
1926 &quot;id&quot;: 2,
1927 &quot;employment&quot;: [
1928 {
1929 &quot;organizationName&quot;: &quot;Hexviafind&quot;,
1930 &quot;startDate&quot;: &quot;2010-04-27&quot;
1931 }
1932 ]
1933 },
1934 &quot;m&quot;: {
1935 &quot;senderLocation&quot;: [
1936 31.5,
1937 75.56
1938 ],
1939 &quot;inResponseTo&quot;: 1,
1940 &quot;messageId&quot;: 6,
1941 &quot;authorId&quot;: 2,
1942 &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot;
1943 }
1944}, {
1945 &quot;u&quot;: {
1946 &quot;userSince&quot;: &quot;2011-01-22T10:10:00&quot;,
1947 &quot;friendIds&quot;: [
1948 1,
1949 4
1950 ],
1951 &quot;name&quot;: &quot;IsbelDull&quot;,
1952 &quot;nickname&quot;: &quot;Izzy&quot;,
1953 &quot;alias&quot;: &quot;Isbel&quot;,
1954 &quot;id&quot;: 2,
1955 &quot;employment&quot;: [
1956 {
1957 &quot;organizationName&quot;: &quot;Hexviafind&quot;,
1958 &quot;startDate&quot;: &quot;2010-04-27&quot;
1959 }
1960 ]
1961 },
1962 &quot;m&quot;: {
1963 &quot;senderLocation&quot;: [
1964 48.09,
1965 81.01
1966 ],
1967 &quot;inResponseTo&quot;: 4,
1968 &quot;messageId&quot;: 3,
1969 &quot;authorId&quot;: 2,
1970 &quot;message&quot;: &quot; like product-y the plan is amazing&quot;
1971 }
1972} ]
1973</pre></div></div></div></div></div>
1974<div class="section">
1975<h3><a name="SELECT_DISTINCT"></a><a name="Select_distinct" id="Select_distinct">SELECT DISTINCT</a></h3>
1976<p>SQL++&#x2019;s <tt>DISTINCT</tt> keyword is used to eliminate duplicate items in results. The following example shows how it works.</p>
1977<div class="section">
1978<div class="section">
1979<h5><a name="Example"></a>Example</h5>
1980
1981<div class="source">
1982<div class="source">
1983<pre>SELECT DISTINCT * FROM [1, 2, 2, 3] AS foo;
1984</pre></div></div>
1985<p>This query returns:</p>
1986
1987<div class="source">
1988<div class="source">
1989<pre>[ {
1990 &quot;foo&quot;: 1
1991}, {
1992 &quot;foo&quot;: 2
1993}, {
1994 &quot;foo&quot;: 3
1995} ]
1996</pre></div></div></div>
1997<div class="section">
1998<h5><a name="Example"></a>Example</h5>
1999
2000<div class="source">
2001<div class="source">
2002<pre>SELECT DISTINCT VALUE foo FROM [1, 2, 2, 3] AS foo;
2003</pre></div></div>
2004<p>This version of the query returns:</p>
2005
2006<div class="source">
2007<div class="source">
2008<pre>[ 1
2009, 2
2010, 3
2011 ]
2012</pre></div></div></div></div></div>
2013<div class="section">
2014<h3><a name="Unnamed_Projections"></a><a name="Unnamed_projections" id="Unnamed_projections">Unnamed Projections</a></h3>
2015<p>Similar to standard SQL, SQL++ supports unnamed projections (a.k.a. unnamed <tt>SELECT</tt> clause items), for which names are generated. Name generation has three cases:</p>
2016
2017<ul>
2018
2019<li>If a projection expression is a variable reference expression, its generated name is the name of the variable.</li>
2020
2021<li>If a projection expression is a field access expression, its generated name is the last identifier in the expression.</li>
2022
2023<li>For all other cases, the query processor will generate a unique name.</li>
2024</ul>
2025<div class="section">
2026<div class="section">
2027<h5><a name="Example"></a>Example</h5>
2028
2029<div class="source">
2030<div class="source">
2031<pre>SELECT substr(user.name, 10), user.alias
2032FROM GleambookUsers user
2033WHERE user.id = 1;
2034</pre></div></div>
2035<p>This query outputs:</p>
2036
2037<div class="source">
2038<div class="source">
2039<pre>[ {
2040 &quot;alias&quot;: &quot;Margarita&quot;,
2041 &quot;$1&quot;: &quot;Stoddard&quot;
2042} ]
2043</pre></div></div>
2044<p>In the result, <tt>$1</tt> is the generated name for <tt>substr(user.name, 10)</tt>, while <tt>alias</tt> is the generated name for <tt>user.alias</tt>.</p></div></div></div>
2045<div class="section">
2046<h3><a name="Abbreviated_Field_Access_Expressions"></a><a name="Abbreviated_field_access_expressions" id="Abbreviated_field_access_expressions">Abbreviated Field Access Expressions</a></h3>
2047<p>As in standard SQL, SQL++ field access expressions can be abbreviated (not recommended) when there is no ambiguity. In the next example, the variable <tt>user</tt> is the only possible variable reference for fields <tt>id</tt>, <tt>name</tt> and <tt>alias</tt> and thus could be omitted in the query.</p>
2048<div class="section">
2049<div class="section">
2050<h5><a name="Example"></a>Example</h5>
2051
2052<div class="source">
2053<div class="source">
2054<pre>SELECT substr(name, 10) AS lname, alias
2055FROM GleambookUsers user
2056WHERE id = 1;
2057</pre></div></div>
2058<p>Outputs:</p>
2059
2060<div class="source">
2061<div class="source">
2062<pre>[ {
2063 &quot;lname&quot;: &quot;Stoddard&quot;,
2064 &quot;alias&quot;: &quot;Margarita&quot;
2065} ]
2066</pre></div></div></div></div></div></div>
2067<div class="section">
2068<h2><a name="UNNEST_Clause"></a><a name="Unnest_clauses" id="Unnest_clauses">UNNEST Clause</a></h2>
2069<p>For each of its input tuples, the <tt>UNNEST</tt> clause flattens a collection-valued expression into individual items, producing multiple tuples, each of which is one of the expression&#x2019;s original input tuples augmented with a flattened item from its collection.</p>
2070<div class="section">
2071<h3><a name="Inner_UNNEST"></a><a name="Inner_unnests" id="Inner_unnests">Inner UNNEST</a></h3>
2072<p>The following example is a query that retrieves the names of the organizations that a selected user has worked for. It uses the <tt>UNNEST</tt> clause to unnest the nested collection <tt>employment</tt> in the user&#x2019;s object.</p>
2073<div class="section">
2074<div class="section">
2075<h5><a name="Example"></a>Example</h5>
2076
2077<div class="source">
2078<div class="source">
2079<pre>SELECT u.id AS userId, e.organizationName AS orgName
2080FROM GleambookUsers u
2081UNNEST u.employment e
2082WHERE u.id = 1;
2083</pre></div></div>
2084<p>This query returns:</p>
2085
2086<div class="source">
2087<div class="source">
2088<pre>[ {
2089 &quot;orgName&quot;: &quot;Codetechno&quot;,
2090 &quot;userId&quot;: 1
2091}, {
2092 &quot;orgName&quot;: &quot;geomedia&quot;,
2093 &quot;userId&quot;: 1
2094} ]
2095</pre></div></div>
2096<p>Note that <tt>UNNEST</tt> has SQL&#x2019;s inner join semantics &#x2014; that is, if a user has no employment history, no tuple corresponding to that user will be emitted in the result.</p></div></div></div>
2097<div class="section">
2098<h3><a name="Left_Outer_UNNEST"></a><a name="Left_outer_unnests" id="Left_outer_unnests">Left Outer UNNEST</a></h3>
2099<p>As an alternative, the <tt>LEFT OUTER UNNEST</tt> clause offers SQL&#x2019;s left outer join semantics. For example, no collection-valued field named <tt>hobbies</tt> exists in the object for the user whose id is 1, but the following query&#x2019;s result still includes user 1.</p>
2100<div class="section">
2101<div class="section">
2102<h5><a name="Example"></a>Example</h5>
2103
2104<div class="source">
2105<div class="source">
2106<pre>SELECT u.id AS userId, h.hobbyName AS hobby
2107FROM GleambookUsers u
2108LEFT OUTER UNNEST u.hobbies h
2109WHERE u.id = 1;
2110</pre></div></div>
2111<p>Returns:</p>
2112
2113<div class="source">
2114<div class="source">
2115<pre>[ {
2116 &quot;userId&quot;: 1
2117} ]
2118</pre></div></div>
2119<p>Note that if <tt>u.hobbies</tt> is an empty collection or leads to a <tt>MISSING</tt> (as above) or <tt>NULL</tt> value for a given input tuple, there is no corresponding binding value for variable <tt>h</tt> for that input tuple. A <tt>MISSING</tt> value will be generated for <tt>h</tt> so that the input tuple can still be propagated.</p></div></div></div>
2120<div class="section">
2121<h3><a name="Expressing_Joins_Using_UNNEST"></a><a name="Expressing_joins_using_unnests" id="Expressing_joins_using_unnests">Expressing Joins Using UNNEST</a></h3>
2122<p>The SQL++ <tt>UNNEST</tt> clause is similar to SQL&#x2019;s <tt>JOIN</tt> clause except that it allows its right argument to be correlated to its left argument, as in the examples above &#x2014; i.e., think &#x201c;correlated cross-product&#x201d;. The next example shows this via a query that joins two data sets, GleambookUsers and GleambookMessages, returning user/message pairs. The results contain one object per pair, with result objects containing the user&#x2019;s name and an entire message. The query can be thought of as saying &#x201c;for each Gleambook user, unnest the <tt>GleambookMessages</tt> collection and filter the output with the condition <tt>message.authorId = user.id</tt>&#x201d;.</p>
2123<div class="section">
2124<div class="section">
2125<h5><a name="Example"></a>Example</h5>
2126
2127<div class="source">
2128<div class="source">
2129<pre>SELECT u.name AS uname, m.message AS message
2130FROM GleambookUsers u
2131UNNEST GleambookMessages m
2132WHERE m.authorId = u.id;
2133</pre></div></div>
2134<p>This returns:</p>
2135
2136<div class="source">
2137<div class="source">
2138<pre>[ {
2139 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2140 &quot;message&quot;: &quot; can't stand acast its plan is terrible&quot;
2141}, {
2142 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2143 &quot;message&quot;: &quot; dislike x-phone its touch-screen is horrible&quot;
2144}, {
2145 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2146 &quot;message&quot;: &quot; can't stand acast the network is horrible:(&quot;
2147}, {
2148 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2149 &quot;message&quot;: &quot; like ccast the 3G is awesome:)&quot;
2150}, {
2151 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2152 &quot;message&quot;: &quot; can't stand product-w the touch-screen is terrible&quot;
2153}, {
2154 &quot;uname&quot;: &quot;IsbelDull&quot;,
2155 &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot;
2156}, {
2157 &quot;uname&quot;: &quot;IsbelDull&quot;,
2158 &quot;message&quot;: &quot; like product-y the plan is amazing&quot;
2159} ]
2160</pre></div></div>
2161<p>Similarly, the above query can also be expressed as the <tt>UNNEST</tt>ing of a correlated SQL++ subquery:</p></div>
2162<div class="section">
2163<h5><a name="Example"></a>Example</h5>
2164
2165<div class="source">
2166<div class="source">
2167<pre>SELECT u.name AS uname, m.message AS message
2168FROM GleambookUsers u
2169UNNEST (
2170 SELECT VALUE msg
2171 FROM GleambookMessages msg
2172 WHERE msg.authorId = u.id
2173) AS m;
2174</pre></div></div></div></div></div></div>
2175<div class="section">
2176<h2><a name="FROM_clauses"></a><a name="From_clauses" id="From_clauses">FROM clauses</a></h2>
2177<p>A <tt>FROM</tt> clause is used for enumerating (i.e., conceptually iterating over) the contents of collections, as in SQL.</p>
2178<div class="section">
2179<h3><a name="Binding_expressions" id="Binding_expressions">Binding expressions</a></h3>
2180<p>In SQL++, in addition to stored collections, a <tt>FROM</tt> clause can iterate over any intermediate collection returned by a valid SQL++ expression. In the tuple stream generated by a <tt>FROM</tt> clause, the ordering of the input tuples is not guaranteed to be preserved.</p>
2181<div class="section">
2182<div class="section">
2183<h5><a name="Example"></a>Example</h5>
2184
2185<div class="source">
2186<div class="source">
2187<pre>SELECT VALUE foo
2188FROM [1, 2, 2, 3] AS foo
2189WHERE foo &gt; 2;
2190</pre></div></div>
2191<p>Returns:</p>
2192
2193<div class="source">
2194<div class="source">
2195<pre>[
2196 3
2197]
2198</pre></div></div></div></div></div>
2199<div class="section">
2200<h3><a name="Multiple_FROM_Terms"></a><a name="Multiple_from_terms" id="Multiple_from_terms">Multiple FROM Terms</a></h3>
2201<p>SQL++ permits correlations among <tt>FROM</tt> terms. Specifically, a <tt>FROM</tt> binding expression can refer to variables defined to its left in the given <tt>FROM</tt> clause. Thus, the first unnesting example above could also be expressed as follows:</p>
2202<div class="section">
2203<div class="section">
2204<h5><a name="Example"></a>Example</h5>
2205
2206<div class="source">
2207<div class="source">
2208<pre>SELECT u.id AS userId, e.organizationName AS orgName
2209FROM GleambookUsers u, u.employment e
2210WHERE u.id = 1;
2211</pre></div></div></div></div></div>
2212<div class="section">
2213<h3><a name="Expressing_Joins_Using_FROM_Terms"></a><a name="Expressing_joins_using_from_terms" id="Expressing_joins_using_from_terms">Expressing Joins Using FROM Terms</a></h3>
2214<p>Similarly, the join intentions of the other <tt>UNNEST</tt>-based join examples above could be expressed as:</p>
2215<div class="section">
2216<div class="section">
2217<h5><a name="Example"></a>Example</h5>
2218
2219<div class="source">
2220<div class="source">
2221<pre>SELECT u.name AS uname, m.message AS message
2222FROM GleambookUsers u, GleambookMessages m
2223WHERE m.authorId = u.id;
2224</pre></div></div></div>
2225<div class="section">
2226<h5><a name="Example"></a>Example</h5>
2227
2228<div class="source">
2229<div class="source">
2230<pre>SELECT u.name AS uname, m.message AS message
2231FROM GleambookUsers u,
2232 (
2233 SELECT VALUE msg
2234 FROM GleambookMessages msg
2235 WHERE msg.authorId = u.id
2236 ) AS m;
2237</pre></div></div>
2238<p>Note that the first alternative is one of the SQL-92 approaches to expressing a join.</p></div></div></div>
2239<div class="section">
2240<h3><a name="Implicit_Binding_Variables"></a><a name="Implicit_binding_variables" id="Implicit_binding_variables">Implicit Binding Variables</a></h3>
2241<p>Similar to standard SQL, SQL++ supports implicit <tt>FROM</tt> binding variables (i.e., aliases), for which a binding variable is generated. SQL++ variable generation falls into three cases:</p>
2242
2243<ul>
2244
2245<li>If the binding expression is a variable reference expression, the generated variable&#x2019;s name will be the name of the referenced variable itself.</li>
2246
2247<li>If the binding expression is a field access expression (or a fully qualified name for a dataset), the generated variable&#x2019;s name will be the last identifier (or the dataset name) in the expression.</li>
2248
2249<li>For all other cases, a compilation error will be raised.</li>
2250</ul>
2251<p>The next two examples show queries that do not provide binding variables in their <tt>FROM</tt> clauses.</p>
2252<div class="section">
2253<div class="section">
2254<h5><a name="Example"></a>Example</h5>
2255
2256<div class="source">
2257<div class="source">
2258<pre>SELECT GleambookUsers.name, GleambookMessages.message
2259FROM GleambookUsers, GleambookMessages
2260WHERE GleambookMessages.authorId = GleambookUsers.id;
2261</pre></div></div>
2262<p>Returns:</p>
2263
2264<div class="source">
2265<div class="source">
2266<pre>[ {
2267 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2268 &quot;message&quot;: &quot; like ccast the 3G is awesome:)&quot;
2269}, {
2270 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2271 &quot;message&quot;: &quot; can't stand product-w the touch-screen is terrible&quot;
2272}, {
2273 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2274 &quot;message&quot;: &quot; can't stand acast its plan is terrible&quot;
2275}, {
2276 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2277 &quot;message&quot;: &quot; dislike x-phone its touch-screen is horrible&quot;
2278}, {
2279 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
2280 &quot;message&quot;: &quot; can't stand acast the network is horrible:(&quot;
2281}, {
2282 &quot;name&quot;: &quot;IsbelDull&quot;,
2283 &quot;message&quot;: &quot; like product-y the plan is amazing&quot;
2284}, {
2285 &quot;name&quot;: &quot;IsbelDull&quot;,
2286 &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot;
2287} ]
2288</pre></div></div></div>
2289<div class="section">
2290<h5><a name="Example"></a>Example</h5>
2291
2292<div class="source">
2293<div class="source">
2294<pre>SELECT GleambookUsers.name, GleambookMessages.message
2295FROM GleambookUsers,
2296 (
2297 SELECT VALUE GleambookMessages
2298 FROM GleambookMessages
2299 WHERE GleambookMessages.authorId = GleambookUsers.id
2300 );
2301</pre></div></div>
2302<p>Returns:</p>
2303
2304<div class="source">
2305<div class="source">
2306<pre>Error: &quot;Syntax error: Need an alias for the enclosed expression:\n(select element GleambookMessages\n from GleambookMessages as GleambookMessages\n where (GleambookMessages.authorId = GleambookUsers.id)\n )&quot;,
2307 &quot;query_from_user&quot;: &quot;use TinySocial;\n\nSELECT GleambookUsers.name, GleambookMessages.message\n FROM GleambookUsers,\n (\n SELECT VALUE GleambookMessages\n FROM GleambookMessages\n WHERE GleambookMessages.authorId = GleambookUsers.id\n );&quot;
2308</pre></div></div></div></div></div></div>
2309<div class="section">
2310<h2><a name="JOIN_Clauses"></a><a name="Join_clauses" id="Join_clauses">JOIN Clauses</a></h2>
2311<p>The join clause in SQL++ supports both inner joins and left outer joins from standard SQL.</p>
2312<div class="section">
2313<h3><a name="Inner_joins" id="Inner_joins">Inner joins</a></h3>
2314<p>Using a <tt>JOIN</tt> clause, the inner join intent from the preceding examples can also be expressed as follows:</p>
2315<div class="section">
2316<div class="section">
2317<h5><a name="Example"></a>Example</h5>
2318
2319<div class="source">
2320<div class="source">
2321<pre>SELECT u.name AS uname, m.message AS message
2322FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id;
2323</pre></div></div></div></div></div>
2324<div class="section">
2325<h3><a name="Left_Outer_Joins"></a><a name="Left_outer_joins" id="Left_outer_joins">Left Outer Joins</a></h3>
2326<p>SQL++ supports SQL&#x2019;s notion of left outer join. The following query is an example:</p>
2327
2328<div class="source">
2329<div class="source">
2330<pre>SELECT u.name AS uname, m.message AS message
2331FROM GleambookUsers u LEFT OUTER JOIN GleambookMessages m ON m.authorId = u.id;
2332</pre></div></div>
2333<p>Returns:</p>
2334
2335<div class="source">
2336<div class="source">
2337<pre>[ {
2338 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2339 &quot;message&quot;: &quot; like ccast the 3G is awesome:)&quot;
2340}, {
2341 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2342 &quot;message&quot;: &quot; can't stand product-w the touch-screen is terrible&quot;
2343}, {
2344 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2345 &quot;message&quot;: &quot; can't stand acast its plan is terrible&quot;
2346}, {
2347 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2348 &quot;message&quot;: &quot; dislike x-phone its touch-screen is horrible&quot;
2349}, {
2350 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
2351 &quot;message&quot;: &quot; can't stand acast the network is horrible:(&quot;
2352}, {
2353 &quot;uname&quot;: &quot;IsbelDull&quot;,
2354 &quot;message&quot;: &quot; like product-y the plan is amazing&quot;
2355}, {
2356 &quot;uname&quot;: &quot;IsbelDull&quot;,
2357 &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot;
2358}, {
2359 &quot;uname&quot;: &quot;EmoryUnk&quot;
2360} ]
2361</pre></div></div>
2362<p>For non-matching left-side tuples, SQL++ produces <tt>MISSING</tt> values for the right-side binding variables; that is why the last object in the above result doesn&#x2019;t have a <tt>message</tt> field. Note that this is slightly different from standard SQL, which instead would fill in <tt>NULL</tt> values for the right-side fields. The reason for this difference is that, for non-matches in its join results, SQL++ views fields from the right-side as being &#x201c;not there&#x201d; (a.k.a. <tt>MISSING</tt>) instead of as being &#x201c;there but unknown&#x201d; (i.e., <tt>NULL</tt>).</p>
2363<p>The left-outer join query can also be expressed using <tt>LEFT OUTER UNNEST</tt>:</p>
2364
2365<div class="source">
2366<div class="source">
2367<pre>SELECT u.name AS uname, m.message AS message
2368FROM GleambookUsers u
2369LEFT OUTER UNNEST (
2370 SELECT VALUE message
2371 FROM GleambookMessages message
2372 WHERE message.authorId = u.id
2373 ) m;
2374</pre></div></div>
2375<p>In general, in SQL++, SQL-style join queries can also be expressed by <tt>UNNEST</tt> clauses and left outer join queries can be expressed by <tt>LEFT OUTER UNNESTs</tt>.</p></div></div>
2376<div class="section">
2377<h2><a name="GROUP_BY_Clauses"></a><a name="Group_By_clauses" id="Group_By_clauses">GROUP BY Clauses</a></h2>
2378<p>The SQL++ <tt>GROUP BY</tt> clause generalizes standard SQL&#x2019;s grouping and aggregation semantics, but it also retains backward compatibility with the standard (relational) SQL <tt>GROUP BY</tt> and aggregation features.</p>
2379<div class="section">
2380<h3><a name="Group_variables" id="Group_variables">Group variables</a></h3>
2381<p>In a <tt>GROUP BY</tt> clause, in addition to the binding variable(s) defined for the grouping key(s), SQL++ allows a user to define a <i>group variable</i> by using the clause&#x2019;s <tt>GROUP AS</tt> extension to denote the resulting group. After grouping, then, the query&#x2019;s in-scope variables include the grouping key&#x2019;s binding variables as well as this group variable which will be bound to one collection value for each group. This per-group collection (i.e., multiset) value will be a set of nested objects in which each field of the object is the result of a renamed variable defined in parentheses following the group variable&#x2019;s name. The <tt>GROUP AS</tt> syntax is as follows:</p>
2382
2383<div class="source">
2384<div class="source">
2385<pre>&lt;GROUP&gt; &lt;AS&gt; Variable (&quot;(&quot; Variable &lt;AS&gt; VariableReference (&quot;,&quot; Variable &lt;AS&gt; VariableReference )* &quot;)&quot;)?
2386</pre></div></div>
2387<div class="section">
2388<div class="section">
2389<h5><a name="Example"></a>Example</h5>
2390
2391<div class="source">
2392<div class="source">
2393<pre>SELECT *
2394FROM GleambookMessages message
2395GROUP BY message.authorId AS uid GROUP AS msgs(message AS msg);
2396</pre></div></div>
2397<p>This first example query returns:</p>
2398
2399<div class="source">
2400<div class="source">
2401<pre>[ {
2402 &quot;msgs&quot;: [
2403 {
2404 &quot;msg&quot;: {
2405 &quot;senderLocation&quot;: [
2406 38.97,
2407 77.49
2408 ],
2409 &quot;inResponseTo&quot;: 1,
2410 &quot;messageId&quot;: 11,
2411 &quot;authorId&quot;: 1,
2412 &quot;message&quot;: &quot; can't stand acast its plan is terrible&quot;
2413 }
2414 },
2415 {
2416 &quot;msg&quot;: {
2417 &quot;senderLocation&quot;: [
2418 41.66,
2419 80.87
2420 ],
2421 &quot;inResponseTo&quot;: 4,
2422 &quot;messageId&quot;: 2,
2423 &quot;authorId&quot;: 1,
2424 &quot;message&quot;: &quot; dislike x-phone its touch-screen is horrible&quot;
2425 }
2426 },
2427 {
2428 &quot;msg&quot;: {
2429 &quot;senderLocation&quot;: [
2430 37.73,
2431 97.04
2432 ],
2433 &quot;inResponseTo&quot;: 2,
2434 &quot;messageId&quot;: 4,
2435 &quot;authorId&quot;: 1,
2436 &quot;message&quot;: &quot; can't stand acast the network is horrible:(&quot;
2437 }
2438 },
2439 {
2440 &quot;msg&quot;: {
2441 &quot;senderLocation&quot;: [
2442 40.33,
2443 80.87
2444 ],
2445 &quot;inResponseTo&quot;: 11,
2446 &quot;messageId&quot;: 8,
2447 &quot;authorId&quot;: 1,
2448 &quot;message&quot;: &quot; like ccast the 3G is awesome:)&quot;
2449 }
2450 },
2451 {
2452 &quot;msg&quot;: {
2453 &quot;senderLocation&quot;: [
2454 42.5,
2455 70.01
2456 ],
2457 &quot;inResponseTo&quot;: 12,
2458 &quot;messageId&quot;: 10,
2459 &quot;authorId&quot;: 1,
2460 &quot;message&quot;: &quot; can't stand product-w the touch-screen is terrible&quot;
2461 }
2462 }
2463 ],
2464 &quot;uid&quot;: 1
2465}, {
2466 &quot;msgs&quot;: [
2467 {
2468 &quot;msg&quot;: {
2469 &quot;senderLocation&quot;: [
2470 31.5,
2471 75.56
2472 ],
2473 &quot;inResponseTo&quot;: 1,
2474 &quot;messageId&quot;: 6,
2475 &quot;authorId&quot;: 2,
2476 &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot;
2477 }
2478 },
2479 {
2480 &quot;msg&quot;: {
2481 &quot;senderLocation&quot;: [
2482 48.09,
2483 81.01
2484 ],
2485 &quot;inResponseTo&quot;: 4,
2486 &quot;messageId&quot;: 3,
2487 &quot;authorId&quot;: 2,
2488 &quot;message&quot;: &quot; like product-y the plan is amazing&quot;
2489 }
2490 }
2491 ],
2492 &quot;uid&quot;: 2
2493} ]
2494</pre></div></div>
2495<p>As we can see from the above query result, each group in the example query&#x2019;s output has an associated group variable value called <tt>msgs</tt> that appears in the <tt>SELECT *</tt>&#x2019;s result. This variable contains a collection of objects associated with the group; each of the group&#x2019;s <tt>message</tt> values appears in the <tt>msg</tt> field of the objects in the <tt>msgs</tt> collection.</p>
2496<p>The group variable in SQL++ makes more complex, composable, nested subqueries over a group possible, which is important given the more complex data model of SQL++ (relative to SQL). As a simple example of this, as we really just want the messages associated with each user, we might wish to avoid the &#x201c;extra wrapping&#x201d; of each message as the <tt>msg</tt> field of an object. (That wrapping is useful in more complex cases, but is essentially just in the way here.) We can use a subquery in the <tt>SELECT</tt> clause to tunnel through the extra nesting and produce the desired result.</p></div>
2497<div class="section">
2498<h5><a name="Example"></a>Example</h5>
2499
2500<div class="source">
2501<div class="source">
2502<pre>SELECT uid, (SELECT VALUE g.msg FROM g) AS msgs
2503FROM GleambookMessages gbm
2504GROUP BY gbm.authorId AS uid
2505GROUP AS g(gbm as msg);
2506</pre></div></div>
2507<p>This variant of the example query returns:</p>
2508
2509<div class="source">
2510<div class="source">
2511<pre> [ {
2512 &quot;msgs&quot;: [
2513 {
2514 &quot;senderLocation&quot;: [
2515 38.97,
2516 77.49
2517 ],
2518 &quot;inResponseTo&quot;: 1,
2519 &quot;messageId&quot;: 11,
2520 &quot;authorId&quot;: 1,
2521 &quot;message&quot;: &quot; can't stand acast its plan is terrible&quot;
2522 },
2523 {
2524 &quot;senderLocation&quot;: [
2525 41.66,
2526 80.87
2527 ],
2528 &quot;inResponseTo&quot;: 4,
2529 &quot;messageId&quot;: 2,
2530 &quot;authorId&quot;: 1,
2531 &quot;message&quot;: &quot; dislike x-phone its touch-screen is horrible&quot;
2532 },
2533 {
2534 &quot;senderLocation&quot;: [
2535 37.73,
2536 97.04
2537 ],
2538 &quot;inResponseTo&quot;: 2,
2539 &quot;messageId&quot;: 4,
2540 &quot;authorId&quot;: 1,
2541 &quot;message&quot;: &quot; can't stand acast the network is horrible:(&quot;
2542 },
2543 {
2544 &quot;senderLocation&quot;: [
2545 40.33,
2546 80.87
2547 ],
2548 &quot;inResponseTo&quot;: 11,
2549 &quot;messageId&quot;: 8,
2550 &quot;authorId&quot;: 1,
2551 &quot;message&quot;: &quot; like ccast the 3G is awesome:)&quot;
2552 },
2553 {
2554 &quot;senderLocation&quot;: [
2555 42.5,
2556 70.01
2557 ],
2558 &quot;inResponseTo&quot;: 12,
2559 &quot;messageId&quot;: 10,
2560 &quot;authorId&quot;: 1,
2561 &quot;message&quot;: &quot; can't stand product-w the touch-screen is terrible&quot;
2562 }
2563 ],
2564 &quot;uid&quot;: 1
2565 }, {
2566 &quot;msgs&quot;: [
2567 {
2568 &quot;senderLocation&quot;: [
2569 31.5,
2570 75.56
2571 ],
2572 &quot;inResponseTo&quot;: 1,
2573 &quot;messageId&quot;: 6,
2574 &quot;authorId&quot;: 2,
2575 &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot;
2576 },
2577 {
2578 &quot;senderLocation&quot;: [
2579 48.09,
2580 81.01
2581 ],
2582 &quot;inResponseTo&quot;: 4,
2583 &quot;messageId&quot;: 3,
2584 &quot;authorId&quot;: 2,
2585 &quot;message&quot;: &quot; like product-y the plan is amazing&quot;
2586 }
2587 ],
2588 &quot;uid&quot;: 2
2589 } ]
2590</pre></div></div>
2591<p>The next example shows a more interesting case involving the use of a subquery in the <tt>SELECT</tt> list. Here the subquery further processes the groups. There is no renaming in the declaration of the group variable <tt>g</tt>, so <tt>g</tt> only has one field, <tt>gbm</tt>, which comes from the <tt>FROM</tt> clause.</p></div>
2592<div class="section">
2593<h5><a name="Example"></a>Example</h5>
2594
2595<div class="source">
2596<div class="source">
2597<pre>SELECT uid,
2598 (SELECT VALUE g.gbm
2599 FROM g
2600 WHERE g.gbm.message LIKE '% like%'
2601 ORDER BY g.gbm.messageId
2602 LIMIT 2) AS msgs
2603FROM GleambookMessages gbm
2604GROUP BY gbm.authorId AS uid
2605GROUP AS g;
2606</pre></div></div>
2607<p>This example query returns:</p>
2608
2609<div class="source">
2610<div class="source">
2611<pre>[ {
2612 &quot;msgs&quot;: [
2613 {
2614 &quot;senderLocation&quot;: [
2615 40.33,
2616 80.87
2617 ],
2618 &quot;inResponseTo&quot;: 11,
2619 &quot;messageId&quot;: 8,
2620 &quot;authorId&quot;: 1,
2621 &quot;message&quot;: &quot; like ccast the 3G is awesome:)&quot;
2622 }
2623 ],
2624 &quot;uid&quot;: 1
2625}, {
2626 &quot;msgs&quot;: [
2627 {
2628 &quot;senderLocation&quot;: [
2629 48.09,
2630 81.01
2631 ],
2632 &quot;inResponseTo&quot;: 4,
2633 &quot;messageId&quot;: 3,
2634 &quot;authorId&quot;: 2,
2635 &quot;message&quot;: &quot; like product-y the plan is amazing&quot;
2636 },
2637 {
2638 &quot;senderLocation&quot;: [
2639 31.5,
2640 75.56
2641 ],
2642 &quot;inResponseTo&quot;: 1,
2643 &quot;messageId&quot;: 6,
2644 &quot;authorId&quot;: 2,
2645 &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot;
2646 }
2647 ],
2648 &quot;uid&quot;: 2
2649} ]
2650</pre></div></div></div></div></div>
2651<div class="section">
2652<h3><a name="Implicit_Grouping_Key_Variables"></a><a name="Implicit_group_key_variables" id="Implicit_group_key_variables">Implicit Grouping Key Variables</a></h3>
2653<p>In the SQL++ syntax, providing named binding variables for <tt>GROUP BY</tt> key expressions is optional. If a grouping key is missing a user-provided binding variable, the underlying compiler will generate one. Automatic grouping key variable naming falls into three cases in SQL++, much like the treatment of unnamed projections:</p>
2654
2655<ul>
2656
2657<li>If the grouping key expression is a variable reference expression, the generated variable gets the same name as the referred variable;</li>
2658
2659<li>If the grouping key expression is a field access expression, the generated variable gets the same name as the last identifier in the expression;</li>
2660
2661<li>For all other cases, the compiler generates a unique variable (but the user query is unable to refer to this generated variable).</li>
2662</ul>
2663<p>The next example illustrates a query that doesn&#x2019;t provide binding variables for its grouping key expressions.</p>
2664<div class="section">
2665<div class="section">
2666<h5><a name="Example"></a>Example</h5>
2667
2668<div class="source">
2669<div class="source">
2670<pre>SELECT authorId,
2671 (SELECT VALUE g.gbm
2672 FROM g
2673 WHERE g.gbm.message LIKE '% like%'
2674 ORDER BY g.gbm.messageId
2675 LIMIT 2) AS msgs
2676FROM GleambookMessages gbm
2677GROUP BY gbm.authorId
2678GROUP AS g;
2679</pre></div></div>
2680<p>This query returns:</p>
2681
2682<div class="source">
2683<div class="source">
2684<pre> [ {
2685 &quot;msgs&quot;: [
2686 {
2687 &quot;senderLocation&quot;: [
2688 40.33,
2689 80.87
2690 ],
2691 &quot;inResponseTo&quot;: 11,
2692 &quot;messageId&quot;: 8,
2693 &quot;authorId&quot;: 1,
2694 &quot;message&quot;: &quot; like ccast the 3G is awesome:)&quot;
2695 }
2696 ],
2697 &quot;authorId&quot;: 1
2698}, {
2699 &quot;msgs&quot;: [
2700 {
2701 &quot;senderLocation&quot;: [
2702 48.09,
2703 81.01
2704 ],
2705 &quot;inResponseTo&quot;: 4,
2706 &quot;messageId&quot;: 3,
2707 &quot;authorId&quot;: 2,
2708 &quot;message&quot;: &quot; like product-y the plan is amazing&quot;
2709 },
2710 {
2711 &quot;senderLocation&quot;: [
2712 31.5,
2713 75.56
2714 ],
2715 &quot;inResponseTo&quot;: 1,
2716 &quot;messageId&quot;: 6,
2717 &quot;authorId&quot;: 2,
2718 &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot;
2719 }
2720 ],
2721 &quot;authorId&quot;: 2
2722} ]
2723</pre></div></div>
2724<p>Based on the three variable generation rules, the generated variable for the grouping key expression <tt>gbm.authorId</tt> is <tt>authorId</tt> (which is how it is referred to in the example&#x2019;s <tt>SELECT</tt> clause).</p></div></div></div>
2725<div class="section">
2726<h3><a name="Implicit_Group_Variables"></a><a name="Implicit_group_variables" id="Implicit_group_variables">Implicit Group Variables</a></h3>
2727<p>The group variable itself is also optional in SQL++&#x2019;s <tt>GROUP BY</tt> syntax. If a user&#x2019;s query does not declare the name and structure of the group variable using <tt>GROUP AS</tt>, the query compiler will generate a unique group variable whose fields include all of the binding variables defined in the <tt>FROM</tt> clause of the current enclosing <tt>SELECT</tt> statement. In this case the user&#x2019;s query will not be able to refer to the generated group variable, but is able to call SQL-92 aggregation functions as in SQL-92.</p></div>
2728<div class="section">
2729<h3><a name="Aggregation_Functions"></a><a name="Aggregation_functions" id="Aggregation_functions">Aggregation Functions</a></h3>
2730<p>In traditional SQL, which doesn&#x2019;t support nested data, grouping always also involves the use of aggregation to compute properties of the groups (for example, the average number of messages per user rather than the actual set of messages per user). Each aggregation function in SQL++ takes a collection (for example, the group of messages) as its input and produces a scalar value as its output. These aggregation functions, being truly functional in nature (unlike in SQL), can be used anywhere in a query where an expression is allowed. The following table catalogs the SQL++ built-in aggregation functions and also indicates how each one handles <tt>NULL</tt>/<tt>MISSING</tt> values in the input collection or a completely empty input collection:</p>
2731
2732<table border="0" class="table table-striped">
2733 <thead>
2734
2735<tr class="a">
2736
2737<th>Function </th>
2738
2739<th>NULL </th>
2740
2741<th>MISSING </th>
2742
2743<th>Empty Collection </th>
2744 </tr>
2745 </thead>
2746 <tbody>
2747
2748<tr class="b">
2749
2750<td>COLL_COUNT </td>
2751
2752<td>counted </td>
2753
2754<td>counted </td>
2755
2756<td>0 </td>
2757 </tr>
2758
2759<tr class="a">
2760
2761<td>COLL_SUM </td>
2762
2763<td>returns NULL </td>
2764
2765<td>returns NULL </td>
2766
2767<td>returns NULL </td>
2768 </tr>
2769
2770<tr class="b">
2771
2772<td>COLL_MAX </td>
2773
2774<td>returns NULL </td>
2775
2776<td>returns NULL </td>
2777
2778<td>returns NULL </td>
2779 </tr>
2780
2781<tr class="a">
2782
2783<td>COLL_MIN </td>
2784
2785<td>returns NULL </td>
2786
2787<td>returns NULL </td>
2788
2789<td>returns NULL </td>
2790 </tr>
2791
2792<tr class="b">
2793
2794<td>COLL_AVG </td>
2795
2796<td>returns NULL </td>
2797
2798<td>returns NULL </td>
2799
2800<td>returns NULL </td>
2801 </tr>
2802
2803<tr class="a">
2804
2805<td>ARRAY_COUNT </td>
2806
2807<td>not counted </td>
2808
2809<td>not counted </td>
2810
2811<td>0 </td>
2812 </tr>
2813
2814<tr class="b">
2815
2816<td>ARRAY_SUM </td>
2817
2818<td>ignores NULL </td>
2819
2820<td>ignores NULL </td>
2821
2822<td>returns NULL </td>
2823 </tr>
2824
2825<tr class="a">
2826
2827<td>ARRAY_MAX </td>
2828
2829<td>ignores NULL </td>
2830
2831<td>ignores NULL </td>
2832
2833<td>returns NULL </td>
2834 </tr>
2835
2836<tr class="b">
2837
2838<td>ARRAY_MIN </td>
2839
2840<td>ignores NULL </td>
2841
2842<td>ignores NULL </td>
2843
2844<td>returns NULL </td>
2845 </tr>
2846
2847<tr class="a">
2848
2849<td>ARRAY_AVG </td>
2850
2851<td>ignores NULL </td>
2852
2853<td>ignores NULL </td>
2854
2855<td>returns NULL </td>
2856 </tr>
2857 </tbody>
2858</table>
2859<p>Notice that SQL++ has twice as many functions listed above as there are aggregate functions in SQL-92. This is because SQL++ offers two versions of each &#x2013; one that handles <tt>UNKNOWN</tt> values in a semantically strict fashion, where unknown values in the input result in unknown values in the output &#x2013; and one that handles them in the ad hoc &#x201c;just ignore the unknown values&#x201d; fashion that the SQL standard chose to adopt.</p>
2860<div class="section">
2861<div class="section">
2862<h5><a name="Example"></a>Example</h5>
2863
2864<div class="source">
2865<div class="source">
2866<pre>ARRAY_AVG(
2867 (
2868 SELECT VALUE ARRAY_COUNT(friendIds) FROM GleambookUsers
2869 )
2870);
2871</pre></div></div>
2872<p>This example returns:</p>
2873
2874<div class="source">
2875<div class="source">
2876<pre>3.3333333333333335
2877</pre></div></div></div>
2878<div class="section">
2879<h5><a name="Example"></a>Example</h5>
2880
2881<div class="source">
2882<div class="source">
2883<pre>SELECT uid AS uid, ARRAY_COUNT(grp) AS msgCnt
2884FROM GleambookMessages message
2885GROUP BY message.authorId AS uid
2886GROUP AS grp(message AS msg);
2887</pre></div></div>
2888<p>This query returns:</p>
2889
2890<div class="source">
2891<div class="source">
2892<pre>[ {
2893 &quot;uid&quot;: 1,
2894 &quot;msgCnt&quot;: 5
2895}, {
2896 &quot;uid&quot;: 2,
2897 &quot;msgCnt&quot;: 2
2898} ]
2899</pre></div></div>
2900<p>Notice how the query forms groups where each group involves a message author and their messages. (SQL cannot do this because the grouped intermediate result is non-1NF in nature.) The query then uses the collection aggregate function ARRAY_COUNT to get the cardinality of each group of messages.</p></div></div></div>
2901<div class="section">
2902<h3><a name="SQL-92_Aggregation_Functions"></a><a name="SQL-92_aggregation_functions" id="SQL-92_aggregation_functions">SQL-92 Aggregation Functions</a></h3>
2903<p>For compatibility with the traditional SQL aggregation functions, SQL++ also offers SQL-92&#x2019;s aggregation function symbols (<tt>COUNT</tt>, <tt>SUM</tt>, <tt>MAX</tt>, <tt>MIN</tt>, and <tt>AVG</tt>) as supported syntactic sugar. The SQL++ compiler rewrites queries that utilize these function symbols into SQL++ queries that only use the SQL++ collection aggregate functions. The following example uses the SQL-92 syntax approach to compute a result that is identical to that of the more explicit SQL++ example above:</p>
2904<div class="section">
2905<div class="section">
2906<h5><a name="Example"></a>Example</h5>
2907
2908<div class="source">
2909<div class="source">
2910<pre>SELECT uid, COUNT(*) AS msgCnt
2911FROM GleambookMessages msg
2912GROUP BY msg.authorId AS uid;
2913</pre></div></div>
2914<p>It is important to realize that <tt>COUNT</tt> is actually <b>not</b> a SQL++ built-in aggregation function. Rather, the <tt>COUNT</tt> query above is using a special &#x201c;sugared&#x201d; function symbol that the SQL++ compiler will rewrite as follows:</p>
2915
2916<div class="source">
2917<div class="source">
2918<pre>SELECT uid AS uid, ARRAY_COUNT( (SELECT VALUE 1 FROM `$1` as g) ) AS msgCnt
2919FROM GleambookMessages msg
2920GROUP BY msg.authorId AS uid
2921GROUP AS `$1`(msg AS msg);
2922</pre></div></div>
2923<p>The same sort of rewritings apply to the function symbols <tt>SUM</tt>, <tt>MAX</tt>, <tt>MIN</tt>, and <tt>AVG</tt>. In contrast to the SQL++ collection aggregate functions, these special SQL-92 function symbols can only be used in the same way they are in standard SQL (i.e., with the same restrictions).</p></div></div></div>
2924<div class="section">
2925<h3><a name="SQL-92_Compliant_GROUP_BY_Aggregations"></a><a name="SQL-92_compliant_gby" id="SQL-92_compliant_gby">SQL-92 Compliant GROUP BY Aggregations</a></h3>
2926<p>SQL++ provides full support for SQL-92 <tt>GROUP BY</tt> aggregation queries. The following query is such an example:</p>
2927<div class="section">
2928<div class="section">
2929<h5><a name="Example"></a>Example</h5>
2930
2931<div class="source">
2932<div class="source">
2933<pre>SELECT msg.authorId, COUNT(*)
2934FROM GleambookMessages msg
2935GROUP BY msg.authorId;
2936</pre></div></div>
2937<p>This query outputs:</p>
2938
2939<div class="source">
2940<div class="source">
2941<pre>[ {
2942 &quot;authorId&quot;: 1,
2943 &quot;$1&quot;: 5
2944}, {
2945 &quot;authorId&quot;: 2,
2946 &quot;$1&quot;: 2
2947} ]
2948</pre></div></div>
2949<p>In principle, a <tt>msg</tt> reference in the query&#x2019;s <tt>SELECT</tt> clause would be &#x201c;sugarized&#x201d; as a collection (as described in <a href="#Implicit_group_variables">Implicit Group Variables</a>). However, since the SELECT expression <tt>msg.authorId</tt> is syntactically identical to a GROUP BY key expression, it will be internally replaced by the generated group key variable. The following is the equivalent rewritten query that will be generated by the compiler for the query above:</p>
2950
2951<div class="source">
2952<div class="source">
2953<pre>SELECT authorId AS authorId, ARRAY_COUNT( (SELECT g.msg FROM `$1` AS g) )
2954FROM GleambookMessages msg
2955GROUP BY msg.authorId AS authorId
2956GROUP AS `$1`(msg AS msg);
2957</pre></div></div></div></div></div>
2958<div class="section">
2959<h3><a name="Column_Aliases"></a><a name="Column_aliases" id="Column_aliases">Column Aliases</a></h3>
2960<p>SQL++ also allows column aliases to be used as <tt>GROUP BY</tt> keys or <tt>ORDER BY</tt> keys.</p>
2961<div class="section">
2962<div class="section">
2963<h5><a name="Example"></a>Example</h5>
2964
2965<div class="source">
2966<div class="source">
2967<pre>SELECT msg.authorId AS aid, COUNT(*)
2968FROM GleambookMessages msg
2969GROUP BY aid;
2970</pre></div></div>
2971<p>This query returns:</p>
2972
2973<div class="source">
2974<div class="source">
2975<pre>[ {
2976 &quot;$1&quot;: 5,
2977 &quot;aid&quot;: 1
2978}, {
2979 &quot;$1&quot;: 2,
2980 &quot;aid&quot;: 2
2981} ]
2982</pre></div></div></div></div></div></div>
2983<div class="section">
2984<h2><a name="WHERE_Clauses_and_HAVING_Clauses"></a><a name="Where_having_clauses" id="Where_having_clauses">WHERE Clauses and HAVING Clauses</a></h2>
2985<p>Both <tt>WHERE</tt> clauses and <tt>HAVING</tt> clauses are used to filter input data based on a condition expression. Only tuples for which the condition expression evaluates to <tt>TRUE</tt> are propagated. Note that if the condition expression evaluates to <tt>NULL</tt> or <tt>MISSING</tt>, the input tuple will be discarded.</p></div>
2986<div class="section">
2987<h2><a name="ORDER_BY_Clauses"></a><a name="Order_By_clauses" id="Order_By_clauses">ORDER BY Clauses</a></h2>
2988<p>The <tt>ORDER BY</tt> clause is used to globally sort data in either ascending order (i.e., <tt>ASC</tt>) or descending order (i.e., <tt>DESC</tt>). During ordering, <tt>MISSING</tt> and <tt>NULL</tt> are treated as being smaller than any other value if they are encountered in the ordering key(s). <tt>MISSING</tt> is treated as smaller than <tt>NULL</tt> if both occur in the data being sorted. The following example returns all <tt>GleambookUsers</tt> in descending order by their number of friends.</p>
2989<div class="section">
2990<div class="section">
2991<div class="section">
2992<h5><a name="Example"></a>Example</h5>
2993
2994<div class="source">
2995<div class="source">
2996<pre> SELECT VALUE user
2997 FROM GleambookUsers AS user
2998 ORDER BY ARRAY_COUNT(user.friendIds) DESC;
2999</pre></div></div>
3000<p>This query returns:</p>
3001
3002<div class="source">
3003<div class="source">
3004<pre> [ {
3005 &quot;userSince&quot;: &quot;2012-08-20T10:10:00.000Z&quot;,
3006 &quot;friendIds&quot;: [
3007 2,
3008 3,
3009 6,
3010 10
3011 ],
3012 &quot;gender&quot;: &quot;F&quot;,
3013 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
3014 &quot;nickname&quot;: &quot;Mags&quot;,
3015 &quot;alias&quot;: &quot;Margarita&quot;,
3016 &quot;id&quot;: 1,
3017 &quot;employment&quot;: [
3018 {
3019 &quot;organizationName&quot;: &quot;Codetechno&quot;,
3020 &quot;start-date&quot;: &quot;2006-08-06&quot;
3021 },
3022 {
3023 &quot;end-date&quot;: &quot;2010-01-26&quot;,
3024 &quot;organizationName&quot;: &quot;geomedia&quot;,
3025 &quot;start-date&quot;: &quot;2010-06-17&quot;
3026 }
3027 ]
3028 }, {
3029 &quot;userSince&quot;: &quot;2012-07-10T10:10:00.000Z&quot;,
3030 &quot;friendIds&quot;: [
3031 1,
3032 5,
3033 8,
3034 9
3035 ],
3036 &quot;name&quot;: &quot;EmoryUnk&quot;,
3037 &quot;alias&quot;: &quot;Emory&quot;,
3038 &quot;id&quot;: 3,
3039 &quot;employment&quot;: [
3040 {
3041 &quot;organizationName&quot;: &quot;geomedia&quot;,
3042 &quot;endDate&quot;: &quot;2010-01-26&quot;,
3043 &quot;startDate&quot;: &quot;2010-06-17&quot;
3044 }
3045 ]
3046 }, {
3047 &quot;userSince&quot;: &quot;2011-01-22T10:10:00.000Z&quot;,
3048 &quot;friendIds&quot;: [
3049 1,
3050 4
3051 ],
3052 &quot;name&quot;: &quot;IsbelDull&quot;,
3053 &quot;nickname&quot;: &quot;Izzy&quot;,
3054 &quot;alias&quot;: &quot;Isbel&quot;,
3055 &quot;id&quot;: 2,
3056 &quot;employment&quot;: [
3057 {
3058 &quot;organizationName&quot;: &quot;Hexviafind&quot;,
3059 &quot;startDate&quot;: &quot;2010-04-27&quot;
3060 }
3061 ]
3062 } ]
3063</pre></div></div></div></div></div></div>
3064<div class="section">
3065<h2><a name="LIMIT_Clauses"></a><a name="Limit_clauses" id="Limit_clauses">LIMIT Clauses</a></h2>
3066<p>The <tt>LIMIT</tt> clause is used to limit the result set to a specified constant size. The use of the <tt>LIMIT</tt> clause is illustrated in the next example.</p>
3067<div class="section">
3068<div class="section">
3069<div class="section">
3070<h5><a name="Example"></a>Example</h5>
3071
3072<div class="source">
3073<div class="source">
3074<pre> SELECT VALUE user
3075 FROM GleambookUsers AS user
3076 ORDER BY len(user.friendIds) DESC
3077 LIMIT 1;
3078</pre></div></div>
3079<p>This query returns:</p>
3080
3081<div class="source">
3082<div class="source">
3083<pre> [ {
3084 &quot;userSince&quot;: &quot;2012-08-20T10:10:00.000Z&quot;,
3085 &quot;friendIds&quot;: [
3086 2,
3087 3,
3088 6,
3089 10
3090 ],
3091 &quot;gender&quot;: &quot;F&quot;,
3092 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
3093 &quot;nickname&quot;: &quot;Mags&quot;,
3094 &quot;alias&quot;: &quot;Margarita&quot;,
3095 &quot;id&quot;: 1,
3096 &quot;employment&quot;: [
3097 {
3098 &quot;organizationName&quot;: &quot;Codetechno&quot;,
3099 &quot;start-date&quot;: &quot;2006-08-06&quot;
3100 },
3101 {
3102 &quot;end-date&quot;: &quot;2010-01-26&quot;,
3103 &quot;organizationName&quot;: &quot;geomedia&quot;,
3104 &quot;start-date&quot;: &quot;2010-06-17&quot;
3105 }
3106 ]
3107 } ]
3108</pre></div></div></div></div></div></div>
3109<div class="section">
3110<h2><a name="WITH_Clauses"></a><a name="With_clauses" id="With_clauses">WITH Clauses</a></h2>
3111<p>As in standard SQL, <tt>WITH</tt> clauses are available to improve the modularity of a query. The next query shows an example.</p>
3112<div class="section">
3113<div class="section">
3114<div class="section">
3115<h5><a name="Example"></a>Example</h5>
3116
3117<div class="source">
3118<div class="source">
3119<pre>WITH avgFriendCount AS (
3120 SELECT VALUE AVG(ARRAY_COUNT(user.friendIds))
3121 FROM GleambookUsers AS user
3122)[0]
3123SELECT VALUE user
3124FROM GleambookUsers user
3125WHERE ARRAY_COUNT(user.friendIds) &gt; avgFriendCount;
3126</pre></div></div>
3127<p>This query returns:</p>
3128
3129<div class="source">
3130<div class="source">
3131<pre>[ {
3132 &quot;userSince&quot;: &quot;2012-08-20T10:10:00.000Z&quot;,
3133 &quot;friendIds&quot;: [
3134 2,
3135 3,
3136 6,
3137 10
3138 ],
3139 &quot;gender&quot;: &quot;F&quot;,
3140 &quot;name&quot;: &quot;MargaritaStoddard&quot;,
3141 &quot;nickname&quot;: &quot;Mags&quot;,
3142 &quot;alias&quot;: &quot;Margarita&quot;,
3143 &quot;id&quot;: 1,
3144 &quot;employment&quot;: [
3145 {
3146 &quot;organizationName&quot;: &quot;Codetechno&quot;,
3147 &quot;start-date&quot;: &quot;2006-08-06&quot;
3148 },
3149 {
3150 &quot;end-date&quot;: &quot;2010-01-26&quot;,
3151 &quot;organizationName&quot;: &quot;geomedia&quot;,
3152 &quot;start-date&quot;: &quot;2010-06-17&quot;
3153 }
3154 ]
3155}, {
3156 &quot;userSince&quot;: &quot;2012-07-10T10:10:00.000Z&quot;,
3157 &quot;friendIds&quot;: [
3158 1,
3159 5,
3160 8,
3161 9
3162 ],
3163 &quot;name&quot;: &quot;EmoryUnk&quot;,
3164 &quot;alias&quot;: &quot;Emory&quot;,
3165 &quot;id&quot;: 3,
3166 &quot;employment&quot;: [
3167 {
3168 &quot;organizationName&quot;: &quot;geomedia&quot;,
3169 &quot;endDate&quot;: &quot;2010-01-26&quot;,
3170 &quot;startDate&quot;: &quot;2010-06-17&quot;
3171 }
3172 ]
3173} ]
3174</pre></div></div>
3175<p>The query is equivalent to the following, more complex, inlined form of the query:</p>
3176
3177<div class="source">
3178<div class="source">
3179<pre>SELECT *
3180FROM GleambookUsers user
3181WHERE ARRAY_COUNT(user.friendIds) &gt;
3182 ( SELECT VALUE AVG(ARRAY_COUNT(user.friendIds))
3183 FROM GleambookUsers AS user
3184 ) [0];
3185</pre></div></div>
3186<p>WITH can be particularly useful when a value needs to be used several times in a query.</p>
3187<p>Before proceeding further, notice that both the WITH query and its equivalent inlined variant include the syntax &#x201c;[0]&#x201d; &#x2013; this is due to a noteworthy difference between SQL++ and SQL-92. In SQL-92, whenever a scalar value is expected and it is being produced by a query expression, the SQL-92 query processor will evaluate the expression, check that there is only one row and column in the result at runtime, and then coerce the one-row/one-column tabular result into a scalar value. SQL++, being designed to deal with nested data and schema-less data, does not (and should not) do this. Collection-valued data is perfectly legal in most SQL++ contexts, and its data is schema-less, so a query processor rarely knows exactly what to expect where and such automatic conversion is often not desirable. Thus, in the queries above, the use of &#x201c;[0]&#x201d; extracts the first (i.e., 0th) element of an array-valued query expression&#x2019;s result; this is needed above, even though the result is an array of one element, to extract the only element in the singleton array and obtain the desired scalar for the comparison.</p></div></div></div></div>
3188<div class="section">
3189<h2><a name="LET_Clauses"></a><a name="Let_clauses" id="Let_clauses">LET Clauses</a></h2>
3190<p>Similar to <tt>WITH</tt> clauses, <tt>LET</tt> clauses can be useful when a (complex) expression is used several times within a query, allowing it to be written once to make the query more concise. The next query shows an example.</p>
3191<div class="section">
3192<div class="section">
3193<div class="section">
3194<h5><a name="Example"></a>Example</h5>
3195
3196<div class="source">
3197<div class="source">
3198<pre>SELECT u.name AS uname, messages AS messages
3199FROM GleambookUsers u
3200LET messages = (SELECT VALUE m
3201 FROM GleambookMessages m
3202 WHERE m.authorId = u.id)
3203WHERE EXISTS messages;
3204</pre></div></div>
3205<p>This query lists <tt>GleambookUsers</tt> that have posted <tt>GleambookMessages</tt> and shows all authored messages for each listed user. It returns:</p>
3206
3207<div class="source">
3208<div class="source">
3209<pre>[ {
3210 &quot;uname&quot;: &quot;MargaritaStoddard&quot;,
3211 &quot;messages&quot;: [
3212 {
3213 &quot;senderLocation&quot;: [
3214 38.97,
3215 77.49
3216 ],
3217 &quot;inResponseTo&quot;: 1,
3218 &quot;messageId&quot;: 11,
3219 &quot;authorId&quot;: 1,
3220 &quot;message&quot;: &quot; can't stand acast its plan is terrible&quot;
3221 },
3222 {
3223 &quot;senderLocation&quot;: [
3224 41.66,
3225 80.87
3226 ],
3227 &quot;inResponseTo&quot;: 4,
3228 &quot;messageId&quot;: 2,
3229 &quot;authorId&quot;: 1,
3230 &quot;message&quot;: &quot; dislike x-phone its touch-screen is horrible&quot;
3231 },
3232 {
3233 &quot;senderLocation&quot;: [
3234 37.73,
3235 97.04
3236 ],
3237 &quot;inResponseTo&quot;: 2,
3238 &quot;messageId&quot;: 4,
3239 &quot;authorId&quot;: 1,
3240 &quot;message&quot;: &quot; can't stand acast the network is horrible:(&quot;
3241 },
3242 {
3243 &quot;senderLocation&quot;: [
3244 40.33,
3245 80.87
3246 ],
3247 &quot;inResponseTo&quot;: 11,
3248 &quot;messageId&quot;: 8,
3249 &quot;authorId&quot;: 1,
3250 &quot;message&quot;: &quot; like ccast the 3G is awesome:)&quot;
3251 },
3252 {
3253 &quot;senderLocation&quot;: [
3254 42.5,
3255 70.01
3256 ],
3257 &quot;inResponseTo&quot;: 12,
3258 &quot;messageId&quot;: 10,
3259 &quot;authorId&quot;: 1,
3260 &quot;message&quot;: &quot; can't stand product-w the touch-screen is terrible&quot;
3261 }
3262 ]
3263}, {
3264 &quot;uname&quot;: &quot;IsbelDull&quot;,
3265 &quot;messages&quot;: [
3266 {
3267 &quot;senderLocation&quot;: [
3268 31.5,
3269 75.56
3270 ],
3271 &quot;inResponseTo&quot;: 1,
3272 &quot;messageId&quot;: 6,
3273 &quot;authorId&quot;: 2,
3274 &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot;
3275 },
3276 {
3277 &quot;senderLocation&quot;: [
3278 48.09,
3279 81.01
3280 ],
3281 &quot;inResponseTo&quot;: 4,
3282 &quot;messageId&quot;: 3,
3283 &quot;authorId&quot;: 2,
3284 &quot;message&quot;: &quot; like product-y the plan is amazing&quot;
3285 }
3286 ]
3287} ]
3288</pre></div></div>
3289<p>This query is equivalent to the following query that does not use the <tt>LET</tt> clause:</p>
3290
3291<div class="source">
3292<div class="source">
3293<pre>SELECT u.name AS uname, ( SELECT VALUE m
3294 FROM GleambookMessages m
3295 WHERE m.authorId = u.id
3296 ) AS messages
3297FROM GleambookUsers u
3298WHERE EXISTS ( SELECT VALUE m
3299 FROM GleambookMessages m
3300 WHERE m.authorId = u.id
3301 );
3302</pre></div></div></div></div></div></div>
3303<div class="section">
3304<h2><a name="UNION_ALL"></a><a name="Union_all" id="Union_all">UNION ALL</a></h2>
3305<p>UNION ALL can be used to combine two input arrays or multisets into one. As in SQL, there is no ordering guarantee on the contents of the output stream. However, unlike SQL, SQL++ does not constrain what the data looks like on the input streams; in particular, it allows heterogeneity on the input and output streams. A type error will be raised if one of the inputs is not a collection. The following odd but legal query is an example:</p>
3306<div class="section">
3307<div class="section">
3308<div class="section">
3309<h5><a name="Example"></a>Example</h5>
3310
3311<div class="source">
3312<div class="source">
3313<pre>SELECT u.name AS uname
3314FROM GleambookUsers u
3315WHERE u.id = 2
3316 UNION ALL
3317SELECT VALUE m.message
3318FROM GleambookMessages m
3319WHERE authorId=2;
3320</pre></div></div>
3321<p>This query returns:</p>
3322
3323<div class="source">
3324<div class="source">
3325<pre>[
3326 &quot; like product-z its platform is mind-blowing&quot;
3327 , {
3328 &quot;uname&quot;: &quot;IsbelDull&quot;
3329}, &quot; like product-y the plan is amazing&quot;
3330 ]
3331</pre></div></div></div></div></div></div>
3332<div class="section">
3333<h2><a name="Subqueries" id="Subqueries">Subqueries</a></h2>
3334<p>In SQL++, an arbitrary subquery can appear anywhere that an expression can appear. Unlike SQL-92, as was just alluded to, the subqueries in a SELECT list or a boolean predicate need not return singleton, single-column relations. Instead, they may return arbitrary collections. For example, the following query is a variant of the prior group-by query examples; it retrieves an array of up to two &#x201c;dislike&#x201d; messages per user.</p>
3335<div class="section">
3336<div class="section">
3337<div class="section">
3338<h5><a name="Example"></a>Example</h5>
3339
3340<div class="source">
3341<div class="source">
3342<pre>SELECT uid,
3343 (SELECT VALUE m.msg
3344 FROM msgs m
3345 WHERE m.msg.message LIKE '%dislike%'
3346 ORDER BY m.msg.messageId
3347 LIMIT 2) AS msgs
3348FROM GleambookMessages message
3349GROUP BY message.authorId AS uid GROUP AS msgs(message AS msg);
3350</pre></div></div>
3351<p>For our sample data set, this query returns:</p>
3352
3353<div class="source">
3354<div class="source">
3355<pre>[ {
3356 &quot;msgs&quot;: [
3357 {
3358 &quot;senderLocation&quot;: [
3359 41.66,
3360 80.87
3361 ],
3362 &quot;inResponseTo&quot;: 4,
3363 &quot;messageId&quot;: 2,
3364 &quot;authorId&quot;: 1,
3365 &quot;message&quot;: &quot; dislike x-phone its touch-screen is horrible&quot;
3366 }
3367 ],
3368 &quot;uid&quot;: 1
3369}, {
3370 &quot;msgs&quot;: [
3371
3372 ],
3373 &quot;uid&quot;: 2
3374} ]
3375</pre></div></div>
3376<p>Note that a subquery, like a top-level <tt>SELECT</tt> statement, always returns a collection &#x2013; regardless of where within a query the subquery occurs &#x2013; and again, its result is never automatically cast into a scalar.</p></div></div></div></div>
3377<div class="section">
3378<h2><a name="SQL_vs._SQL-92"></a><a name="Vs_SQL-92" id="Vs_SQL-92">SQL++ vs. SQL-92</a></h2>
3379<p>SQL++ offers the following additional features beyond SQL-92 (hence the &#x201c;++&#x201d; in its name):</p>
3380
3381<ul>
3382
3383<li>Fully composable and functional: A subquery can iterate over any intermediate collection and can appear anywhere in a query.</li>
3384
3385<li>Schema-free: The query language does not assume the existence of a static schema for any data that it processes.</li>
3386
3387<li>Correlated FROM terms: A right-side FROM term expression can refer to variables defined by FROM terms on its left.</li>
3388
3389<li>Powerful GROUP BY: In addition to a set of aggregate functions as in standard SQL, the groups created by the <tt>GROUP BY</tt> clause are directly usable in nested queries and/or to obtain nested results.</li>
3390
3391<li>Generalized SELECT clause: A SELECT clause can return any type of collection, while in SQL-92, a <tt>SELECT</tt> clause has to return a (homogeneous) collection of objects.</li>
3392</ul>
3393<p>The following matrix is a quick &#x201c;SQL-92 compatibility cheat sheet&#x201d; for SQL++.</p>
3394
3395<table border="0" class="table table-striped">
3396 <thead>
3397
3398<tr class="a">
3399
3400<th>Feature </th>
3401
3402<th>SQL++ </th>
3403
3404<th>SQL-92 </th>
3405
3406<th>Why different? </th>
3407 </tr>
3408 </thead>
3409 <tbody>
3410
3411<tr class="b">
3412
3413<td>SELECT * </td>
3414
3415<td>Returns nested objects </td>
3416
3417<td>Returns flattened concatenated objects </td>
3418
3419<td>Nested collections are 1st class citizens </td>
3420 </tr>
3421
3422<tr class="a">
3423
3424<td>SELECT list </td>
3425
3426<td>order not preserved </td>
3427
3428<td>order preserved </td>
3429
3430<td>Fields in a JSON object are not ordered </td>
3431 </tr>
3432
3433<tr class="b">
3434
3435<td>Subquery </td>
3436
3437<td>Returns a collection </td>
3438
3439<td>The returned collection is cast into a scalar value if the subquery appears in a SELECT list or on one side of a comparison or as input to a function </td>
3440
3441<td>Nested collections are 1st class citizens </td>
3442 </tr>
3443
3444<tr class="a">
3445
3446<td>LEFT OUTER JOIN </td>
3447
3448<td>Fills in <tt>MISSING</tt>(s) for non-matches </td>
3449
3450<td>Fills in <tt>NULL</tt>(s) for non-matches </td>
3451
3452<td>&#x201c;Absence&#x201d; is more appropriate than &#x201c;unknown&#x201d; here. </td>
3453 </tr>
3454
3455<tr class="b">
3456
3457<td>UNION ALL </td>
3458
3459<td>Allows heterogeneous inputs and output </td>
3460
3461<td>Input streams must be UNION-compatible and output field names are drawn from the first input stream </td>
3462
3463<td>Heterogeneity and nested collections are common </td>
3464 </tr>
3465
3466<tr class="a">
3467
3468<td>IN constant_expr </td>
3469
3470<td>The constant expression has to be an array or multiset, i.e., [..,..,&#x2026;] </td>
3471
3472<td>The constant collection can be represented as comma-separated items in a paren pair </td>
3473
3474<td>Nested collections are 1st class citizens </td>
3475 </tr>
3476
3477<tr class="b">
3478
3479<td>String literal </td>
3480
3481<td>Double quotes or single quotes </td>
3482
3483<td>Single quotes only </td>
3484
3485<td>Double quoted strings are pervasive </td>
3486 </tr>
3487
3488<tr class="a">
3489
3490<td>Delimited identifiers </td>
3491
3492<td>Backticks </td>
3493
3494<td>Double quotes </td>
3495
3496<td>Double quoted strings are pervasive </td>
3497 </tr>
3498 </tbody>
3499</table>
3500<p>The following SQL-92 features are not implemented yet. However, SQL++ does not conflict with these features:</p>
3501
3502<ul>
3503
3504<li>CROSS JOIN, NATURAL JOIN, UNION JOIN</li>
3505
3506<li>RIGHT and FULL OUTER JOIN</li>
3507
3508<li>INTERSECT, EXCEPT, UNION with set semantics</li>
3509
3510<li>CAST expression</li>
3511
3512<li>NULLIF expression</li>
3513
3514<li>COALESCE expression</li>
3515
3516<li>ALL and SOME predicates for linking to subqueries</li>
3517
3518<li>UNIQUE predicate (tests a collection for duplicates)</li>
3519
3520<li>MATCH predicate (tests for referential integrity)</li>
3521
3522<li>Row and Table constructors</li>
3523
3524<li>DISTINCT aggregates</li>
3525
3526<li>Preserved order for expressions in a SELECT list</li>
3527</ul>
3528<!-- ! Licensed to the Apache Software Foundation (ASF) under one
3529 ! or more contributor license agreements. See the NOTICE file
3530 ! distributed with this work for additional information
3531 ! regarding copyright ownership. The ASF licenses this file
3532 ! to you under the Apache License, Version 2.0 (the
3533 ! "License"); you may not use this file except in compliance
3534 ! with the License. You may obtain a copy of the License at
3535 !
3536 ! http://www.apache.org/licenses/LICENSE-2.0
3537 !
3538 ! Unless required by applicable law or agreed to in writing,
3539 ! software distributed under the License is distributed on an
3540 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3541 ! KIND, either express or implied. See the License for the
3542 ! specific language governing permissions and limitations
3543 ! under the License.
3544 ! -->
3545<h1><a name="Errors" id="Errors">4. Errors</a></h1>
3546<!-- ! Licensed to the Apache Software Foundation (ASF) under one
3547 ! or more contributor license agreements. See the NOTICE file
3548 ! distributed with this work for additional information
3549 ! regarding copyright ownership. The ASF licenses this file
3550 ! to you under the Apache License, Version 2.0 (the
3551 ! "License"); you may not use this file except in compliance
3552 ! with the License. You may obtain a copy of the License at
3553 !
3554 ! http://www.apache.org/licenses/LICENSE-2.0
3555 !
3556 ! Unless required by applicable law or agreed to in writing,
3557 ! software distributed under the License is distributed on an
3558 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3559 ! KIND, either express or implied. See the License for the
3560 ! specific language governing permissions and limitations
3561 ! under the License.
3562 ! -->
3563<p>A SQL++ query can potentially result in one of the following errors:</p>
3564
3565<ul>
3566
3567<li>syntax error,</li>
3568
3569<li>identifier resolution error,</li>
3570
3571<li>type error,</li>
3572
3573<li>resource error.</li>
3574</ul>
3575<p>If the query processor runs into any error, it will terminate the ongoing processing of the query and immediately return an error message to the client.</p></div>
3576<div class="section">
3577<h2><a name="Syntax_Errors"></a><a name="Syntax_errors" id="Syntax_errors">Syntax Errors</a></h2>
3578<p>A valid SQL++ query must satisfy the SQL++ grammar rules. Otherwise, a syntax error will be raised.</p>
3579<div class="section">
3580<div class="section">
3581<div class="section">
3582<h5><a name="Example"></a>Example</h5>
3583
3584<div class="source">
3585<div class="source">
3586<pre>SELECT *
3587GleambookUsers user
3588</pre></div></div>
3589<p>Since the query is missing the <tt>FROM</tt> keyword before the dataset <tt>GleambookUsers</tt>, we will get a syntax error as follows:</p>
3590
3591<div class="source">
3592<div class="source">
3593<pre>Syntax error: In line 2 &gt;&gt;GleambookUsers user;&lt;&lt; Encountered &lt;IDENTIFIER&gt; \&quot;GleambookUsers\&quot; at column 1.
3594</pre></div></div></div>
3595<div class="section">
3596<h5><a name="Example"></a>Example</h5>
3597
3598<div class="source">
3599<div class="source">
3600<pre>SELECT *
3601FROM GleambookUsers user
3602WHERE type=&quot;advertiser&quot;;
3603</pre></div></div>
3604<p>Since &#x201c;type&#x201d; is a reserved keyword in the SQL++ parser, we will get a syntax error as follows:</p>
3605
3606<div class="source">
3607<div class="source">
3608<pre>Error: Syntax error: In line 3 &gt;&gt;WHERE type=&quot;advertiser&quot;;&lt;&lt; Encountered 'type' &quot;type&quot; at column 7.
3609==&gt; WHERE type=&quot;advertiser&quot;;
3610</pre></div></div></div></div></div></div>
3611<div class="section">
3612<h2><a name="Identifier_Resolution_Errors"></a><a name="Identifier_resolution_errors" id="Identifier_resolution_errors">Identifier Resolution Errors</a></h2>
3613<p>Referring to an undefined identifier can cause an error if the identifier cannot be successfully resolved as a valid field access.</p>
3614<div class="section">
3615<div class="section">
3616<div class="section">
3617<h5><a name="Example"></a>Example</h5>
3618
3619<div class="source">
3620<div class="source">
3621<pre>SELECT *
3622FROM GleambookUser user;
3623</pre></div></div>
3624<p>If we have a typo in &#x201c;GleambookUser&#x201d; that omits the trailing &#x201c;s&#x201d;, we will get an identifier resolution error as follows:</p>
3625
3626<div class="source">
3627<div class="source">
3628<pre>Error: Cannot find dataset GleambookUser in dataverse Default nor an alias with name GleambookUser!
3629</pre></div></div></div>
3630<div class="section">
3631<h5><a name="Example"></a>Example</h5>
3632
3633<div class="source">
3634<div class="source">
3635<pre>SELECT name, message
3636FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id;
3637</pre></div></div>
3638<p>If the compiler cannot figure out all possible fields in <tt>GleambookUsers</tt> and <tt>GleambookMessages</tt>, we will get an identifier resolution error as follows:</p>
3639
3640<div class="source">
3641<div class="source">
3642<pre>Error: Cannot resolve ambiguous alias reference for undefined identifier name
3643</pre></div></div></div></div></div></div>
3644<div class="section">
3645<h2><a name="Type_Errors"></a><a name="Type_errors" id="Type_errors">Type Errors</a></h2>
3646<p>The SQL++ compiler does type checks based on its available type information. In addition, the SQL++ runtime also reports type errors if a data model instance it processes does not satisfy the type requirement.</p>
3647<div class="section">
3648<div class="section">
3649<div class="section">
3650<h5><a name="Example"></a>Example</h5>
3651
3652<div class="source">
3653<div class="source">
3654<pre>abs(&quot;123&quot;);
3655</pre></div></div>
3656<p>Since function <tt>abs</tt> can only process numeric input values, we will get a type error as follows:</p>
3657
3658<div class="source">
3659<div class="source">
3660<pre>Error: Arithmetic operations are not implemented for string
3661</pre></div></div></div></div></div></div>
3662<div class="section">
3663<h2><a name="Resource_Errors"></a><a name="Resource_errors" id="Resource_errors">Resource Errors</a></h2>
3664<p>A query can potentially exhaust system resources, such as the number of open files and disk space. For instance, the following two resource errors could potentially be seen when running the system:</p>
3665
3666<div class="source">
3667<div class="source">
3668<pre>Error: no space left on device
3669Error: too many open files
3670</pre></div></div>
3671<p>The &#x201c;no space left on device&#x201d; issue usually can be fixed by cleaning up disk space and reserving more disk space for the system. The &#x201c;too many open files&#x201d; issue usually can be fixed by a system administrator, following the instructions <a class="externalLink" href="https://easyengine.io/tutorials/linux/increase-open-files-limit/">here</a>.</p>
3672<!-- ! Licensed to the Apache Software Foundation (ASF) under one
3673 ! or more contributor license agreements. See the NOTICE file
3674 ! distributed with this work for additional information
3675 ! regarding copyright ownership. The ASF licenses this file
3676 ! to you under the Apache License, Version 2.0 (the
3677 ! "License"); you may not use this file except in compliance
3678 ! with the License. You may obtain a copy of the License at
3679 !
3680 ! http://www.apache.org/licenses/LICENSE-2.0
3681 !
3682 ! Unless required by applicable law or agreed to in writing,
3683 ! software distributed under the License is distributed on an
3684 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3685 ! KIND, either express or implied. See the License for the
3686 ! specific language governing permissions and limitations
3687 ! under the License.
3688 ! -->
3689<h1><a name="DDL_and_DML_statements" id="DDL_and_DML_statements">5. DDL and DML statements</a></h1>
3690
3691<div class="source">
3692<div class="source">
3693<pre>Statement ::= ( SingleStatement ( &quot;;&quot; )? )* &lt;EOF&gt;
3694SingleStatement ::= DatabaseDeclaration
3695 | FunctionDeclaration
3696 | CreateStatement
3697 | DropStatement
3698 | LoadStatement
3699 | SetStatement
3700 | InsertStatement
3701 | DeleteStatement
3702 | Query &quot;;&quot;
3703</pre></div></div>
3704<p>In addition to queries, an implementation of SQL++ needs to support statements for data definition and manipulation purposes as well as controlling the context to be used in evaluating SQL++ expressions. This section details the DDL and DML statements supported in the SQL++ language as realized today in Apache AsterixDB.</p>
3705<!-- ! Licensed to the Apache Software Foundation (ASF) under one
3706 ! or more contributor license agreements. See the NOTICE file
3707 ! distributed with this work for additional information
3708 ! regarding copyright ownership. The ASF licenses this file
3709 ! to you under the Apache License, Version 2.0 (the
3710 ! "License"); you may not use this file except in compliance
3711 ! with the License. You may obtain a copy of the License at
3712 !
3713 ! http://www.apache.org/licenses/LICENSE-2.0
3714 !
3715 ! Unless required by applicable law or agreed to in writing,
3716 ! software distributed under the License is distributed on an
3717 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3718 ! KIND, either express or implied. See the License for the
3719 ! specific language governing permissions and limitations
3720 ! under the License.
3721 ! --></div>
3722<div class="section">
3723<h2><a name="Lifecycle_Management_Statements"></a><a name="Lifecycle_management_statements" id="Lifecycle_management_statements">Lifecycle Management Statements</a></h2>
3724
3725<div class="source">
3726<div class="source">
3727<pre>CreateStatement ::= &quot;CREATE&quot; ( DatabaseSpecification
3728 | TypeSpecification
3729 | DatasetSpecification
3730 | IndexSpecification
3731 | FunctionSpecification )
3732
3733QualifiedName ::= Identifier ( &quot;.&quot; Identifier )?
3734DoubleQualifiedName ::= Identifier &quot;.&quot; Identifier ( &quot;.&quot; Identifier )?
3735</pre></div></div>
3736<p>The CREATE statement in SQL++ is used for creating dataverses as well as other persistent artifacts in a dataverse. It can be used to create new dataverses, datatypes, datasets, indexes, and user-defined SQL++ functions.</p>
3737<div class="section">
3738<h3><a name="Dataverses" id="Dataverses"> Dataverses</a></h3>
3739
3740<div class="source">
3741<div class="source">
3742<pre>DatabaseSpecification ::= &quot;DATAVERSE&quot; Identifier IfNotExists
3743</pre></div></div>
3744<p>The CREATE DATAVERSE statement is used to create new dataverses. To ease the authoring of reusable SQL++ scripts, an optional IF NOT EXISTS clause is included to allow creation to be requested either unconditionally or only if the dataverse does not already exist. If this clause is absent, an error is returned if a dataverse with the indicated name already exists.</p>
3745<p>The following example creates a new dataverse named TinySocial if one does not already exist.</p>
3746<div class="section">
3747<div class="section">
3748<h5><a name="Example"></a>Example</h5>
3749
3750<div class="source">
3751<div class="source">
3752<pre>CREATE DATAVERSE TinySocial IF NOT EXISTS;
3753</pre></div></div></div></div></div>
3754<div class="section">
3755<h3><a name="Types" id="Types"> Types</a></h3>
3756
3757<div class="source">
3758<div class="source">
3759<pre>TypeSpecification ::= &quot;TYPE&quot; FunctionOrTypeName IfNotExists &quot;AS&quot; ObjectTypeDef
3760FunctionOrTypeName ::= QualifiedName
3761IfNotExists ::= ( &lt;IF&gt; &lt;NOT&gt; &lt;EXISTS&gt; )?
3762TypeExpr ::= ObjectTypeDef | TypeReference | ArrayTypeDef | MultisetTypeDef
3763ObjectTypeDef ::= ( &lt;CLOSED&gt; | &lt;OPEN&gt; )? &quot;{&quot; ( ObjectField ( &quot;,&quot; ObjectField )* )? &quot;}&quot;
3764ObjectField ::= Identifier &quot;:&quot; ( TypeExpr ) ( &quot;?&quot; )?
3765NestedField ::= Identifier ( &quot;.&quot; Identifier )*
3766IndexField ::= NestedField ( &quot;:&quot; TypeReference )?
3767TypeReference ::= Identifier
3768ArrayTypeDef ::= &quot;[&quot; ( TypeExpr ) &quot;]&quot;
3769MultisetTypeDef ::= &quot;{{&quot; ( TypeExpr ) &quot;}}&quot;
3770</pre></div></div>
3771<p>The CREATE TYPE statement is used to create a new named datatype. This type can then be used to create stored collections or utilized when defining one or more other datatypes. Much more information about the data model is available in the <a href="../datamodel.html">data model reference guide</a>. A new type can be an object type, a renaming of another type, an array type, or a multiset type. An object type can be defined as being either open or closed. Instances of a closed object type are not permitted to contain fields other than those specified in the create type statement. Instances of an open object type may carry additional fields, and open is the default for new types if neither option is specified.</p>
3772<p>The following example creates a new object type called GleambookUserType. Since it is defined as (defaulting to) being an open type, instances will be permitted to contain more than what is specified in the type definition. The first four fields are essentially traditional typed name/value pairs (much like SQL fields). The friendIds field is a multiset of integers. The employment field is an array of instances of another named object type, EmploymentType.</p>
3773<div class="section">
3774<div class="section">
3775<h5><a name="Example"></a>Example</h5>
3776
3777<div class="source">
3778<div class="source">
3779<pre>CREATE TYPE GleambookUserType AS {
3780 id: int,
3781 alias: string,
3782 name: string,
3783 userSince: datetime,
3784 friendIds: {{ int }},
3785 employment: [ EmploymentType ]
3786};
3787</pre></div></div>
3788<p>The next example creates a new object type, closed this time, called MyUserTupleType. Instances of this closed type will not be permitted to have extra fields, although the alias field is marked as optional and may thus be NULL or MISSING in legal instances of the type. Note that the type of the id field in the example is UUID. This field type can be used if you want to have this field be an autogenerated-PK field. (Refer to the Datasets section later for more details on such fields.)</p></div>
3789<div class="section">
3790<h5><a name="Example"></a>Example</h5>
3791
3792<div class="source">
3793<div class="source">
3794<pre>CREATE TYPE MyUserTupleType AS CLOSED {
3795 id: uuid,
3796 alias: string?,
3797 name: string
3798};
3799</pre></div></div></div></div></div>
3800<div class="section">
3801<h3><a name="Datasets" id="Datasets"> Datasets</a></h3>
3802
3803<div class="source">
3804<div class="source">
3805<pre>DatasetSpecification ::= ( &lt;INTERNAL&gt; )? &lt;DATASET&gt; QualifiedName &quot;(&quot; QualifiedName &quot;)&quot; IfNotExists
3806 PrimaryKey ( &lt;ON&gt; Identifier )? ( &lt;HINTS&gt; Properties )?
3807 ( &quot;USING&quot; &quot;COMPACTION&quot; &quot;POLICY&quot; CompactionPolicy ( Configuration )? )?
3808 ( &lt;WITH&gt; &lt;FILTER&gt; &lt;ON&gt; Identifier )?
3809 |
3810 &lt;EXTERNAL&gt; &lt;DATASET&gt; QualifiedName &quot;(&quot; QualifiedName &quot;)&quot; IfNotExists &lt;USING&gt; AdapterName
3811 Configuration ( &lt;HINTS&gt; Properties )?
3812 ( &lt;USING&gt; &lt;COMPACTION&gt; &lt;POLICY&gt; CompactionPolicy ( Configuration )? )?
3813AdapterName ::= Identifier
3814Configuration ::= &quot;(&quot; ( KeyValuePair ( &quot;,&quot; KeyValuePair )* )? &quot;)&quot;
3815KeyValuePair ::= &quot;(&quot; StringLiteral &quot;=&quot; StringLiteral &quot;)&quot;
3816Properties ::= ( &quot;(&quot; Property ( &quot;,&quot; Property )* &quot;)&quot; )?
3817Property ::= Identifier &quot;=&quot; ( StringLiteral | IntegerLiteral )
3818FunctionSignature ::= FunctionOrTypeName &quot;@&quot; IntegerLiteral
3819PrimaryKey ::= &lt;PRIMARY&gt; &lt;KEY&gt; NestedField ( &quot;,&quot; NestedField )* ( &lt;AUTOGENERATED&gt; )?
3820CompactionPolicy ::= Identifier
3821</pre></div></div>
3822<p>The CREATE DATASET statement is used to create a new dataset. Datasets are named, multisets of object type instances; they are where data lives persistently and are the usual targets for SQL++ queries. Datasets are typed, and the system ensures that their contents conform to their type definitions. An Internal dataset (the default kind) is a dataset whose content lives within and is managed by the system. It is required to have a specified unique primary key field which uniquely identifies the contained objects. (The primary key is also used in secondary indexes to identify the indexed primary data objects.)</p>
3823<p>Internal datasets contain several advanced options that can be specified when appropriate. One such option is that random primary key (UUID) values can be auto-generated by declaring the field to be UUID and putting &#x201c;AUTOGENERATED&#x201d; after the &#x201c;PRIMARY KEY&#x201d; identifier. In this case, unlike other non-optional fields, a value for the auto-generated PK field should not be provided at insertion time by the user since each object&#x2019;s primary key field value will be auto-generated by the system.</p>
3824<p>Another advanced option, when creating an Internal dataset, is to specify the merge policy to control which of the underlying LSM storage components are to be merged. (The system supports Log-Structured Merge tree based physical storage for Internal datasets.) Currently the system supports four different component merging policies that can be chosen per dataset: no-merge, constant, prefix, and correlated-prefix. The no-merge policy simply never merges disk components. The constant policy merges disk components when the number of components reaches a constant number k that can be configured by the user. The prefix policy relies on both component sizes and the number of components to decide which components to merge. It works by first trying to identify the smallest ordered (oldest to newest) sequence of components such that the sequence does not contain a single component that exceeds some threshold size M and that either the sum of the components&#x2019; sizes exceeds M or the number of components in the sequence exceeds another threshold C. If such a sequence exists, the components in the sequence are merged together to form a single component. Finally, the correlated-prefix policy is similar to the prefix policy, but it delegates the decision of merging the disk components of all the indexes in a dataset to the primary index. When the correlated-prefix policy decides that the primary index needs to be merged (using the same decision criteria as for the prefix policy), then it will issue successive merge requests on behalf of all other indexes associated with the same dataset. The system&#x2019;s default policy is the prefix policy except when there is a filter on a dataset, where the preferred policy for filters is the correlated-prefix.</p>
3825<p>Another advanced option shown in the syntax above, related to performance and mentioned above, is that a <b>filter</b> can optionally be created on a field to further optimize range queries with predicates on the filter&#x2019;s field. Filters allow some range queries to avoid searching all LSM components when the query conditions match the filter. (Refer to <a href="../filters.html">Filter-Based LSM Index Acceleration</a> for more information about filters.)</p>
3826<p>An External dataset, in contrast to an Internal dataset, has data stored outside of the system&#x2019;s control. Files living in HDFS or in the local filesystem(s) of a cluster&#x2019;s nodes are currently supported. External dataset support allows SQL++ queries to treat foreign data as though it were stored in the system, making it possible to query &#x201c;legacy&#x201d; file data (for example, Hive data) without having to physically import it. When defining an External dataset, an appropriate adapter type must be selected for the desired external data. (See the <a href="../externaldata.html">Guide to External Data</a> for more information on the available adapters.)</p>
3827<p>The following example creates an Internal dataset for storing GleambookUserType objects. It specifies that their id field is their primary key.</p>
3828<div class="section">
3829<h4><a name="Example"></a>Example</h4>
3830
3831<div class="source">
3832<div class="source">
3833<pre>CREATE INTERNAL DATASET GleambookUsers(GleambookUserType) PRIMARY KEY id;
3834</pre></div></div>
3835<p>The next example creates another Internal dataset (the default kind when no dataset kind is specified) for storing MyUserTupleType objects. It specifies that the id field should be used as the primary key for the dataset. It also specifies that the id field is an auto-generated field, meaning that a randomly generated UUID value should be assigned to each incoming object by the system. (A user should therefore not attempt to provide a value for this field.) Note that the id field&#x2019;s declared type must be UUID in this case.</p></div>
3836<div class="section">
3837<h4><a name="Example"></a>Example</h4>
3838
3839<div class="source">
3840<div class="source">
3841<pre>CREATE DATASET MyUsers(MyUserTupleType) PRIMARY KEY id AUTOGENERATED;
3842</pre></div></div>
3843<p>The next example creates an External dataset for querying LineItemType objects. The choice of the <tt>hdfs</tt> adapter means that this dataset&#x2019;s data actually resides in HDFS. The example CREATE statement also provides parameters used by the hdfs adapter: the URL and path needed to locate the data in HDFS and a description of the data format.</p></div>
3844<div class="section">
3845<h4><a name="Example"></a>Example</h4>
3846
3847<div class="source">
3848<div class="source">
3849<pre>CREATE EXTERNAL DATASET LineItem(LineItemType) USING hdfs (
3850 (&quot;hdfs&quot;=&quot;hdfs://HOST:PORT&quot;),
3851 (&quot;path&quot;=&quot;HDFS_PATH&quot;),
3852 (&quot;input-format&quot;=&quot;text-input-format&quot;),
3853 (&quot;format&quot;=&quot;delimited-text&quot;),
3854 (&quot;delimiter&quot;=&quot;|&quot;));
3855</pre></div></div></div></div>
3856<div class="section">
3857<h3><a name="Indices" id="Indices">Indices</a></h3>
3858
3859<div class="source">
3860<div class="source">
3861<pre>IndexSpecification ::= &lt;INDEX&gt; Identifier IfNotExists &lt;ON&gt; QualifiedName
3862 &quot;(&quot; ( IndexField ) ( &quot;,&quot; IndexField )* &quot;)&quot; ( &quot;type&quot; IndexType &quot;?&quot;)?
3863 ( (&lt;NOT&gt;)? &lt;ENFORCED&gt; )?
3864IndexType ::= &lt;BTREE&gt; | &lt;RTREE&gt; | &lt;KEYWORD&gt; | &lt;NGRAM&gt; &quot;(&quot; IntegerLiteral &quot;)&quot;
3865</pre></div></div>
3866<p>The CREATE INDEX statement creates a secondary index on one or more fields of a specified dataset. Supported index types include <tt>BTREE</tt> for totally ordered datatypes, <tt>RTREE</tt> for spatial data, and <tt>KEYWORD</tt> and <tt>NGRAM</tt> for textual (string) data. An index can be created on a nested field (or fields) by providing a valid path expression as an index field identifier.</p>
3867<p>An indexed field is not required to be part of the datatype associated with a dataset if the dataset&#x2019;s datatype is declared as open <b>and</b> if the field&#x2019;s type is provided along with its name and if the <tt>ENFORCED</tt> keyword is specified at the end of the index definition. <tt>ENFORCING</tt> an open field introduces a check that makes sure that the actual type of the indexed field (if the optional field exists in the object) always matches this specified (open) field type.</p>
3868<p>The following example creates a btree index called gbAuthorIdx on the authorId field of the GleambookMessages dataset. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the author-id field.</p>
3869<div class="section">
3870<h4><a name="Example"></a>Example</h4>
3871
3872<div class="source">
3873<div class="source">
3874<pre>CREATE INDEX gbAuthorIdx ON GleambookMessages(authorId) TYPE BTREE;
3875</pre></div></div>
3876<p>The following example creates an open btree index called gbSendTimeIdx on the (non-predeclared) sendTime field of the GleambookMessages dataset having datetime type. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the sendTime field. The index is enforced so that records that do not have the &#x201c;sendTime&#x201d; field or have a mismatched type on the field cannot be inserted into the dataset.</p></div>
3877<div class="section">
3878<h4><a name="Example"></a>Example</h4>
3879
3880<div class="source">
3881<div class="source">
3882<pre>CREATE INDEX gbSendTimeIdx ON GleambookMessages(sendTime: datetime?) TYPE BTREE ENFORCED;
3883</pre></div></div>
3884<p>The following example creates a btree index called crpUserScrNameIdx on screenName, a nested field residing within an object-valued user field in the ChirpMessages dataset. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the nested screenName field. Such nested fields must be singular, i.e., one cannot index through (or on) an array-valued field.</p></div>
3885<div class="section">
3886<h4><a name="Example"></a>Example</h4>
3887
3888<div class="source">
3889<div class="source">
3890<pre>CREATE INDEX crpUserScrNameIdx ON ChirpMessages(user.screenName) TYPE BTREE;
3891</pre></div></div>
3892<p>The following example creates an rtree index called gbSenderLocIndex on the sender-location field of the GleambookMessages dataset. This index can be useful for accelerating queries that use the <a href="functions.html#spatial-intersect"><tt>spatial-intersect</tt> function</a> in a predicate involving the sender-location field.</p></div>
3893<div class="section">
3894<h4><a name="Example"></a>Example</h4>
3895
3896<div class="source">
3897<div class="source">
3898<pre>CREATE INDEX gbSenderLocIndex ON GleambookMessages(&quot;sender-location&quot;) TYPE RTREE;
3899</pre></div></div>
3900<p>The following example creates a 3-gram index called fbUserIdx on the name field of the GleambookUsers dataset. This index can be used to accelerate some similarity or substring matching queries on the name field. For details refer to the document on <a href="similarity.html#NGram_Index">similarity queries</a>.</p></div>
3901<div class="section">
3902<h4><a name="Example"></a>Example</h4>
3903
3904<div class="source">
3905<div class="source">
3906<pre>CREATE INDEX fbUserIdx ON GleambookUsers(name) TYPE NGRAM(3);
3907</pre></div></div>
3908<p>The following example creates a keyword index called fbMessageIdx on the message field of the GleambookMessages dataset. This keyword index can be used to optimize queries with token-based similarity predicates on the message field. For details refer to the document on <a href="similarity.html#Keyword_Index">similarity queries</a>.</p></div>
3909<div class="section">
3910<h4><a name="Example"></a>Example</h4>
3911
3912<div class="source">
3913<div class="source">
3914<pre>CREATE INDEX fbMessageIdx ON GleambookMessages(message) TYPE KEYWORD;
3915</pre></div></div>
3916<!-- ! Licensed to the Apache Software Foundation (ASF) under one
3917 ! or more contributor license agreements. See the NOTICE file
3918 ! distributed with this work for additional information
3919 ! regarding copyright ownership. The ASF licenses this file
3920 ! to you under the Apache License, Version 2.0 (the
3921 ! "License"); you may not use this file except in compliance
3922 ! with the License. You may obtain a copy of the License at
3923 !
3924 ! http://www.apache.org/licenses/LICENSE-2.0
3925 !
3926 ! Unless required by applicable law or agreed to in writing,
3927 ! software distributed under the License is distributed on an
3928 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3929 ! KIND, either express or implied. See the License for the
3930 ! specific language governing permissions and limitations
3931 ! under the License.
3932 ! -->
3933<p>The following example creates an open btree index called gbReadTimeIdx on the (non-predeclared) readTime field of the GleambookMessages dataset having datetime type. This index can be useful for accelerating exact-match queries, range search queries, and joins involving the <tt>readTime</tt> field. The index is not enforced so that records that do not have the <tt>readTime</tt> field or have a mismatched type on the field can still be inserted into the dataset.</p></div>
3934<div class="section">
3935<h4><a name="Example"></a>Example</h4>
3936
3937<div class="source">
3938<div class="source">
3939<pre>CREATE INDEX gbReadTimeIdx ON GleambookMessages(readTime: datetime?);
3940</pre></div></div>
3941<!-- ! Licensed to the Apache Software Foundation (ASF) under one
3942 ! or more contributor license agreements. See the NOTICE file
3943 ! distributed with this work for additional information
3944 ! regarding copyright ownership. The ASF licenses this file
3945 ! to you under the Apache License, Version 2.0 (the
3946 ! "License"); you may not use this file except in compliance
3947 ! with the License. You may obtain a copy of the License at
3948 !
3949 ! http://www.apache.org/licenses/LICENSE-2.0
3950 !
3951 ! Unless required by applicable law or agreed to in writing,
3952 ! software distributed under the License is distributed on an
3953 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
3954 ! KIND, either express or implied. See the License for the
3955 ! specific language governing permissions and limitations
3956 ! under the License.
3957 ! --></div></div>
3958<div class="section">
3959<h3><a name="Functions" id="Functions"> Functions</a></h3>
3960<p>The create function statement creates a <b>named</b> function that can then be used and reused in SQL++ queries. The body of a function can be any SQL++ expression involving the function&#x2019;s parameters.</p>
3961
3962<div class="source">
3963<div class="source">
3964<pre>FunctionSpecification ::= &quot;FUNCTION&quot; FunctionOrTypeName IfNotExists ParameterList &quot;{&quot; Expression &quot;}&quot;
3965</pre></div></div>
3966<p>The following is an example of a CREATE FUNCTION statement which is similar to our earlier DECLARE FUNCTION example. It differs from that example in that it results in a function that is persistently registered by name in the specified dataverse (the current dataverse being used, if not otherwise specified).</p>
3967<div class="section">
3968<div class="section">
3969<h5><a name="Example"></a>Example</h5>
3970
3971<div class="source">
3972<div class="source">
3973<pre>CREATE FUNCTION friendInfo(userId) {
3974 (SELECT u.id, u.name, len(u.friendIds) AS friendCount
3975 FROM GleambookUsers u
3976 WHERE u.id = userId)[0]
3977 };
3978</pre></div></div></div></div></div>
3979<div class="section">
3980<h3><a name="Removal" id="Removal"> Removal</a></h3>
3981
3982<div class="source">
3983<div class="source">
3984<pre>DropStatement ::= &quot;DROP&quot; ( &quot;DATAVERSE&quot; Identifier IfExists
3985 | &quot;TYPE&quot; FunctionOrTypeName IfExists
3986 | &quot;DATASET&quot; QualifiedName IfExists
3987 | &quot;INDEX&quot; DoubleQualifiedName IfExists
3988 | &quot;FUNCTION&quot; FunctionSignature IfExists )
3989IfExists ::= ( &quot;IF&quot; &quot;EXISTS&quot; )?
3990</pre></div></div>
3991<p>The DROP statement in SQL++ is the inverse of the CREATE statement. It can be used to drop dataverses, datatypes, datasets, indexes, and functions.</p>
3992<p>The following examples illustrate some uses of the DROP statement.</p>
3993<div class="section">
3994<div class="section">
3995<h5><a name="Example"></a>Example</h5>
3996
3997<div class="source">
3998<div class="source">
3999<pre>DROP DATASET GleambookUsers IF EXISTS;
4000
4001DROP INDEX GleambookMessages.gbSenderLocIndex;
4002
4003DROP TYPE TinySocial2.GleambookUserType;
4004
4005DROP FUNCTION friendInfo@1;
4006
4007DROP DATAVERSE TinySocial;
4008</pre></div></div>
4009<p>When an artifact is dropped, it will be dropped from the current dataverse if none is specified (see the DROP DATASET example above) or from the specified dataverse (see the DROP TYPE example above) if one is specified by fully qualifying the artifact name in the DROP statement. When specifying an index to drop, the index name must be qualified by the dataset that it indexes. When specifying a function to drop, since SQL++ allows functions to be overloaded by their number of arguments, the identifying name of the function to be dropped must explicitly include that information. (<tt>friendInfo@1</tt> above denotes the 1-argument function named friendInfo in the current dataverse.)</p></div></div></div>
4010<div class="section">
4011<h3><a name="Load_Statement"></a><a name="Load_statement" id="Load_statement">Load Statement</a></h3>
4012
4013<div class="source">
4014<div class="source">
4015<pre>LoadStatement ::= &lt;LOAD&gt; &lt;DATASET&gt; QualifiedName &lt;USING&gt; AdapterName Configuration ( &lt;PRE-SORTED&gt; )?
4016</pre></div></div>
4017<p>The LOAD statement is used to initially populate a dataset via bulk loading of data from an external file. An appropriate adapter must be selected to handle the nature of the desired external data. The LOAD statement accepts the same adapters and the same parameters as discussed earlier for External datasets. (See the <a href="externaldata.html">guide to external data</a> for more information on the available adapters.) If a dataset has an auto-generated primary key field, the file to be imported should not include that field in it.</p>
4018<p>The following example shows how to bulk load the GleambookUsers dataset from an external file containing data that has been prepared in ADM (Asterix Data Model) format.</p>
4019<div class="section">
4020<div class="section">
4021<h5><a name="Example"></a>Example</h5>
4022
4023<div class="source">
4024<div class="source">
4025<pre> LOAD DATASET GleambookUsers USING localfs
4026 ((&quot;path&quot;=&quot;127.0.0.1:///Users/bignosqlfan/tinysocialnew/gbu.adm&quot;),(&quot;format&quot;=&quot;adm&quot;));
4027</pre></div></div>
4028<!-- ! Licensed to the Apache Software Foundation (ASF) under one
4029 ! or more contributor license agreements. See the NOTICE file
4030 ! distributed with this work for additional information
4031 ! regarding copyright ownership. The ASF licenses this file
4032 ! to you under the Apache License, Version 2.0 (the
4033 ! "License"); you may not use this file except in compliance
4034 ! with the License. You may obtain a copy of the License at
4035 !
4036 ! http://www.apache.org/licenses/LICENSE-2.0
4037 !
4038 ! Unless required by applicable law or agreed to in writing,
4039 ! software distributed under the License is distributed on an
4040 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4041 ! KIND, either express or implied. See the License for the
4042 ! specific language governing permissions and limitations
4043 ! under the License.
4044 ! --></div></div></div></div>
4045<div class="section">
4046<h2><a name="Modification_statements" id="Modification_statements">Modification statements</a></h2>
4047<div class="section">
4048<h3><a name="INSERTs"></a><a name="Inserts" id="Inserts">INSERTs</a></h3>
4049
4050<div class="source">
4051<div class="source">
4052<pre>InsertStatement ::= &lt;INSERT&gt; &lt;INTO&gt; QualifiedName Query
4053</pre></div></div>
4054<p>The SQL++ INSERT statement is used to insert new data into a dataset. The data to be inserted comes from a SQL++ query expression. This expression can be as simple as a constant expression, or in general it can be any legal SQL++ query. If the target dataset has an auto-generated primary key field, the insert statement should not include a value for that field in it. (The system will automatically extend the provided object with this additional field and a corresponding value.) Insertion will fail if the dataset already has data with the primary key value(s) being inserted.</p>
4055<p>Inserts are processed transactionally by the system. The transactional scope of each insert transaction is the insertion of a single object plus its affiliated secondary index entries (if any). If the query part of an insert returns a single object, then the INSERT statement will be a single, atomic transaction. If the query part returns multiple objects, each object being inserted will be treated as a separate transaction. The following example illustrates a query-based insertion.</p>
4056<div class="section">
4057<div class="section">
4058<h5><a name="Example"></a>Example</h5>
4059
4060<div class="source">
4061<div class="source">
4062<pre>INSERT INTO UsersCopy (SELECT VALUE user FROM GleambookUsers user)
4063</pre></div></div></div></div></div>
4064<div class="section">
4065<h3><a name="UPSERTs"></a><a name="Upserts" id="Upserts">UPSERTs</a></h3>
4066
4067<div class="source">
4068<div class="source">
4069<pre>UpsertStatement ::= &lt;UPSERT&gt; &lt;INTO&gt; QualifiedName Query
4070</pre></div></div>
4071<p>The SQL++ UPSERT statement syntactically mirrors the INSERT statement discussed above. The difference lies in its semantics, which for UPSERT are &#x201c;add or replace&#x201d; instead of the INSERT &#x201c;add if not present, else error&#x201d; semantics. Whereas an INSERT can fail if another object already exists with the specified key, the analogous UPSERT will replace the previous object&#x2019;s value with that of the new object in such cases.</p>
4072<p>The following example illustrates a query-based upsert operation.</p>
4073<div class="section">
4074<div class="section">
4075<h5><a name="Example"></a>Example</h5>
4076
4077<div class="source">
4078<div class="source">
4079<pre>UPSERT INTO UsersCopy (SELECT VALUE user FROM GleambookUsers user)
4080</pre></div></div>
4081<p>*Editor&#x2019;s note: Upserts currently work in AQL but are not yet enabled in SQL++.</p></div></div></div>
4082<div class="section">
4083<h3><a name="DELETEs"></a><a name="Deletes" id="Deletes">DELETEs</a></h3>
4084
4085<div class="source">
4086<div class="source">
4087<pre>DeleteStatement ::= &lt;DELETE&gt; &lt;FROM&gt; QualifiedName ( ( &lt;AS&gt; )? Variable )? ( &lt;WHERE&gt; Expression )?
4088</pre></div></div>
4089<p>The SQL++ DELETE statement is used to delete data from a target dataset. The data to be deleted is identified by a boolean expression involving the variable bound to the target dataset in the DELETE statement.</p>
4090<p>Deletes are processed transactionally by the system. The transactional scope of each delete transaction is the deletion of a single object plus its affiliated secondary index entries (if any). If the boolean expression for a delete identifies a single object, then the DELETE statement itself will be a single, atomic transaction. If the expression identifies multiple objects, then each object deleted will be handled as a separate transaction.</p>
4091<p>The following examples illustrate single-object deletions.</p>
4092<div class="section">
4093<div class="section">
4094<h5><a name="Example"></a>Example</h5>
4095
4096<div class="source">
4097<div class="source">
4098<pre>DELETE FROM GleambookUsers user WHERE user.id = 8;
4099</pre></div></div></div>
4100<div class="section">
4101<h5><a name="Example"></a>Example</h5>
4102
4103<div class="source">
4104<div class="source">
4105<pre>DELETE FROM GleambookUsers WHERE id = 5;
4106</pre></div></div>
4107<!-- ! Licensed to the Apache Software Foundation (ASF) under one
4108 ! or more contributor license agreements. See the NOTICE file
4109 ! distributed with this work for additional information
4110 ! regarding copyright ownership. The ASF licenses this file
4111 ! to you under the Apache License, Version 2.0 (the
4112 ! "License"); you may not use this file except in compliance
4113 ! with the License. You may obtain a copy of the License at
4114 !
4115 ! http://www.apache.org/licenses/LICENSE-2.0
4116 !
4117 ! Unless required by applicable law or agreed to in writing,
4118 ! software distributed under the License is distributed on an
4119 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4120 ! KIND, either express or implied. See the License for the
4121 ! specific language governing permissions and limitations
4122 ! under the License.
4123 ! -->
4124<h2><a name="Reserved_keywords" id="Reserved_keywords">Appendix 1. Reserved keywords</a></h2>
4125<!-- ! Licensed to the Apache Software Foundation (ASF) under one
4126 ! or more contributor license agreements. See the NOTICE file
4127 ! distributed with this work for additional information
4128 ! regarding copyright ownership. The ASF licenses this file
4129 ! to you under the Apache License, Version 2.0 (the
4130 ! "License"); you may not use this file except in compliance
4131 ! with the License. You may obtain a copy of the License at
4132 !
4133 ! http://www.apache.org/licenses/LICENSE-2.0
4134 !
4135 ! Unless required by applicable law or agreed to in writing,
4136 ! software distributed under the License is distributed on an
4137 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4138 ! KIND, either express or implied. See the License for the
4139 ! specific language governing permissions and limitations
4140 ! under the License.
4141 ! -->
4142<p>All reserved keywords are listed in the following table:</p>
4143
4144<table border="0" class="table table-striped">
4145 <thead>
4146
4147<tr class="a">
4148
4149<th> </th>
4150
4151<th> </th>
4152
4153<th> </th>
4154
4155<th> </th>
4156
4157<th> </th>
4158
4159<th> </th>
4160 </tr>
4161 </thead>
4162 <tbody>
4163
4164<tr class="b">
4165
4166<td>AND </td>
4167
4168<td>ANY </td>
4169
4170<td>APPLY </td>
4171
4172<td>AS </td>
4173
4174<td>ASC </td>
4175
4176<td>AT </td>
4177 </tr>
4178
4179<tr class="a">
4180
4181<td>AUTOGENERATED </td>
4182
4183<td>BETWEEN </td>
4184
4185<td>BTREE </td>
4186
4187<td>BY </td>
4188
4189<td>CASE </td>
4190
4191<td>CLOSED </td>
4192 </tr>
4193
4194<tr class="b">
4195
4196<td>CREATE </td>
4197
4198<td>COMPACTION </td>
4199
4200<td>COMPACT </td>
4201
4202<td>CONNECT </td>
4203
4204<td>CORRELATE </td>
4205
4206<td>DATASET </td>
4207 </tr>
4208
4209<tr class="a">
4210
4211<td>COLLECTION </td>
4212
4213<td>DATAVERSE </td>
4214
4215<td>DECLARE </td>
4216
4217<td>DEFINITION </td>
4218
4219<td>DECLARE </td>
4220
4221<td>DEFINITION </td>
4222 </tr>
4223
4224<tr class="b">
4225
4226<td>DELETE </td>
4227
4228<td>DESC </td>
4229
4230<td>DISCONNECT </td>
4231
4232<td>DISTINCT </td>
4233
4234<td>DROP </td>
4235
4236<td>ELEMENT </td>
4237 </tr>
4238
4239<tr class="a">
4240
4241<td>ELEMENT </td>
4242
4243<td>EXPLAIN </td>
4244
4245<td>ELSE </td>
4246
4247<td>ENFORCED </td>
4248
4249<td>END </td>
4250
4251<td>EVERY </td>
4252 </tr>
4253
4254<tr class="b">
4255
4256<td>EXCEPT </td>
4257
4258<td>EXISTS </td>
4259
4260<td>EXTERNAL </td>
4261
4262<td>FEED </td>
4263
4264<td>FILTER </td>
4265
4266<td>FLATTEN </td>
4267 </tr>
4268
4269<tr class="a">
4270
4271<td>FOR </td>
4272
4273<td>FROM </td>
4274
4275<td>FULL </td>
4276
4277<td>FUNCTION </td>
4278
4279<td>GROUP </td>
4280
4281<td>HAVING </td>
4282 </tr>
4283
4284<tr class="b">
4285
4286<td>HINTS </td>
4287
4288<td>IF </td>
4289
4290<td>INTO </td>
4291
4292<td>IN </td>
4293
4294<td>INDEX </td>
4295
4296<td>INGESTION </td>
4297 </tr>
4298
4299<tr class="a">
4300
4301<td>INNER </td>
4302
4303<td>INSERT </td>
4304
4305<td>INTERNAL </td>
4306
4307<td>INTERSECT </td>
4308
4309<td>IS </td>
4310
4311<td>JOIN </td>
4312 </tr>
4313
4314<tr class="b">
4315
4316<td>KEYWORD </td>
4317
4318<td>LEFT </td>
4319
4320<td>LETTING </td>
4321
4322<td>LET </td>
4323
4324<td>LIKE </td>
4325
4326<td>LIMIT </td>
4327 </tr>
4328
4329<tr class="a">
4330
4331<td>LOAD </td>
4332
4333<td>NODEGROUP </td>
4334
4335<td>NGRAM </td>
4336
4337<td>NOT </td>
4338
4339<td>OFFSET </td>
4340
4341<td>ON </td>
4342 </tr>
4343
4344<tr class="b">
4345
4346<td>OPEN </td>
4347
4348<td>OR </td>
4349
4350<td>ORDER </td>
4351
4352<td>OUTER </td>
4353
4354<td>OUTPUT </td>
4355
4356<td>PATH </td>
4357 </tr>
4358
4359<tr class="a">
4360
4361<td>POLICY </td>
4362
4363<td>PRE-SORTED </td>
4364
4365<td>PRIMARY </td>
4366
4367<td>RAW </td>
4368
4369<td>REFRESH </td>
4370
4371<td>RETURN </td>
4372 </tr>
4373
4374<tr class="b">
4375
4376<td>RTREE </td>
4377
4378<td>RUN </td>
4379
4380<td>SATISFIES </td>
4381
4382<td>SECONDARY </td>
4383
4384<td>SELECT </td>
4385
4386<td>SET </td>
4387 </tr>
4388
4389<tr class="a">
4390
4391<td>SOME </td>
4392
4393<td>TEMPORARY </td>
4394
4395<td>THEN </td>
4396
4397<td>TYPE </td>
4398
4399<td>UNKNOWN </td>
4400
4401<td>UNNEST </td>
4402 </tr>
4403
4404<tr class="b">
4405
4406<td>UPDATE </td>
4407
4408<td>USE </td>
4409
4410<td>USING </td>
4411
4412<td>VALUE </td>
4413
4414<td>WHEN </td>
4415
4416<td>WHERE </td>
4417 </tr>
4418
4419<tr class="a">
4420
4421<td>WITH </td>
4422
4423<td>WRITE </td>
4424
4425<td> </td>
4426
4427<td> </td>
4428
4429<td> </td>
4430
4431<td> </td>
4432 </tr>
4433 </tbody>
4434</table>
4435<!-- ! Licensed to the Apache Software Foundation (ASF) under one
4436 ! or more contributor license agreements. See the NOTICE file
4437 ! distributed with this work for additional information
4438 ! regarding copyright ownership. The ASF licenses this file
4439 ! to you under the Apache License, Version 2.0 (the
4440 ! "License"); you may not use this file except in compliance
4441 ! with the License. You may obtain a copy of the License at
4442 !
4443 ! http://www.apache.org/licenses/LICENSE-2.0
4444 !
4445 ! Unless required by applicable law or agreed to in writing,
4446 ! software distributed under the License is distributed on an
4447 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4448 ! KIND, either express or implied. See the License for the
4449 ! specific language governing permissions and limitations
4450 ! under the License.
4451 ! --></div></div></div></div>
4452<div class="section">
4453<h2><a name="Appendix_2._Performance_Tuning"></a><a name="Performance_tuning" id="Performance_tuning">Appendix 2. Performance Tuning</a></h2>
4454<!-- ! Licensed to the Apache Software Foundation (ASF) under one
4455 ! or more contributor license agreements. See the NOTICE file
4456 ! distributed with this work for additional information
4457 ! regarding copyright ownership. The ASF licenses this file
4458 ! to you under the Apache License, Version 2.0 (the
4459 ! "License"); you may not use this file except in compliance
4460 ! with the License. You may obtain a copy of the License at
4461 !
4462 ! http://www.apache.org/licenses/LICENSE-2.0
4463 !
4464 ! Unless required by applicable law or agreed to in writing,
4465 ! software distributed under the License is distributed on an
4466 ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
4467 ! KIND, either express or implied. See the License for the
4468 ! specific language governing permissions and limitations
4469 ! under the License.
4470 ! -->
4471<p>The SET statement can be used to override some cluster-wide configuration parameters for a specific request:</p>
4472
4473<div class="source">
4474<div class="source">
4475<pre> SET &lt;IDENTIFIER&gt; &lt;STRING_LITERAL&gt;
4476</pre></div></div>
4477<p>As parameter identifiers are qualified names (containing a &#x2018;.&#x2019;) they have to be escaped using backticks (``). Note that changing query parameters will not affect query correctness but only impact performance characteristics, such as response time and throughput.</p></div>
4478<div class="section">
4479<h2><a name="Parallelism_Parameter"></a><a name="Parallelism_parameter" id="Parallelism_parameter">Parallelism Parameter</a></h2>
4480<p>The system can execute each request using multiple cores on multiple machines (a.k.a., partitioned parallelism) in a cluster. A user can manually specify the maximum execution parallelism for a request to scale it up and down using the following parameter:</p>
4481
4482<ul>
4483
4484<li>
4485<p><b>compiler.parallelism</b>: the maximum number of CPU cores that can be used to process a query. There are three cases of the value <i>p</i> for compiler.parallelism:</p>
4486
4487<ul>
4488
4489<li><i>p</i> &lt; 0 or <i>p</i> &gt; the total number of cores in a cluster: the system will use all available cores in the cluster;</li>
4490 </ul>
4491
4492<ul>
4493
4494<li><i>p</i> = 0 (the default): the system will use the storage parallelism (the number of partitions of stored datasets) as the maximum parallelism for query processing;</li>
4495 </ul>
4496
4497<ul>
4498
4499<li>all other cases: the system will use the user-specified number as the maximum number of CPU cores to use for executing the query.</li>
4500 </ul></li>
4501</ul>
4502<div class="section">
4503<div class="section">
4504<div class="section">
4505<h5><a name="Example"></a>Example</h5>
4506
4507<div class="source">
4508<div class="source">
4509<pre>SET `compiler.parallelism` &quot;16&quot;
4510
4511SELECT u.name AS uname, m.message AS message
4512FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id;
4513</pre></div></div></div></div></div></div>
4514<div class="section">
4515<h2><a name="Memory_Parameters"></a><a name="Memory_parameters" id="Memory_parameters">Memory Parameters</a></h2>
4516<p>In the system, each blocking runtime operator such as join, group-by and order-by works within a fixed memory budget, and can gracefully spill to disk if the memory budget is smaller than the amount of data it has to hold. A user can manually configure the memory budget of those operators within a query. The supported configurable memory parameters are:</p>
4517
4518<ul>
4519
4520<li>
4521<p><b>compiler.groupmemory</b>: the memory budget that each parallel group-by operator instance can use; 32MB is the default budget.</p></li>
4522
4523<li>
4524<p><b>compiler.sortmemory</b>: the memory budget that each parallel sort operator instance can use; 32MB is the default budget.</p></li>
4525
4526<li>
4527<p><b>compiler.joinmemory</b>: the memory budget that each parallel hash join operator instance can use; 32MB is the default budget.</p></li>
4528</ul>
4529<p>For each memory budget value, you can use a 64-bit integer value with a 1024-based binary unit suffix (for example, B, KB, MB, GB). If there is no user-provided suffix, &#x201c;B&#x201d; is the default suffix. See the following examples.</p>
4530<div class="section">
4531<div class="section">
4532<div class="section">
4533<h5><a name="Example"></a>Example</h5>
4534
4535<div class="source">
4536<div class="source">
4537<pre>SET `compiler.groupmemory` &quot;64MB&quot;
4538
4539SELECT msg.authorId, COUNT(*)
4540FROM GleambookMessages msg
4541GROUP BY msg.authorId;
4542</pre></div></div></div>
4543<div class="section">
4544<h5><a name="Example"></a>Example</h5>
4545
4546<div class="source">
4547<div class="source">
4548<pre>SET `compiler.sortmemory` &quot;67108864&quot;
4549
4550SELECT VALUE user
4551FROM GleambookUsers AS user
4552ORDER BY ARRAY_LENGTH(user.friendIds) DESC;
4553</pre></div></div></div>
4554<div class="section">
4555<h5><a name="Example"></a>Example</h5>
4556
4557<div class="source">
4558<div class="source">
4559<pre>SET `compiler.joinmemory` &quot;132000KB&quot;
4560
4561SELECT u.name AS uname, m.message AS message
4562FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id;
4563</pre></div></div></div></div></div></div>
4564 </div>
4565 </div>
4566 </div>
4567
4568 <hr/>
4569
4570 <footer>
4571 <div class="container-fluid">
4572 <div class="row span12">Copyright &copy; 2017
4573 <a href="https://www.apache.org/">The Apache Software Foundation</a>.
4574 All Rights Reserved.
4575
4576 </div>
4577
4578 <?xml version="1.0" encoding="UTF-8"?>
4579<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
4580 feather logo, and the Apache AsterixDB project logo are either
4581 registered trademarks or trademarks of The Apache Software
4582 Foundation in the United States and other countries.
4583 All other marks mentioned may be trademarks or registered
4584 trademarks of their respective owners.</div>
4585
4586
4587 </div>
4588 </footer>
4589 </body>
4590</html>