Update docs Change-Id:If17ec8dbd1de435c9a0caedf4daa7ee1ebe3e5b3 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/incubator-asterixdb-site/+/18219 Reviewed-by: Ian Maxon <imaxon@uci.edu>

commit: bf8620b3274a2fa6ce777a8500edb07f9804aeb6 [log] [tgz]
author: Ian Maxon <ian@maxons.email> Mon Apr 01 16:09:18 2024 -0700
committer: Ian Maxon <imaxon@uci.edu> Mon Apr 01 23:10:11 2024 +0000
tree: 3320fb6810643ba0a83f54ffca8557c102b767db
parent: 1c2dec1b731f72f4ec7e89a08597d2f55edf6b91 [diff]
diff --git a/content/docs/0.9.9/site/sqlpp/arrayindex.html b/content/docs/0.9.9/site/sqlpp/arrayindex.html
new file mode 100644
index 0000000..b6d4ae9
--- /dev/null
+++ b/content/docs/0.9.9/site/sqlpp/arrayindex.html

@@ -0,0 +1,285 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/sqlpp/arrayindex.md at 2024-04-01
+ | Rendered using Apache Maven Fluido Skin 1.7
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20240401" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>AsterixDB &#x2013; AsterixDB Support of Array Indexes</title>
+    <link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
+    <link rel="stylesheet" href="../css/site.css" />
+    <link rel="stylesheet" href="../css/print.css" media="print" />
+    <script type="text/javascript" src="../js/apache-maven-fluido-1.7.min.js"></script>
+
+  </head>
+  <body class="topBarDisabled">
+    <div class="container-fluid">
+      <div id="banner">
+        <div class="pull-left"><a href=".././" id="bannerLeft"><img src="../images/asterixlogo.png"  alt="AsterixDB"/></a></div>
+        <div class="pull-right"></div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+        <li id="publishDate">Last Published: 2024-04-01</li>
+      <li id="projectVersion" class="pull-right">Version: 0.9.9</li>
+      <li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
+        </ul>
+      </div>
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+    <ul class="nav nav-list">
+      <li class="nav-header">Get Started - Installation</li>
+    <li><a href="../ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
+    <li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
+    <li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
+      <li class="nav-header">AsterixDB Primer</li>
+    <li><a href="../sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
+      <li class="nav-header">Data Model</li>
+    <li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
+      <li class="nav-header">Queries</li>
+    <li><a href="../sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
+    <li><a href="../SQLPP.html" title="Raw SQL++ Grammar"><span class="none"></span>Raw SQL++ Grammar</a></li>
+    <li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
+      <li class="nav-header">API/SDK</li>
+    <li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
+    <li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
+      <li class="nav-header">Advanced Features</li>
+    <li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
+    <li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
+    <li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
+    <li><a href="../sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
+    <li><a href="../sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
+    <li><a href="../sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+    <li><a href="../geo/quickstart.html" title="GIS Support Overview"><span class="none"></span>GIS Support Overview</a></li>
+    <li><a href="../geo/functions.html" title="GIS Functions"><span class="none"></span>GIS Functions</a></li>
+    <li><a href="../interval_join.html" title="Support of Interval Joins"><span class="none"></span>Support of Interval Joins</a></li>
+    <li><a href="../spatial_join.html" title="Support of Spatial Joins"><span class="none"></span>Support of Spatial Joins</a></li>
+    <li class="active"><a href="#"><span class="none"></span>Support of Array Indexes</a></li>
+      <li class="nav-header">Deprecated</li>
+    <li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
+    <li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
+    <li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
+</ul>
+          <hr />
+          <div id="poweredBy">
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+<a href=".././" title="AsterixDB" class="builtBy"><img class="builtBy"  alt="AsterixDB" src="../images/asterixlogo.png"    /></a>
+            </div>
+          </div>
+        </div>
+        <div id="bodyColumn"  class="span10" >
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+<h1>AsterixDB Support of Array Indexes</h1>
+<div class="section">
+<h2><a name="Table_of_Contents"></a><a name="toc" id="toc">Table of Contents</a></h2>
+<ul>
+
+<li><a href="#Overview">Overview</a></li>
+<li><a href="#QuantificationQueries">Quantification Queries</a></li>
+<li><a href="#ExplicitUnnestQueries">Explicit Unnesting Queries</a></li>
+<li><a href="#JoinQueries">Join Queries</a></li>
+<li><a href="#ComplexIndexingExamples">Complex Indexing Examples</a></li>
+</ul></div>
+<div class="section">
+<h2><a name="Overview_.5BBack_to_TOC.5D"></a><a name="Overview" id="Overview">Overview</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>Array indexes are used in applications where users want to accelerate a query that involves some array-valued or multiset-valued field. This enables fast evaluation of predicates in queries involving arrays or multisets in datasets. For brevity, all further mentions of array-valued fields are also applicable to multiset-valued fields.</p>
+<p>Array-valued fields are a natural data modeling concept for documents. In the traditional inventory management example, it is natural for the line items of an order to exist as a part of the order itself. Previously if an AsterixDB user wanted to optimize a query involving a predicate on the line items of an order, they would a) have to undertake some form of schema migration to separate the line items from the orders into different datasets, b) create an index on the new dataset for line items, and finally c) modify their query to join orders and line items. With the introduction of array indexes in AsterixDB, users can keep their arrays intact and still reap the performance benefits of an index.</p>
+<p>It should be noted that in AsterixDB, array indexes are <i>not</i> meant to serve as covering indexes. In fact due to AsterixDB&#x2019;s record-level locking, index-only plans involving multi-valued fields (i.e. array indexes and inverted indexes) are not currently possible. Instead, array indexes are simply meant to accelerate queries involving multi-valued fields.</p></div>
+<div class="section">
+<h2><a name="Quantification_Queries_.5BBack_to_TOC.5D"></a><a name="QuantificationQueries" id="QuantificationQueries">Quantification Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>A common use-case for array indexes involves quantifying some or all elements within an array. Quantification queries have two variants: existential and universal. Existential queries ask if <i>any</i> element in some array satisfies a given predicate. Membership queries are a specific type of existential query, asking if any element in some array is equal to a particular value. Universal queries ask if <i>all</i> elements in some array satisfy a particular predicate. Empty arrays are not stored in an array index, meaning that a user must additionally specify that the array is non-empty to tell AsterixDB that it is possible to use an array index as an access method for the given query.</p>
+<p>All query examples here will use the orders and products datasets below.</p>
+
+<div>
+<div>
+<pre class="source">CREATE TYPE ordersType AS {
+    orderno:        int,
+    custid:         string,
+    items:          [{ itemno: int, productno: int, qty: int, price: float }]
+};
+CREATE DATASET orders (ordersType) PRIMARY KEY orderno;
+
+CREATE TYPE productsType AS {
+    productno:      int,
+    categories:     {{ string }}
+};
+CREATE DATASET products (productsType) PRIMARY KEY productno;
+</pre></div></div>
+
+<p>Let us now create an index on the <tt>categories</tt> multiset of the <tt>products</tt> dataset.</p>
+
+<div>
+<div>
+<pre class="source">CREATE INDEX pCategoriesIdx ON products (UNNEST categories) EXCLUDE UNKNOWN KEY;
+</pre></div></div>
+
+<p>Suppose we now want to find all products that have the category &#x201c;Food&#x201d;. The following membership query will utilize the index we just created.</p>
+
+<div>
+<div>
+<pre class="source">SELECT p
+FROM products p
+WHERE &quot;Food&quot; IN p.categories;
+</pre></div></div>
+
+<p>We can also rewrite the query above as an explicit existential quantification query with an equality predicate and the index will be utilized.</p>
+
+<div>
+<div>
+<pre class="source">SELECT p
+FROM products p
+WHERE SOME c IN p.categories SATISFIES c = &quot;Food&quot;;
+</pre></div></div>
+
+<p>Let us now create an index on the <tt>qty</tt> and <tt>price</tt> fields in the <tt>items</tt> array of the <tt>orders</tt> dataset.</p>
+
+<div>
+<div>
+<pre class="source">CREATE INDEX oItemsQtyPriceIdx ON orders (UNNEST items SELECT qty, price) EXCLUDE UNKNOWN KEY;
+</pre></div></div>
+
+<p>Now suppose we want to find all orders that only have items with large quantities and low prices, not counting orders without any items. The following universal quantification query will utilize the index we just created.</p>
+
+<div>
+<div>
+<pre class="source">SELECT o
+FROM orders o
+WHERE SOME AND EVERY i IN o.items SATISFIES i.qty &gt; 100 AND i.price &lt; 5.00;
+</pre></div></div>
+
+<p>Take note of the <tt>SOME AND EVERY</tt> quantifier instead of the <tt>EVERY</tt> quantifier. Array indexes cannot be used for queries with potentially empty arrays.</p></div>
+<div class="section">
+<h2><a name="Explicit_Unnesting_Queries_.5BBack_to_TOC.5D"></a><a name="ExplicitUnnestQueries" id="ExplicitUnnestQueries">Explicit Unnesting Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>Array indexes can also be used to accelerate queries that involve the explicit unnesting of array fields. We can express the same membership / existential example above using an explicit <tt>UNNEST</tt> query. (To keep the same cardinality as the query above (i.e. to undo the <tt>UNNEST</tt>), we add a <tt>DISTINCT</tt> clause, though the index would be utilized either way.)</p>
+
+<div>
+<div>
+<pre class="source">SELECT DISTINCT p
+FROM products p, p.categories c
+WHERE c = &quot;Food&quot;;
+</pre></div></div>
+
+<p>As another example, suppose that we want to find all orders that have <i>some</i> item with a large quantity. The following query will utilize the <tt>oItemsQtyPriceIdx</tt> we created, using only the first field in the index <tt>qty</tt>.</p>
+
+<div>
+<div>
+<pre class="source">SELECT DISTINCT o
+FROM orders o, o.items i
+WHERE i.qty &gt; 100;
+</pre></div></div>
+</div>
+<div class="section">
+<h2><a name="Join_Queries_.5BBack_to_TOC.5D"></a><a name="JoinQueries" id="JoinQueries">Join Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>Finally, array indexes can also be used for index nested-loop joins if the field being joined is located within an array. Let us create another index on the <tt>items</tt> array of the <tt>orders</tt> dataset, this time on the <tt>productno</tt> field.</p>
+
+<div>
+<div>
+<pre class="source">CREATE INDEX oProductIDIdx ON orders (UNNEST items SELECT productno) EXCLUDE UNKNOWN KEY;
+</pre></div></div>
+
+<p>Now suppose we want to find all products that appear in the orders of a specific customer. We can accomplish this with the join query below. Note that we must specify the <tt>indexnl</tt> join hint to tell AsterixDB that we want to optimize this specific join, as hash joins are the default join method otherwise.</p>
+
+<div>
+<div>
+<pre class="source">SELECT DISTINCT p
+FROM products p, orders o
+WHERE o.custid = &quot;C41&quot; AND 
+      SOME i IN o.items SATISFIES i.productno /*+ indexnl */ = p.productno;
+</pre></div></div>
+</div>
+<div class="section">
+<h2><a name="Complex_Indexing_Examples_.5BBack_to_TOC.5D"></a><a name="ComplexIndexingExamples" id="ComplexIndexingExamples">Complex Indexing Examples</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<div class="section">
+<h3><a name="Open_Indexes"></a>Open Indexes</h3>
+<p>Similar to atomic indexes, array indexes are not limited to closed fields. The following DDLs illustrate how we could express <tt>CREATE INDEX</tt> statements comparable to those above if the to-be-indexed fields were not included in their dataset&#x2019;s type definitions.</p>
+
+<div>
+<div>
+<pre class="source">CREATE INDEX pCategoriesIdx ON products (UNNEST categories : string) EXCLUDE UNKNOWN KEY;
+CREATE INDEX oItemsQtyPriceIdx ON orders (UNNEST items SELECT qty : int, price : float) EXCLUDE UNKNOWN KEY;
+CREATE INDEX oProductIDIdx ON orders (UNNEST items SELECT productno : int) EXCLUDE UNKNOWN KEY;
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Composite_Atomic-Array_Indexes"></a>Composite Atomic-Array Indexes</h3>
+<p>Indexed elements within array indexes are also not limited to fields within arrays. The following DDLs demonstrate indexing fields that are within an array and fields that are outside any array.</p>
+
+<div>
+<div>
+<pre class="source">CREATE INDEX oOrderNoItemPriceIdx ON orders (orderno, ( UNNEST items SELECT price )) EXCLUDE UNKNOWN KEY;
+CREATE INDEX oOrderItemPriceNoIdx ON orders (( UNNEST items SELECT price ), orderno) EXCLUDE UNKNOWN KEY;
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Arrays_in_Arrays"></a>Arrays in Arrays</h3>
+<p>Array indexes are not just limited to arrays of depth = 1. We can generalize this to arrays of arbitrary depth, as long as an object encapsulates each array. The following DDLs describe indexing the <tt>qty</tt> field in an <tt>items</tt> array at various depths.</p>
+
+<div>
+<div>
+<pre class="source">// { orderno: ..., items0: [ { items1: [ { qty: int, ... } ] } ] }
+CREATE INDEX oItemItemQtyIdx ON orders (UNNEST items0 UNNEST items1 SELECT qty) EXCLUDE UNKNOWN KEY;
+
+// { orderno: ..., items0: [ { items1: [ { items2: [ { qty: int, ... } ] } ] } ] }
+CREATE INDEX oItemItemItemQtyIdx ON orders (UNNEST items0 UNNEST items1 UNNEST items2 SELECT qty) EXCLUDE UNKNOWN KEY;
+</pre></div></div>
+
+<p>The queries below will utilize the indexes above. The first query utilizes the <tt>oItemItemQtyIdx</tt> index through nested existential quantification. The second query utilizes the <tt>oItemItemItemQtyIdx</tt> index with three unnesting clauses.</p>
+
+<div>
+<div>
+<pre class="source">SELECT o
+FROM orders o
+WHERE SOME o0 IN o.items0 SATISFIES (
+    SOME o1 IN o0.items1 SATISFIES o1.qty = 100
+);
+
+SELECT DISTINCT o
+FROM orders o, o.items0 o0, o0.items1 o1, o1.items2 o2
+WHERE o2.qty = 100;
+</pre></div></div></div></div>
+        </div>
+      </div>
+    </div>
+    <hr/>
+    <footer>
+      <div class="container-fluid">
+        <div class="row-fluid">
+<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
+        feather logo, and the Apache AsterixDB project logo are either
+        registered trademarks or trademarks of The Apache Software
+        Foundation in the United States and other countries.
+        All other marks mentioned may be trademarks or registered
+        trademarks of their respective owners.
+      </div>
+        </div>
+      </div>
+    </footer>
+  </body>
+</html>

diff --git a/content/docs/0.9.9/site/sqlpp/filters.html b/content/docs/0.9.9/site/sqlpp/filters.html
new file mode 100644
index 0000000..9df1af7
--- /dev/null
+++ b/content/docs/0.9.9/site/sqlpp/filters.html

@@ -0,0 +1,148 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/sqlpp/filters.md at 2024-04-01
+ | Rendered using Apache Maven Fluido Skin 1.7
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20240401" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>AsterixDB &#x2013; Filter-Based LSM Index Acceleration</title>
+    <link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
+    <link rel="stylesheet" href="../css/site.css" />
+    <link rel="stylesheet" href="../css/print.css" media="print" />
+    <script type="text/javascript" src="../js/apache-maven-fluido-1.7.min.js"></script>
+
+  </head>
+  <body class="topBarDisabled">
+    <div class="container-fluid">
+      <div id="banner">
+        <div class="pull-left"><a href=".././" id="bannerLeft"><img src="../images/asterixlogo.png"  alt="AsterixDB"/></a></div>
+        <div class="pull-right"></div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+        <li id="publishDate">Last Published: 2024-04-01</li>
+      <li id="projectVersion" class="pull-right">Version: 0.9.9</li>
+      <li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
+        </ul>
+      </div>
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+    <ul class="nav nav-list">
+      <li class="nav-header">Get Started - Installation</li>
+    <li><a href="../ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
+    <li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
+    <li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
+      <li class="nav-header">AsterixDB Primer</li>
+    <li><a href="../sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
+      <li class="nav-header">Data Model</li>
+    <li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
+      <li class="nav-header">Queries</li>
+    <li><a href="../sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
+    <li><a href="../SQLPP.html" title="Raw SQL++ Grammar"><span class="none"></span>Raw SQL++ Grammar</a></li>
+    <li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
+      <li class="nav-header">API/SDK</li>
+    <li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
+    <li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
+      <li class="nav-header">Advanced Features</li>
+    <li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
+    <li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
+    <li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
+    <li class="active"><a href="#"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
+    <li><a href="../sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
+    <li><a href="../sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+    <li><a href="../geo/quickstart.html" title="GIS Support Overview"><span class="none"></span>GIS Support Overview</a></li>
+    <li><a href="../geo/functions.html" title="GIS Functions"><span class="none"></span>GIS Functions</a></li>
+    <li><a href="../interval_join.html" title="Support of Interval Joins"><span class="none"></span>Support of Interval Joins</a></li>
+    <li><a href="../spatial_join.html" title="Support of Spatial Joins"><span class="none"></span>Support of Spatial Joins</a></li>
+    <li><a href="../sqlpp/arrayindex.html" title="Support of Array Indexes"><span class="none"></span>Support of Array Indexes</a></li>
+      <li class="nav-header">Deprecated</li>
+    <li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
+    <li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
+    <li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
+</ul>
+          <hr />
+          <div id="poweredBy">
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+<a href=".././" title="AsterixDB" class="builtBy"><img class="builtBy"  alt="AsterixDB" src="../images/asterixlogo.png"    /></a>
+            </div>
+          </div>
+        </div>
+        <div id="bodyColumn"  class="span10" >
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+<h1>Filter-Based LSM Index Acceleration</h1>
+<div class="section">
+<h2><a name="Table_of_Contents"></a><a name="toc" id="toc">Table of Contents</a></h2>
+<ul>
+
+<li><a href="#Motivation">Motivation</a></li>
+<li><a href="#FiltersInAsterixDB">Filters in AsterixDB</a></li>
+<li><a href="#FiltersAndMergePolicies">Filters and Merge Policies</a></li>
+</ul></div>
+<div class="section">
+<h2><a name="Motivation_.5BBack_to_TOC.5D"></a><a name="Motivation" id="Motivation">Motivation</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>Traditional relational databases usually employ conventional index structures such as B+ trees due to their low read latency.  However, such traditional index structures use in-place writes to perform updates, resulting in costly random writes to disk. Today&#x2019;s emerging applications often involve insert-intensive workloads for which the cost of random writes prohibits efficient ingestion of data. Consequently, popular NoSQL systems such as Cassandra, HBase, LevelDB, BigTable, etc. have adopted Log-Structured Merge (LSM) Trees as their storage structure. LSM-trees avoid the cost of random writes by batching updates into a component of the index that resides in main memory &#x2013; an <i>in-memory component</i>. When the space occupancy of the in-memory component exceeds a specified threshold, its entries are <i>flushed</i> to disk forming a new component &#x2013; a <i>disk component</i>. As disk components accumulate on disk, they are periodically merged together subject to a <i>merge policy</i> that decides when and what to merge. The benefit of the LSM-trees comes at the cost of possibly sacrificing read efficiency, but it has been shown in previous studies that these inefficiencies can be mostly mitigated.</p>
+<p>AsterixDB has also embraced LSM-trees, not just by using them as primary indexes, but also by using the same LSM-ification technique for all of its secondary index structures. In particular, AsterixDB adopted a generic framework for converting a class of indexes (that includes conventional B+ trees, R trees, and inverted indexes) into LSM-based secondary indexes, allowing higher data ingestion rates. In fact, for certain index structures, our results have shown that using an LSM-based version of an index can be made to significantly outperform its conventional counterpart for <i>both</i> ingestion and query speed (an example of such an index being the R-tree for spatial data).</p>
+<p>Since an LSM-based index naturally partitions data into multiple disk components, it is possible, when answering certain queries, to exploit partitioning to only access some components and safely filter out the remaining components, thus reducing query times. For instance, referring to our <a href="primer-sqlpp.html#ADM:_Modeling_Semistructured_Data_in_AsterixDB">TinySocial</a> example, suppose a user always retrieves tweets from the <tt>TweetMessages</tt> dataset based on the <tt>send-time</tt> field (e.g., tweets posted in the last 24 hours). Since there is not a secondary index on the <tt>send-time</tt> field, the only available option for AsterixDB would be to scan the whole <tt>TweetMessages</tt> dataset and then apply the predicate as a post-processing step. However, if disk components of the primary index were tagged with the minimum and maximum timestamp values of the objects they contain, we could utilize the tagged information to directly access the primary index and prune components that do not match the query predicate. Thus, we could save substantial cost by avoiding scanning the whole dataset and only access the relevant components. We simply call such tagging information that is associated with components, filters. (Note that even if there were a secondary index on the <tt>send-time</tt> field, using filters could save substantial cost by avoiding accessing the secondary index, followed by probing the primary index for every fetched entry.) Moreover, the same filtering technique can also be used with any secondary LSM index (e.g., an LSM R-tree), in case the query contains multiple predicates (e.g., spatial and temporal predicates), to obtain similar pruning power.</p></div>
+<div class="section">
+<h2><a name="Filters_in_AsterixDB_.5BBack_to_TOC.5D"></a><a name="FiltersInAsterixDB" id="FiltersInAsterixDB">Filters in AsterixDB</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>We have added support for LSM-based filters to all of AsterixDB&#x2019;s index types. To enable the use of filters, the user must specify the filter&#x2019;s key when creating a dataset, as shown below:</p>
+<div class="section">
+<div class="section">
+<h4><a name="Creating_a_Dataset_with_a_Filter"></a>Creating a Dataset with a Filter</h4>
+
+<div>
+<div>
+<pre class="source">    create dataset Tweets(TweetType) primary key tweetid with filter on send-time;
+</pre></div></div>
+
+<p>Filters can be created on any totally ordered datatype (i.e., any field that can be indexed using a B+ tree), such as integers, doubles, floats, UUIDs, datetimes, etc.</p>
+<p>When a dataset with a filter is created, the name of the filter&#x2019;s key field is persisted in the <tt>Metadata.Dataset</tt> dataset (which is the metadata dataset that stores the details of each dataset in an AsterixDB instance) so that DML operations against the dataset can recognize the existence of filters and can update them or utilize them accordingly. Creating a dataset with a filter in AsterixDB implies that the primary and all secondary indexes of that dataset will maintain filters on their disk components. Once a filtered dataset is created, the user can use the dataset normally (just like any other dataset). AsterixDB will automatically maintain the filters and will leverage them to efficiently answer queries whenever possible (i.e., when a query has predicates on the filter&#x2019;s key).</p></div></div></div>
+<div class="section">
+<h2><a name="Filters_and_Merge_Policies_.5BBack_to_TOC.5D"></a><a name="FiltersAndMergePolicies" id="FiltersAndMergePolicies">Filters and Merge Policies</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>The AsterixDB default merge policy, the prefix merge policy, relies on component sizes and the number of components to decide which components to merge. This merge policy has proven to provide excellent performance for both ingestion and queries. However, when evaluating our filtering solution with the prefix policy, we observed a behavior that can reduce filter effectiveness. In particular, we noticed that under the prefix merge policy, the disk components of a secondary index tend to be constantly merged into a single component. This is because the prefix policy relies on a single size parameter for all of the indexes of a dataset. This parameter is typically chosen based on the sizes of the disk components of the primary index, which tend to be much larger than the sizes of the secondary indexes&#x2019; disk components. This difference caused the prefix merge policy to behave similarly to the constant merge policy (i.e., relatively poorly) when applied to secondary indexes in the sense that the secondary indexes are constantly merged into a single disk component. Consequently, the effectiveness of filters on secondary indexes was greatly reduced under the prefix-merge policy, but they were still effective when probing the primary index.  Based on this behavior, we developed a new merge policy, an improved version of the prefix policy, called the correlated-prefix policy. The basic idea of this policy is that it delegates the decision of merging the disk components of all the indexes in a dataset to the primary index. When the policy decides that the primary index needs to be merged (using the same decision criteria as for the prefix policy), then it will issue successive merge requests to the I/O scheduler on behalf of all other indexes associated with the same dataset. The end result is that secondary indexes will always have the same number of disk components as their primary index under the correlated-prefix merge policy. 
This has improved query performance, since disk components of secondary indexes now have a much better chance of being pruned.</p></div>
+        </div>
+      </div>
+    </div>
+    <hr/>
+    <footer>
+      <div class="container-fluid">
+        <div class="row-fluid">
+<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
+        feather logo, and the Apache AsterixDB project logo are either
+        registered trademarks or trademarks of The Apache Software
+        Foundation in the United States and other countries.
+        All other marks mentioned may be trademarks or registered
+        trademarks of their respective owners.
+      </div>
+        </div>
+      </div>
+    </footer>
+  </body>
+</html>

diff --git a/content/docs/0.9.9/site/sqlpp/fulltext.html b/content/docs/0.9.9/site/sqlpp/fulltext.html
new file mode 100644
index 0000000..f1325f3
--- /dev/null
+++ b/content/docs/0.9.9/site/sqlpp/fulltext.html

@@ -0,0 +1,202 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/sqlpp/fulltext.md at 2024-04-01
+ | Rendered using Apache Maven Fluido Skin 1.7
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20240401" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>AsterixDB &#x2013; AsterixDB  Support of Full-text search queries</title>
+    <link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
+    <link rel="stylesheet" href="../css/site.css" />
+    <link rel="stylesheet" href="../css/print.css" media="print" />
+    <script type="text/javascript" src="../js/apache-maven-fluido-1.7.min.js"></script>
+
+  </head>
+  <body class="topBarDisabled">
+    <div class="container-fluid">
+      <div id="banner">
+        <div class="pull-left"><a href=".././" id="bannerLeft"><img src="../images/asterixlogo.png"  alt="AsterixDB"/></a></div>
+        <div class="pull-right"></div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+        <li id="publishDate">Last Published: 2024-04-01</li>
+      <li id="projectVersion" class="pull-right">Version: 0.9.9</li>
+      <li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
+        </ul>
+      </div>
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+    <ul class="nav nav-list">
+      <li class="nav-header">Get Started - Installation</li>
+    <li><a href="../ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
+    <li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
+    <li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
+      <li class="nav-header">AsterixDB Primer</li>
+    <li><a href="../sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
+      <li class="nav-header">Data Model</li>
+    <li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
+      <li class="nav-header">Queries</li>
+    <li><a href="../sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
+    <li><a href="../SQLPP.html" title="Raw SQL++ Grammar"><span class="none"></span>Raw SQL++ Grammar</a></li>
+    <li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
+      <li class="nav-header">API/SDK</li>
+    <li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
+    <li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
+      <li class="nav-header">Advanced Features</li>
+    <li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
+    <li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
+    <li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
+    <li><a href="../sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
+    <li class="active"><a href="#"><span class="none"></span>Support of Full-text Queries</a></li>
+    <li><a href="../sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+    <li><a href="../geo/quickstart.html" title="GIS Support Overview"><span class="none"></span>GIS Support Overview</a></li>
+    <li><a href="../geo/functions.html" title="GIS Functions"><span class="none"></span>GIS Functions</a></li>
+    <li><a href="../interval_join.html" title="Support of Interval Joins"><span class="none"></span>Support of Interval Joins</a></li>
+    <li><a href="../spatial_join.html" title="Support of Spatial Joins"><span class="none"></span>Support of Spatial Joins</a></li>
+    <li><a href="../sqlpp/arrayindex.html" title="Support of Array Indexes"><span class="none"></span>Support of Array Indexes</a></li>
+      <li class="nav-header">Deprecated</li>
+    <li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
+    <li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
+    <li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
+</ul>
+          <hr />
+          <div id="poweredBy">
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+<a href=".././" title="AsterixDB" class="builtBy"><img class="builtBy"  alt="AsterixDB" src="../images/asterixlogo.png"    /></a>
+            </div>
+          </div>
+        </div>
+        <div id="bodyColumn"  class="span10" >
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+<h1>AsterixDB  Support of Full-text search queries</h1>
+<div class="section">
+<h2><a name="Table_of_Contents"></a><a name="toc" id="toc">Table of Contents</a></h2>
+<ul>
+
+<li><a href="#Motivation">Motivation</a></li>
+<li><a href="#Syntax">Syntax</a></li>
+<li><a href="#FulltextIndex">Creating and utilizing a Full-text index</a></li>
+</ul></div>
+<div class="section">
+<h2><a name="Motivation_.5BBack_to_TOC.5D"></a><a name="Motivation" id="Motivation">Motivation</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>Full-Text Search (FTS) queries are widely used in applications where users need to find records that satisfy an FTS predicate, i.e., where simple string-based matching is not sufficient. These queries are important when finding documents that contain a certain keyword is crucial. FTS queries are different from substring matching queries in that FTS queries find their query predicates as exact keywords in the given string, rather than treating a query predicate as a sequence of characters. For example, an FTS query that finds &#x201c;rain&#x201d; correctly returns a document when it contains &#x201c;rain&#x201d; as a word. However, a substring-matching query returns a document whenever it contains &#x201c;rain&#x201d; as a substring, for instance, a document with &#x201c;brain&#x201d; or &#x201c;training&#x201d; would be returned as well.</p></div>
+<div class="section">
+<h2><a name="Syntax_.5BBack_to_TOC.5D"></a><a name="Syntax" id="Syntax">Syntax</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>The syntax of AsterixDB FTS follows a portion of the XQuery FullText Search syntax. Two basic forms are as follows:</p>
+
+<div>
+<div>
+<pre class="source">    ftcontains(Expression1, Expression2, {FullTextOption})
+    ftcontains(Expression1, Expression2)
+</pre></div></div>
+
+<p>For example, we can execute the following query to find Chirp messages where the <tt>messageText</tt> field includes &#x201c;voice&#x201d; as a word. Please note that an FTS search is case-insensitive. Thus, &#x201c;Voice&#x201d; or &#x201c;voice&#x201d; will be evaluated as the same word.</p>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    select element {&quot;chirpId&quot;: msg.chirpId}
+    from ChirpMessages msg
+    where ftcontains(msg.messageText, &quot;voice&quot;, {&quot;mode&quot;:&quot;any&quot;});
+</pre></div></div>
+
+<p>The DDL and DML of TinySocial can be found in <a href="../sqlpp/primer-sqlpp.html#ADM:_Modeling_Semistructured_Data_in_AsterixDB">ADM: Modeling Semistructured Data in AsterixDB</a>.</p>
+<p>The <tt>Expression1</tt> is an expression that should be evaluable as a string at runtime as in the above example where <tt>msg.messageText</tt> is a string field. The <tt>Expression2</tt> can be a string, an (un)ordered list of string value(s), or an expression. In the last case, the given expression should be evaluable into one of the first two types, i.e., into a string value or an (un)ordered list of string value(s).</p>
+<p>The following examples are all valid expressions.</p>
+
+<div>
+<div>
+<pre class="source">   ... where ftcontains(msg.messageText, &quot;sound&quot;)
+   ... where ftcontains(msg.messageText, &quot;sound&quot;, {&quot;mode&quot;:&quot;any&quot;})
+   ... where ftcontains(msg.messageText, [&quot;sound&quot;, &quot;system&quot;], {&quot;mode&quot;:&quot;any&quot;})
+   ... where ftcontains(msg.messageText, {{&quot;speed&quot;, &quot;stand&quot;, &quot;customization&quot;}}, {&quot;mode&quot;:&quot;all&quot;})
+</pre></div></div>
+
+<p>The last <tt>FullTextOption</tt> parameter clarifies the given FTS request. If you omit the <tt>FullTextOption</tt> parameter, then the default value will be set for each possible option. Currently, we only have one option named <tt>mode</tt>. And as we extend the FTS feature, more options will be added. Please note that the format of <tt>FullTextOption</tt> is a record, thus you need to put the option(s) in a record <tt>{}</tt>. The <tt>mode</tt> option indicates whether the given FTS query is a conjunctive (AND) or disjunctive (OR) search request. This option can be either <tt>&#x201c;all&#x201d;</tt> (AND) or <tt>&#x201c;any&#x201d;</tt> (OR). The default value for <tt>mode</tt> is <tt>&#x201c;all&#x201d;</tt>. If one specifies <tt>&#x201c;any&#x201d;</tt>, a disjunctive search will be conducted. For example, the following query will find documents whose <tt>messageText</tt> field contains &#x201c;sound&#x201d; or &#x201c;system&#x201d;, so a document will be returned if it contains either &#x201c;sound&#x201d;, &#x201c;system&#x201d;, or both of the keywords.</p>
+
+<div>
+<div>
+<pre class="source">   ... where ftcontains(msg.messageText, [&quot;sound&quot;, &quot;system&quot;], {&quot;mode&quot;:&quot;any&quot;})
+</pre></div></div>
+
+<p>The other option parameter, <tt>&#x201c;all&#x201d;</tt>, specifies a conjunctive search. The following examples will find the documents whose <tt>messageText</tt> field contains both &#x201c;sound&#x201d; and &#x201c;system&#x201d;. If a document contains only &#x201c;sound&#x201d; or &#x201c;system&#x201d; but not both, it will not be returned.</p>
+
+<div>
+<div>
+<pre class="source">   ... where ftcontains(msg.messageText, [&quot;sound&quot;, &quot;system&quot;], {&quot;mode&quot;:&quot;all&quot;})
+   ... where ftcontains(msg.messageText, [&quot;sound&quot;, &quot;system&quot;])
+</pre></div></div>
+
+<p>Currently AsterixDB doesn&#x2019;t (yet) support phrase searches, so the following query will not work.</p>
+
+<div>
+<div>
+<pre class="source">   ... where ftcontains(msg.messageText, &quot;sound system&quot;, {&quot;mode&quot;:&quot;any&quot;})
+</pre></div></div>
+
+<p>As a workaround, the following queries can be used to achieve a roughly similar goal. The difference is that the following queries will find documents where <tt>msg.messageText</tt> contains both &#x201c;sound&#x201d; and &#x201c;system&#x201d;, but the order and adjacency of &#x201c;sound&#x201d; and &#x201c;system&#x201d; are not checked, unlike in a phrase search. As a result, the queries below would also return documents with &#x201c;sound system can be installed.&#x201d;, &#x201c;system sound is perfect.&#x201d;, or &#x201c;sound is not clear. You may need to install a new system.&#x201d;</p>
+
+<div>
+<div>
+<pre class="source">   ... where ftcontains(msg.messageText, [&quot;sound&quot;, &quot;system&quot;], {&quot;mode&quot;:&quot;all&quot;})
+   ... where ftcontains(msg.messageText, [&quot;sound&quot;, &quot;system&quot;])
+</pre></div></div>
+</div>
+<div class="section">
+<h2><a name="Creating_and_utilizing_a_Full-text_index_.5BBack_to_TOC.5D"></a><a name="FulltextIndex" id="FulltextIndex">Creating and utilizing a Full-text index</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>When there is a full-text index on the field that is being searched, rather than scanning all records, AsterixDB can utilize that index to expedite the execution of an FTS query. To create a full-text index, you need to specify the index type as <tt>fulltext</tt> in your DDL statement. For instance, the following DDL statement creates a full-text index on the <tt>GleambookMessages.message</tt> attribute. Note that a full-text index cannot be built on a dataset with a variable-length primary key (e.g., string).</p>
+
+<div>
+<div>
+<pre class="source">use TinySocial;
+
+create index messageFTSIdx on GleambookMessages(message) type fulltext;
+</pre></div></div></div>
+        </div>
+      </div>
+    </div>
+    <hr/>
+    <footer>
+      <div class="container-fluid">
+        <div class="row-fluid">
+<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
+        feather logo, and the Apache AsterixDB project logo are either
+        registered trademarks or trademarks of The Apache Software
+        Foundation in the United States and other countries.
+        All other marks mentioned may be trademarks or registered
+        trademarks of their respective owners.
+      </div>
+        </div>
+      </div>
+    </footer>
+  </body>
+</html>

diff --git a/content/docs/0.9.9/site/sqlpp/parquet.html b/content/docs/0.9.9/site/sqlpp/parquet.html
new file mode 100644
index 0000000..537b195
--- /dev/null
+++ b/content/docs/0.9.9/site/sqlpp/parquet.html

@@ -0,0 +1,595 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/sqlpp/parquet.md at 2024-04-01
+ | Rendered using Apache Maven Fluido Skin 1.7
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20240401" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>AsterixDB &#x2013; Querying Parquet Files</title>
+    <link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
+    <link rel="stylesheet" href="../css/site.css" />
+    <link rel="stylesheet" href="../css/print.css" media="print" />
+    <script type="text/javascript" src="../js/apache-maven-fluido-1.7.min.js"></script>
+
+  </head>
+  <body class="topBarDisabled">
+    <div class="container-fluid">
+      <div id="banner">
+        <div class="pull-left"><a href=".././" id="bannerLeft"><img src="../images/asterixlogo.png"  alt="AsterixDB"/></a></div>
+        <div class="pull-right"></div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+        <li id="publishDate">Last Published: 2024-04-01</li>
+      <li id="projectVersion" class="pull-right">Version: 0.9.9</li>
+      <li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
+        </ul>
+      </div>
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+    <ul class="nav nav-list">
+      <li class="nav-header">Get Started - Installation</li>
+    <li><a href="../ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
+    <li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
+    <li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
+      <li class="nav-header">AsterixDB Primer</li>
+    <li><a href="../sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
+      <li class="nav-header">Data Model</li>
+    <li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
+      <li class="nav-header">Queries</li>
+    <li><a href="../sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
+    <li><a href="../SQLPP.html" title="Raw SQL++ Grammar"><span class="none"></span>Raw SQL++ Grammar</a></li>
+    <li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
+      <li class="nav-header">API/SDK</li>
+    <li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
+    <li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
+      <li class="nav-header">Advanced Features</li>
+    <li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
+    <li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
+    <li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
+    <li><a href="../sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
+    <li><a href="../sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
+    <li><a href="../sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+    <li><a href="../geo/quickstart.html" title="GIS Support Overview"><span class="none"></span>GIS Support Overview</a></li>
+    <li><a href="../geo/functions.html" title="GIS Functions"><span class="none"></span>GIS Functions</a></li>
+    <li><a href="../interval_join.html" title="Support of Interval Joins"><span class="none"></span>Support of Interval Joins</a></li>
+    <li><a href="../spatial_join.html" title="Support of Spatial Joins"><span class="none"></span>Support of Spatial Joins</a></li>
+    <li><a href="../sqlpp/arrayindex.html" title="Support of Array Indexes"><span class="none"></span>Support of Array Indexes</a></li>
+      <li class="nav-header">Deprecated</li>
+    <li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
+    <li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
+    <li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
+</ul>
+          <hr />
+          <div id="poweredBy">
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+<a href=".././" title="AsterixDB" class="builtBy"><img class="builtBy"  alt="AsterixDB" src="../images/asterixlogo.png"    /></a>
+            </div>
+          </div>
+        </div>
+        <div id="bodyColumn"  class="span10" >
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+<h1>Querying Parquet Files</h1>
+<div class="section">
+<h2><a name="Table_of_Contents"></a><a name="toc" id="toc">Table of Contents</a></h2>
+<ul>
+
+<li><a href="#Overview">Overview</a></li>
+<li><a href="#DDL">DDL</a></li>
+<li><a href="#QueryParquetFiles">Query Parquet Files</a></li>
+<li><a href="#TypeCompatibility">Type Compatibility</a></li>
+<li><a href="#ParquetTypeFlags">Parquet Type Flags</a></li>
+</ul></div>
+<div class="section">
+<h2><a name="Overview_.5BBack_to_TOC.5D"></a><a name="Overview" id="Overview">Overview</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p><a class="externalLink" href="https://parquet.apache.org/">Apache Parquet</a> is a columnar file format for storing semi-structured data (like JSON). Apache AsterixDB supports running queries against Parquet files that are stored in Amazon S3 and Microsoft Azure Blob Storage as <a href="../aql/externaldata.html">External Datasets</a>.</p></div>
+<div class="section">
+<h2><a name="DDL_.5BBack_to_TOC.5D"></a><a name="DDL" id="DDL">DDL</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>To start, an end-user needs to create a type as follows:</p>
+
+<div>
+<div>
+<pre class="source">-- The type should not contain any declared fields
+CREATE TYPE ParquetType AS {
+}
+</pre></div></div>
+
+<p>Note that the created type does not have any declared fields. The reason is that Parquet files embed the schema within each file. Thus, no type is needed to be declared, and it is up to AsterixDB to read each file&#x2019;s schema. If the created type contains any declared fields, AsterixDB will throw an error:</p>
+
+<div>
+<div>
+<pre class="source">Type 'ParquetType' contains declared fields, which is not supported for 'parquet' format
+</pre></div></div>
+
+<p>Next, the user can create an external dataset - using the declared type - as follows:</p>
+<div class="section">
+<h3><a name="Amazon_S3"></a>Amazon S3</h3>
+
+<div>
+<div>
+<pre class="source">CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING S3
+(
+    -- Replace &lt;ACCESS-KEY&gt; with your access key
+    (&quot;accessKeyId&quot;=&quot;&lt;ACCESS-KEY&gt;&quot;),
+
+    -- Replace &lt;SECRET-ACCESS-KEY&gt; with your secret access key
+    (&quot;secretAccessKey&quot; = &quot;&lt;SECRET-ACCESS-KEY&gt;&quot;),
+
+    -- S3 bucket
+    (&quot;container&quot;=&quot;parquetBucket&quot;),
+
+    -- Path to the parquet files within the bucket
+    (&quot;definition&quot;=&quot;path/to/parquet/files&quot;),
+
+    -- Specifying the format as parquet
+    (&quot;format&quot; = &quot;parquet&quot;)
+);
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Microsoft_Azure_Blob_Storage"></a>Microsoft Azure Blob Storage</h3>
+
+<div>
+<div>
+<pre class="source">CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING AZUREBLOB
+(
+    -- Replace &lt;ACCOUNT-NAME&gt; with your account name
+    (&quot;accountName&quot;=&quot;&lt;ACCOUNT-NAME&gt;&quot;),
+
+    -- Replace &lt;ACCOUNT-KEY&gt; with your account key
+    (&quot;accountKey&quot;=&quot;&lt;ACCOUNT-KEY&gt;&quot;),
+
+    -- Azure Blob container
+    (&quot;container&quot;=&quot;parquetContainer&quot;),
+
+    -- Path to the parquet files within the container
+    (&quot;definition&quot;=&quot;path/to/parquet/files&quot;),
+
+    -- Specifying the format as parquet
+    (&quot;format&quot; = &quot;parquet&quot;)
+);
+</pre></div></div>
+
+<p><i><b>Additional settings/properties can be set as detailed later in <a href="#ParquetTypeFlags">Parquet Type Flags</a></b></i></p></div></div>
+<div class="section">
+<h2><a name="Query_Parquet_Files_.5BBack_to_TOC.5D"></a><a name="QueryParquetFiles" id="QueryParquetFiles">Query Parquet Files</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>To query the data stored in Parquet files, one can simply write a query against the created External Dataset. For example:</p>
+
+<div>
+<div>
+<pre class="source">SELECT COUNT(*)
+FROM ParquetDataset;
+</pre></div></div>
+
+<p>Another example:</p>
+
+<div>
+<div>
+<pre class="source">SELECT pd.age, COUNT(*) cnt
+FROM ParquetDataset pd
+GROUP BY pd.age;
+</pre></div></div>
+</div>
+<div class="section">
+<h2><a name="Type_Compatibility_.5BBack_to_TOC.5D"></a><a name="TypeCompatibility" id="TypeCompatibility">Type Compatibility</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>AsterixDB supports Parquet&#x2019;s generic types such as <tt>STRING</tt>, <tt>INT</tt> and <tt>DOUBLE</tt>. However, Parquet files could contain <a class="externalLink" href="https://github.com/apache/parquet-format/blob/master/LogicalTypes.md">additional types</a> such as <tt>DATE</tt> and <tt>DATETIME</tt>-like types. The following table shows the type mapping between Apache Parquet and AsterixDB:</p>
+
+<table border="0" class="table table-striped">
+    <thead>
+        
+<tr class="a">
+            
+<th>Parquet</th>
+            
+<th>AsterixDB</th>
+            
+<th>Value Examples</th>
+            
+<th>Comment</th>
+        </tr>
+    </thead>
+    <tbody>
+        
+<tr class="b">
+            
+<td><tt>BOOLEAN</tt></td>
+            
+<td><tt>BOOLEAN</tt></td>
+            
+<td><tt>true</tt> / <tt>false</tt></td>
+            
+<td>-</td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>INT_8</tt></td>
+            
+<td rowspan="8"><tt>BIGINT</tt></td>
+            
+<td rowspan="8">
+                AsterixDB <tt>BIGINT</tt> Range:
+                
+<ul>
+                    
+<li><b>Min</b>: -9,223,372,036,854,775,808</li>
+                    
+<li><b>Max</b>: 9,223,372,036,854,775,807</li>
+                </ul>
+            </td>
+            
+<td rowspan="7">-</td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>INT_16</tt></td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>INT_32</tt></td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>INT_64</tt></td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>UINT_8</tt></td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>UINT_16</tt></td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>UINT_32</tt></td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>UINT_64</tt></td>
+            
+<td>There is a possibility that a value overflows. A warning will be issued in case of an overflow and
+                <tt>MISSING</tt> would be returned.
+            </td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>FLOAT</tt></td>
+            
+<td rowspan="4"><tt>DOUBLE</tt></td>
+            
+<td rowspan="4">
+                AsterixDB <tt>DOUBLE</tt> Range:
+                
+<ul>
+                    
+<li><b>Min Positive Value</b>: 2^-1074</li>
+                    
+<li><b>Max Positive Value</b>: 2^1023</li>
+                </ul>
+            </td>
+            
+<td rowspan="2">-</td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>DOUBLE</tt></td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>FIXED_LEN_BYTE_ARRAY (DECIMAL)</tt></td>
+            
+<td rowspan="2">
+                Parquet <tt>DECIMAL</tt> values are converted to doubles, with the possibility of precision loss.
+                The flag <tt>decimal-to-double</tt> must be set upon creating the dataset.
+                
+<ul>
+<li><i>See <a href="#ParquetTypeFlags">Parquet Type Flags</a></i></li></ul>
+            </td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>BINARY (DECIMAL)</tt></td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>BINARY (ENUM)</tt></td><td><tt>STRING</tt></td>
+            
+<td><tt>&quot;Fruit&quot;</tt></td>
+            
+<td>Parquet Enum values are parsed as Strings</td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>BINARY (UTF8)</tt></td>
+            
+<td><tt>STRING</tt></td>
+            
+<td><tt>&quot;Hello World&quot;</tt></td>
+            
+<td>-</td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>FIXED_LEN_BYTE_ARRAY (UUID)</tt></td>
+            
+<td><tt>UUID</tt></td>
+            
+<td><tt>uuid(&quot;123e4567-e89b-12d3-a456-426614174000&quot;)</tt></td>
+            
+<td>-</td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>INT_32 (DATE)</tt></td>
+            
+<td><tt>DATE</tt></td>
+            
+<td><tt>date(&quot;2021-11-01&quot;)</tt></td>
+            
+<td>-</td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>INT_32 (TIME)</tt></td>
+            
+<td><tt>TIME</tt></td>
+            
+<td rowspan="2"><tt>time(&quot;00:00:00.000&quot;)</tt></td>
+            
+<td>Time in milliseconds.</td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>INT_64 (TIME)</tt></td>
+            
+<td><tt>TIME</tt></td>
+            
+<td>Time in micro/nano seconds.</td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>INT_64 (TIMESTAMP)</tt></td>
+            
+<td rowspan="2"><tt>DATETIME</tt></td>
+            
+<td rowspan="2"><tt>datetime(&quot;2021-11-01T21:37:13.738&quot;)</tt></td>
+            
+<td>Timestamp in milli/micro/nano seconds. Parquet can also store timestamp values with the option
+                <tt>isAdjustedToUTC = true</tt>. To get the local <tt>DATETIME</tt> value, the user can set
+                the time zone ID using the option <tt>timezone</tt>.
+                
+<ul>
+<li><i>See <a href="#ParquetTypeFlags">Parquet Type Flags</a></i></li></ul>
+            </td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>INT96</tt></td>
+            
+<td>Timestamp values that separate days and time to form a timestamp. INT96 is always in local time.</td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>BINARY (JSON)</tt></td>
+            
+<td>any type</td>
+            
+<td>
+                
+<ul>
+                    
+<li><tt>{&quot;name&quot;: &quot;John&quot;}</tt></li>
+                    
+<li><tt>[1, 2, 3]</tt></li>
+                </ul> 
+            </td>
+            
+<td>
+                Parse JSON string into internal AsterixDB value.
+                The flag <tt>parse-json-string</tt> is set by default. To get the string value (i.e., not parsed as
+                AsterixDB value), unset the flag <tt>parse-json-string</tt>.
+                
+<ul>
+<li><i>See <a href="#ParquetTypeFlags">Parquet Type Flags</a></i></li></ul>
+            </td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>BINARY</tt></td>
+            
+<td rowspan="2"><tt>BINARY</tt></td>
+            
+<td><tt>hex(&quot;0101FF&quot;)</tt></td>
+            
+<td>-</td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>BSON</tt></td>
+            
+<td>N/A</td>
+            
+<td>BSON values will be returned as <tt>BINARY</tt></td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>LIST</tt></td>
+            
+<td><tt>ARRAY</tt></td>
+            
+<td><tt>[1, 2, 3]</tt></td>
+            
+<td>Parquet's <tt>LIST</tt> type is converted into <tt>ARRAY</tt></td>
+        </tr>
+        
+<tr class="a">
+            
+<td><tt>MAP</tt></td>
+            
+<td><tt>ARRAY</tt> of <tt>OBJECT</tt></td>
+            
+<td><tt>[{&quot;key&quot;:1, &quot;value&quot;:1}, {&quot;key&quot;:2, &quot;value&quot;:2}]</tt></td>
+            
+<td>Parquet's <tt>MAP</tt> types are converted into an <tt>ARRAY</tt> of <tt>OBJECT</tt>. Each 
+                <tt>OBJECT</tt> value consists of two fields: <tt>key</tt> and <tt>value</tt>
+            </td>
+        </tr>
+        
+<tr class="b">
+            
+<td><tt>FIXED_LEN_BYTE_ARRAY (INTERVAL)</tt></td>
+            
+<td>-</td>
+            
+<td>N/A</td>
+            
+<td><tt>INTERVAL</tt> is not supported. A warning will be issued and <tt>MISSING</tt> value
+                will be returned.
+            </td>
+        </tr>
+    </tbody>
+</table>
+</div>
+<div class="section">
+<h2><a name="Parquet_Type_Flags_.5BBack_to_TOC.5D"></a><a name="ParquetTypeFlags" id="ParquetTypeFlags">Parquet Type Flags</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>The table in <a href="#TypeCompatibility">Type Compatibility</a> shows the type mapping between Parquet and AsterixDB. Some of the Parquet types are not parsed by default as those types are not natively supported in AsterixDB. However, the user can set a flag to convert some of those types into a supported AsterixDB type.</p>
+<div class="section">
+<div class="section">
+<div class="section">
+<h5><a name="DECIMAL_TYPE"></a>DECIMAL TYPE</h5>
+<p>The user can enable parsing <tt>DECIMAL</tt> Parquet values by enabling a certain flag as in the following example:</p>
+
+<div>
+<div>
+<pre class="source">CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING S3
+(
+    -- Credentials and path to Parquet files
+    ...
+
+    -- Enable converting decimal values to double
+    (&quot;decimal-to-double&quot; = &quot;true&quot;)
+);
+</pre></div></div>
+
+<p>This flag will enable parsing/converting <tt>DECIMAL</tt> values/types into <tt>DOUBLE</tt>. For example, if the flag <tt>decimal-to-double</tt> is not set and a Parquet file contains a <tt>DECIMAL</tt> value, the following error will be thrown when running a query that requests a <tt>DECIMAL</tt> value:</p>
+
+<div>
+<div>
+<pre class="source">Parquet type &quot;optional fixed_len_byte_array(16) decimalType (DECIMAL(38,18))&quot; is not supported by default. To enable type conversion, recreate the external dataset with the option &quot;decimal-to-double&quot; enabled
+</pre></div></div>
+
+<p>and the returned value will be <tt>MISSING</tt>. If the flag <tt>decimal-to-double</tt> is set, the converted <tt>DOUBLE</tt> value will be returned.</p></div>
+<div class="section">
+<h5><a name="TEMPORAL_TYPES"></a>TEMPORAL TYPES</h5>
+<p>For the temporal types (namely <tt>DATETIME</tt>), their values could be stored in Parquet with the option <tt>isAdjustedToUTC = true</tt>. Hence, the user has to provide the timezone ID to adjust their values to the local value by setting the flag <tt>timezone</tt>. To do so, a user can set the timezone ID to &#x201c;<b>PST</b>&#x201d; upon creating a dataset as in the following example:</p>
+
+<div>
+<div>
+<pre class="source">CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING S3
+(
+    -- Credentials and path to Parquet files
+    ...
+
+    -- Converting UTC time to PST time
+    (&quot;timezone&quot; = &quot;PST&quot;)
+);
+</pre></div></div>
+
+<p>If the flag <tt>timezone</tt> is not set, a warning will appear when running a query:</p>
+
+<div>
+<div>
+<pre class="source">Parquet file(s) contain &quot;datetime&quot; values that are adjusted to UTC. Recreate the external dataset and set &quot;timezone&quot; to get the local &quot;datetime&quot; value.
+</pre></div></div>
+
+<p>and the UTC <tt>DATETIME</tt> will be returned.</p></div>
+<div class="section">
+<h5><a name="JSON_TYPE"></a>JSON TYPE</h5>
+<p>By default, we parse the JSON values into AsterixDB values, where a user can process those values using <tt>SQL++</tt> queries. However, one could disable the parsing of JSON string values (which are stored as <tt>STRING</tt>) by unsetting the flag <tt>parse-json-string</tt> as in the following example:</p>
+
+<div>
+<div>
+<pre class="source">CREATE EXTERNAL DATASET ParquetDataset(ParquetType) USING S3
+(
+    -- Credentials and path to Parquet files
+    ...
+
+    -- Stop parsing JSON string values
+    (&quot;parse-json-string&quot; = &quot;false&quot;)
+);
+</pre></div></div>
+
+<p>And the returned value will be of type <tt>STRING</tt>.</p></div>
+<div class="section">
+<h5><a name="INTERVAL_TYPE"></a>INTERVAL TYPE</h5>
+<p>Currently, AsterixDB does not support Parquet&#x2019;s <tt>INTERVAL</tt> type. When a query requests (or projects) an <tt>INTERVAL</tt> value, a warning will be issued and a <tt>MISSING</tt> value will be returned instead.</p></div></div></div></div>
+        </div>
+      </div>
+    </div>
+    <hr/>
+    <footer>
+      <div class="container-fluid">
+        <div class="row-fluid">
+<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
+        feather logo, and the Apache AsterixDB project logo are either
+        registered trademarks or trademarks of The Apache Software
+        Foundation in the United States and other countries.
+        All other marks mentioned may be trademarks or registered
+        trademarks of their respective owners.
+      </div>
+        </div>
+      </div>
+    </footer>
+  </body>
+</html>

diff --git a/content/docs/0.9.9/site/sqlpp/primer-sqlpp.html b/content/docs/0.9.9/site/sqlpp/primer-sqlpp.html
new file mode 100644
index 0000000..51bb30d
--- /dev/null
+++ b/content/docs/0.9.9/site/sqlpp/primer-sqlpp.html

@@ -0,0 +1,858 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/sqlpp/primer-sqlpp.md at 2024-04-01
+ | Rendered using Apache Maven Fluido Skin 1.7
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20240401" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>AsterixDB &#x2013; AsterixDB 101: An ADM and SQL++ Primer</title>
+    <link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
+    <link rel="stylesheet" href="../css/site.css" />
+    <link rel="stylesheet" href="../css/print.css" media="print" />
+    <script type="text/javascript" src="../js/apache-maven-fluido-1.7.min.js"></script>
+
+  </head>
+  <body class="topBarDisabled">
+    <div class="container-fluid">
+      <div id="banner">
+        <div class="pull-left"><a href=".././" id="bannerLeft"><img src="../images/asterixlogo.png"  alt="AsterixDB"/></a></div>
+        <div class="pull-right"></div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+        <li id="publishDate">Last Published: 2024-04-01</li>
+      <li id="projectVersion" class="pull-right">Version: 0.9.9</li>
+      <li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
+        </ul>
+      </div>
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+    <ul class="nav nav-list">
+      <li class="nav-header">Get Started - Installation</li>
+    <li><a href="../ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
+    <li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
+    <li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
+      <li class="nav-header">AsterixDB Primer</li>
+    <li class="active"><a href="#"><span class="none"></span>Using SQL++</a></li>
+      <li class="nav-header">Data Model</li>
+    <li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
+      <li class="nav-header">Queries</li>
+    <li><a href="../sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
+    <li><a href="../SQLPP.html" title="Raw SQL++ Grammar"><span class="none"></span>Raw SQL++ Grammar</a></li>
+    <li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
+      <li class="nav-header">API/SDK</li>
+    <li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
+    <li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
+      <li class="nav-header">Advanced Features</li>
+    <li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
+    <li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
+    <li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
+    <li><a href="../sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
+    <li><a href="../sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
+    <li><a href="../sqlpp/similarity.html" title="Support of Similarity Queries"><span class="none"></span>Support of Similarity Queries</a></li>
+    <li><a href="../geo/quickstart.html" title="GIS Support Overview"><span class="none"></span>GIS Support Overview</a></li>
+    <li><a href="../geo/functions.html" title="GIS Functions"><span class="none"></span>GIS Functions</a></li>
+    <li><a href="../interval_join.html" title="Support of Interval Joins"><span class="none"></span>Support of Interval Joins</a></li>
+    <li><a href="../spatial_join.html" title="Support of Spatial Joins"><span class="none"></span>Support of Spatial Joins</a></li>
+    <li><a href="../sqlpp/arrayindex.html" title="Support of Array Indexes"><span class="none"></span>Support of Array Indexes</a></li>
+      <li class="nav-header">Deprecated</li>
+    <li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
+    <li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
+    <li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
+</ul>
+          <hr />
+          <div id="poweredBy">
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+<a href=".././" title="AsterixDB" class="builtBy"><img class="builtBy"  alt="AsterixDB" src="../images/asterixlogo.png"    /></a>
+            </div>
+          </div>
+        </div>
+        <div id="bodyColumn"  class="span10" >
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+<h1>AsterixDB 101: An ADM and SQL++ Primer</h1>
+<div class="section">
+<h2><a name="Welcome_to_AsterixDB.21"></a>Welcome to AsterixDB!</h2>
+<p>This document introduces the main features of AsterixDB&#x2019;s data model (ADM) and its new SQL-like query language (SQL++) by example. The example is a simple scenario involving (synthetic) sample data modeled after data from the social domain. This document describes a set of sample datasets, together with a set of illustrative queries, to introduce you to the &#x201c;AsterixDB user experience&#x201d;. The complete set of steps required to create and load a handful of sample datasets, along with runnable queries and the expected results for each query, are included.</p>
+<p>This document assumes that you are at least vaguely familiar with AsterixDB and why you might want to use it. Most importantly, it assumes you already have a running instance of AsterixDB and that you know how to query it using AsterixDB&#x2019;s basic web interface. For more information on these topics, you should go through the steps in <a href="../install.html">Installing Asterix Using Managix</a> before reading this document and make sure that you have a running AsterixDB instance ready to go. To get your feet wet, you should probably start with a simple local installation of AsterixDB on your favorite machine, accepting all of the default settings that Managix offers. Later you can graduate to trying AsterixDB on a cluster, its real intended home (since it targets Big Data). (Note: With the exception of specifying the correct locations where you put the source data for this example, there should be no changes needed in the SQL++ statements to run the examples locally and/or to run them on a cluster when you are ready to take that step.)</p>
+<p>As you read through this document, you should try each step for yourself on your own AsterixDB instance. You will use the AsterixDB web interface to do this, and for SQL++ you will need to select SQL++ instead of AQL as your language of choice in the Query Language box that sits underneath the UI&#x2019;s query entry area. Once you have reached the end of this tutorial, you will be fully armed and dangerous, with all the basic AsterixDB knowledge that you&#x2019;ll need to start down the path of modeling, storing, and querying your own semistructured data.</p></div>
+<div class="section">
+<h2><a name="ADM:_Modeling_Semistructured_Data_in_AsterixDB"></a>ADM: Modeling Semistructured Data in AsterixDB</h2>
+<p>In this section you will learn all about modeling Big Data using ADM, the data model of the AsterixDB BDMS.</p>
+<div class="section">
+<h3><a name="Dataverses.2C_Datatypes.2C_and_Datasets"></a>Dataverses, Datatypes, and Datasets</h3>
+<p>The top-level organizing concept in the AsterixDB world is the <i>dataverse</i>. A dataverse&#x2014;short for &#x201c;data universe&#x201d;&#x2014;is a place (similar to a database in a relational DBMS) in which to create and manage the types, datasets, functions, and other artifacts for a given AsterixDB application. When you start using an AsterixDB instance for the first time, it starts out &#x201c;empty&#x201d;; it contains no data other than the AsterixDB system catalogs (which live in a special dataverse called the Metadata dataverse). To store your data in AsterixDB, you will first create a dataverse and then you use it for the <i>datatypes</i> and <i>datasets</i> for managing your own data. A datatype tells AsterixDB what you know (or more accurately, what you want it to know) a priori about one of the kinds of data instances that you want AsterixDB to hold for you. A dataset is a collection of data instances of a datatype, and AsterixDB makes sure that the data instances that you put in it conform to its specified type. Since AsterixDB targets semistructured data, you can use <i>open</i> datatypes and tell it as little or as much as you wish about your data up front; the more you tell it up front, the less information it will have to store repeatedly in the individual data instances that you give it. Instances of open datatypes are permitted to have additional content, beyond what the datatype says, as long as they at least contain the information prescribed by the datatype definition. Open typing allows data to vary from one instance to another and it leaves wiggle room for application evolution in terms of what might need to be stored in the future. If you want to restrict data instances in a dataset to have only what the datatype says, and nothing extra, you can define a <i>closed</i> datatype for that dataset and AsterixDB will keep users from storing objects that have extra data in them. 
Datatypes are open by default unless you tell AsterixDB otherwise. Let&#x2019;s put these concepts to work.</p>
+<p>Our little sample scenario involves information about users of two hypothetical social networks, Gleambook and Chirp, and their messages. We&#x2019;ll start by defining a dataverse called &#x201c;TinySocial&#x201d; to hold our datatypes and datasets. The AsterixDB data model (ADM) is essentially a superset of JSON&#x2014;it&#x2019;s what you get by extending JSON with more data types and additional data modeling constructs borrowed from object databases. The following shows how we can create the TinySocial dataverse plus a set of ADM types for modeling Chirp users, their Chirps, Gleambook users, their users&#x2019; employment information, and their messages. (Note: Keep in mind that this is just a tiny and somewhat silly example intended for illustrating some of the key features of AsterixDB. :-)) As a point of information, SQL++ is case-insensitive for both keywords and built-in type names, so the exact style of the examples below is just one of a number of possibilities.</p>
+
+<div>
+<div>
+<pre class="source">    DROP DATAVERSE TinySocial IF EXISTS;
+    CREATE DATAVERSE TinySocial;
+    USE TinySocial;
+
+    CREATE TYPE ChirpUserType AS {
+        screenName: string,
+        lang: string,
+        friendsCount: int,
+        statusesCount: int,
+        name: string,
+        followersCount: int
+    };
+
+    CREATE TYPE ChirpMessageType AS closed {
+        chirpId: string,
+        user: ChirpUserType,
+        senderLocation: point?,
+        sendTime: datetime,
+        referredTopics: {{ string }},
+        messageText: string
+    };
+
+    CREATE TYPE EmploymentType AS {
+        organizationName: string,
+        startDate: date,
+        endDate: date?
+    };
+
+    CREATE TYPE GleambookUserType AS {
+        id: int,
+        alias: string,
+        name: string,
+        userSince: datetime,
+        friendIds: {{ int }},
+        employment: [EmploymentType]
+    };
+
+    CREATE TYPE GleambookMessageType AS {
+        messageId: int,
+        authorId: int,
+        inResponseTo: int?,
+        senderLocation: point?,
+        message: string
+    };
+</pre></div></div>
+
+<p>The first three lines above tell AsterixDB to drop the old TinySocial dataverse, if one already exists, and then to create a brand new one and make it the focus of the statements that follow. The first <i>CREATE TYPE</i> statement creates a datatype for holding information about Chirp users. It is an object type with a mix of integer and string data, very much like a (flat) relational tuple. The indicated fields are all mandatory, but because the type is open, additional fields are welcome. The second statement creates a datatype for Chirp messages; this shows how to specify a closed type. Interestingly (based on one of Chirp&#x2019;s APIs), each Chirp message actually embeds an instance of the sending user&#x2019;s information (current as of when the message was sent), so this is an example of a nested object in ADM. Chirp messages can optionally contain the sender&#x2019;s location, which is modeled via the senderLocation field of spatial type <i>point</i>; the question mark following the field type indicates its optionality. An optional field is like a nullable field in SQL&#x2014;it may be present or missing, but when it&#x2019;s present, its value&#x2019;s data type will conform to the datatype&#x2019;s specification. The sendTime field illustrates the use of a temporal primitive type, <i>datetime</i>. Lastly, the referredTopics field illustrates another way that ADM is richer than the relational model; this field holds a bag (<i>a.k.a.</i> an unordered list) of strings. Since the overall datatype definition for Chirp messages says &#x201c;closed&#x201d;, the fields that it lists are the only fields that instances of this type will be allowed to contain. The next two <i>CREATE TYPE</i> statements create an object type for holding information about one component of the employment history of a Gleambook user and then an object type for holding the user information itself. The Gleambook user type highlights a few additional ADM data model features. 
Its friendIds field is a bag of integers, presumably the Gleambook user ids for this user&#x2019;s friends, and its employment field is an ordered list of employment objects. The final <i>CREATE TYPE</i> statement defines a type for handling the content of a Gleambook message in our hypothetical social data storage scenario.</p>
+<p>Before going on, we need to once again emphasize the idea that AsterixDB is aimed at storing and querying not just Big Data, but Big <i>Semistructured</i> Data. This means that most of the fields listed in the <i>CREATE TYPE</i> statements above could have been omitted without changing anything other than the resulting size of stored data instances on disk. AsterixDB stores its information about the fields defined a priori as separate metadata, whereas the information about other fields that are &#x201c;just there&#x201d; in instances of open datatypes is stored with each instance&#x2014;making for more bits on disk and longer times for operations affected by data size (e.g., dataset scans). The only fields that <i>must</i> be specified a priori are the primary key fields of each dataset.</p></div>
+<div class="section">
+<h3><a name="Creating_Datasets_and_Indexes"></a>Creating Datasets and Indexes</h3>
+<p>Now that we have defined our datatypes, we can move on and create datasets to store the actual data. (If we wanted to, we could even have several named datasets based on any one of these datatypes.) We can do this as follows, utilizing the SQL++ DDL capabilities of AsterixDB.</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    CREATE DATASET GleambookUsers(GleambookUserType)
+        PRIMARY KEY id;
+
+    CREATE DATASET GleambookMessages(GleambookMessageType)
+        PRIMARY KEY messageId;
+
+    CREATE DATASET ChirpUsers(ChirpUserType)
+        PRIMARY KEY screenName;
+
+    CREATE DATASET ChirpMessages(ChirpMessageType)
+        PRIMARY KEY chirpId
+        hints(cardinality=100);
+
+    CREATE INDEX gbUserSinceIdx on GleambookUsers(userSince);
+    CREATE INDEX gbAuthorIdx on GleambookMessages(authorId) TYPE btree;
+    CREATE INDEX gbSenderLocIndex on GleambookMessages(senderLocation) TYPE rtree;
+    CREATE INDEX gbMessageIdx on GleambookMessages(message) TYPE keyword;
+
+    SELECT VALUE ds FROM Metadata.`Dataset` ds;
+    SELECT VALUE ix FROM Metadata.`Index` ix;
+</pre></div></div>
+
+<p>The SQL++ DDL statements above create four datasets for holding our social data in the TinySocial dataverse: GleambookUsers, GleambookMessages, ChirpUsers, and ChirpMessages. The first <i>CREATE DATASET</i> statement creates the GleambookUsers data set. It specifies that this dataset will store data instances conforming to GleambookUserType and that it has a primary key which is the id field of each instance. The primary key information is used by AsterixDB to uniquely identify instances for the purpose of later lookup and for use in secondary indexes. Each AsterixDB dataset is stored (and indexed) in the form of a B+ tree on primary key; secondary indexes point to their indexed data by primary key. In AsterixDB clusters, the primary key is also used to hash-partition (<i>a.k.a.</i> shard) the dataset across the nodes of the cluster. The next three <i>CREATE DATASET</i> statements are similar. The last one illustrates an optional clause for providing useful hints to AsterixDB. In this case, the hint tells AsterixDB that the dataset definer is anticipating that the ChirpMessages dataset will contain roughly 100 objects; knowing this can help AsterixDB to more efficiently manage and query this dataset. (AsterixDB does not yet gather and maintain data statistics; it will currently, arbitrarily, assume a cardinality of one million objects per dataset in the absence of such an optional definition-time hint.)</p>
+<p>The <i>CREATE DATASET</i> statements above are followed by four more DDL statements, each of which creates a secondary index on a field of one of the datasets. The first one indexes the GleambookUsers dataset on its user-since field. This index will be a B+ tree index; its type is unspecified and <i>btree</i> is the default type. The other three illustrate how you can explicitly specify the desired type of index. In addition to btree, <i>rtree</i> and inverted <i>keyword</i> indexes are supported by AsterixDB. Indexes can also have composite keys, and more advanced text indexing is available as well (ngram(k), where k is the desired gram length).</p></div>
+<div class="section">
+<h3><a name="Querying_the_Metadata_Dataverse"></a>Querying the Metadata Dataverse</h3>
+<p>The last two statements above show how you can use queries in SQL++ to examine the AsterixDB system catalogs and tell what artifacts you have created. Just as relational DBMSs use their own tables to store their catalogs, AsterixDB uses its own datasets to persist descriptions of its datasets, datatypes, indexes, and so on. Running the first of the two queries above will list all of your newly created datasets, and it will also show you a full list of all the metadata datasets. (You can then explore from there on your own if you are curious.) These last two queries also illustrate a few other factoids worth knowing: First, AsterixDB allows queries to span dataverses via the use of fully-qualified dataset names (i.e., <i>dataversename.datasetname</i>) to reference datasets that live in a dataverse other than the one referenced in the most recently executed <i>USE</i> directive. Second, they show how to escape SQL++ keywords (or other special names) in object names by using backquotes. Last but not least, they show that SQL++ supports a <i>SELECT VALUE</i> variation of SQL&#x2019;s traditional <i>SELECT</i> statement that returns a single value (or element) from a query instead of constructing a new object as the query&#x2019;s result like <i>SELECT</i> does; here, the returned value is an entire object from the dataset being queried (e.g., <i>SELECT VALUE ds</i> in the first statement returns the entire object from the metadata dataset containing the descriptions of all datasets).</p></div></div>
+<div class="section">
+<h2><a name="Loading_Data_Into_AsterixDB"></a>Loading Data Into AsterixDB</h2>
+<p>Okay, so far so good&#x2014;AsterixDB is now ready for data, so let&#x2019;s give it some data to store. Our next task will be to insert some sample data into the four datasets that we just defined. Here we will load a tiny set of objects, defined in ADM format (a superset of JSON), into each dataset. In the boxes below you can see insert statements with a list of the objects to be inserted. The files themselves are also linked. Take a few minutes to look carefully at each of the sample data sets. This will give you a better sense of the nature of the data that we are about to load and query. We should note that ADM format is a textual serialization of what AsterixDB will actually store; when persisted in AsterixDB, the data format will be binary and the data in the predefined fields of the data instances will be stored separately from their associated field name and type metadata.</p>
+<p><a href="../data/chu.adm">Chirp Users</a></p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    INSERT INTO ChirpUsers
+    ([
+    {&quot;screenName&quot;:&quot;NathanGiesen@211&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:18,&quot;statusesCount&quot;:473,&quot;name&quot;:&quot;Nathan Giesen&quot;,&quot;followersCount&quot;:49416},
+    {&quot;screenName&quot;:&quot;ColineGeyer@63&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:121,&quot;statusesCount&quot;:362,&quot;name&quot;:&quot;Coline Geyer&quot;,&quot;followersCount&quot;:17159},
+    {&quot;screenName&quot;:&quot;NilaMilliron_tw&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:445,&quot;statusesCount&quot;:164,&quot;name&quot;:&quot;Nila Milliron&quot;,&quot;followersCount&quot;:22649},
+    {&quot;screenName&quot;:&quot;ChangEwing_573&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:182,&quot;statusesCount&quot;:394,&quot;name&quot;:&quot;Chang Ewing&quot;,&quot;followersCount&quot;:32136}
+    ]);
+</pre></div></div>
+
+<p><a href="../data/chm.adm">Chirp Messages</a></p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    INSERT INTO ChirpMessages
+    ([
+    {&quot;chirpId&quot;:&quot;1&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;NathanGiesen@211&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:39339,&quot;statusesCount&quot;:473,&quot;name&quot;:&quot;Nathan Giesen&quot;,&quot;followersCount&quot;:49416},&quot;senderLocation&quot;:point(&quot;47.44,80.65&quot;),&quot;sendTime&quot;:datetime(&quot;2008-04-26T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;product-z&quot;,&quot;customization&quot;}},&quot;messageText&quot;:&quot; love product-z its customization is good:)&quot;},
+    {&quot;chirpId&quot;:&quot;2&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;ColineGeyer@63&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:121,&quot;statusesCount&quot;:362,&quot;name&quot;:&quot;Coline Geyer&quot;,&quot;followersCount&quot;:17159},&quot;senderLocation&quot;:point(&quot;32.84,67.14&quot;),&quot;sendTime&quot;:datetime(&quot;2010-05-13T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;ccast&quot;,&quot;shortcut-menu&quot;}},&quot;messageText&quot;:&quot; like ccast its shortcut-menu is awesome:)&quot;},
+    {&quot;chirpId&quot;:&quot;3&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;NathanGiesen@211&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:39339,&quot;statusesCount&quot;:473,&quot;name&quot;:&quot;Nathan Giesen&quot;,&quot;followersCount&quot;:49416},&quot;senderLocation&quot;:point(&quot;29.72,75.8&quot;),&quot;sendTime&quot;:datetime(&quot;2006-11-04T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;product-w&quot;,&quot;speed&quot;}},&quot;messageText&quot;:&quot; like product-w the speed is good:)&quot;},
+    {&quot;chirpId&quot;:&quot;4&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;NathanGiesen@211&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:39339,&quot;statusesCount&quot;:473,&quot;name&quot;:&quot;Nathan Giesen&quot;,&quot;followersCount&quot;:49416},&quot;senderLocation&quot;:point(&quot;39.28,70.48&quot;),&quot;sendTime&quot;:datetime(&quot;2011-12-26T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;product-b&quot;,&quot;voice-command&quot;}},&quot;messageText&quot;:&quot; like product-b the voice-command is mind-blowing:)&quot;},
+    {&quot;chirpId&quot;:&quot;5&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;NathanGiesen@211&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:39339,&quot;statusesCount&quot;:473,&quot;name&quot;:&quot;Nathan Giesen&quot;,&quot;followersCount&quot;:49416},&quot;senderLocation&quot;:point(&quot;40.09,92.69&quot;),&quot;sendTime&quot;:datetime(&quot;2006-08-04T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;product-w&quot;,&quot;speed&quot;}},&quot;messageText&quot;:&quot; can't stand product-w its speed is terrible:(&quot;},
+    {&quot;chirpId&quot;:&quot;6&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;ColineGeyer@63&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:121,&quot;statusesCount&quot;:362,&quot;name&quot;:&quot;Coline Geyer&quot;,&quot;followersCount&quot;:17159},&quot;senderLocation&quot;:point(&quot;47.51,83.99&quot;),&quot;sendTime&quot;:datetime(&quot;2010-05-07T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;x-phone&quot;,&quot;voice-clarity&quot;}},&quot;messageText&quot;:&quot; like x-phone the voice-clarity is good:)&quot;},
+    {&quot;chirpId&quot;:&quot;7&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;ChangEwing_573&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:182,&quot;statusesCount&quot;:394,&quot;name&quot;:&quot;Chang Ewing&quot;,&quot;followersCount&quot;:32136},&quot;senderLocation&quot;:point(&quot;36.21,72.6&quot;),&quot;sendTime&quot;:datetime(&quot;2011-08-25T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;product-y&quot;,&quot;platform&quot;}},&quot;messageText&quot;:&quot; like product-y the platform is good&quot;},
+    {&quot;chirpId&quot;:&quot;8&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;NathanGiesen@211&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:39339,&quot;statusesCount&quot;:473,&quot;name&quot;:&quot;Nathan Giesen&quot;,&quot;followersCount&quot;:49416},&quot;senderLocation&quot;:point(&quot;46.05,93.34&quot;),&quot;sendTime&quot;:datetime(&quot;2005-10-14T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;product-z&quot;,&quot;shortcut-menu&quot;}},&quot;messageText&quot;:&quot; like product-z the shortcut-menu is awesome:)&quot;},
+    {&quot;chirpId&quot;:&quot;9&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;NathanGiesen@211&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:39339,&quot;statusesCount&quot;:473,&quot;name&quot;:&quot;Nathan Giesen&quot;,&quot;followersCount&quot;:49416},&quot;senderLocation&quot;:point(&quot;36.86,74.62&quot;),&quot;sendTime&quot;:datetime(&quot;2012-07-21T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;ccast&quot;,&quot;voicemail-service&quot;}},&quot;messageText&quot;:&quot; love ccast its voicemail-service is awesome&quot;},
+    {&quot;chirpId&quot;:&quot;10&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;ColineGeyer@63&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:121,&quot;statusesCount&quot;:362,&quot;name&quot;:&quot;Coline Geyer&quot;,&quot;followersCount&quot;:17159},&quot;senderLocation&quot;:point(&quot;29.15,76.53&quot;),&quot;sendTime&quot;:datetime(&quot;2008-01-26T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;ccast&quot;,&quot;voice-clarity&quot;}},&quot;messageText&quot;:&quot; hate ccast its voice-clarity is OMG:(&quot;},
+    {&quot;chirpId&quot;:&quot;11&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;NilaMilliron_tw&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:445,&quot;statusesCount&quot;:164,&quot;name&quot;:&quot;Nila Milliron&quot;,&quot;followersCount&quot;:22649},&quot;senderLocation&quot;:point(&quot;37.59,68.42&quot;),&quot;sendTime&quot;:datetime(&quot;2008-03-09T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;x-phone&quot;,&quot;platform&quot;}},&quot;messageText&quot;:&quot; can't stand x-phone its platform is terrible&quot;},
+    {&quot;chirpId&quot;:&quot;12&quot;,&quot;user&quot;:{&quot;screenName&quot;:&quot;OliJackson_512&quot;,&quot;lang&quot;:&quot;en&quot;,&quot;friendsCount&quot;:445,&quot;statusesCount&quot;:164,&quot;name&quot;:&quot;Oli Jackson&quot;,&quot;followersCount&quot;:22649},&quot;senderLocation&quot;:point(&quot;24.82,94.63&quot;),&quot;sendTime&quot;:datetime(&quot;2010-02-13T10:10:00&quot;),&quot;referredTopics&quot;:{{&quot;product-y&quot;,&quot;voice-command&quot;}},&quot;messageText&quot;:&quot; like product-y the voice-command is amazing:)&quot;}
+    ]);
+</pre></div></div>
+
+<p><a href="../data/gbu.adm">Gleambook Users</a></p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    INSERT INTO GleambookUsers
+    ([
+    {&quot;id&quot;:1,&quot;alias&quot;:&quot;Margarita&quot;,&quot;name&quot;:&quot;MargaritaStoddard&quot;,&quot;nickname&quot;:&quot;Mags&quot;,&quot;userSince&quot;:datetime(&quot;2012-08-20T10:10:00&quot;),&quot;friendIds&quot;:{{2,3,6,10}},&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;Codetechno&quot;,&quot;startDate&quot;:date(&quot;2006-08-06&quot;)},{&quot;organizationName&quot;:&quot;geomedia&quot;,&quot;startDate&quot;:date(&quot;2010-06-17&quot;),&quot;endDate&quot;:date(&quot;2010-01-26&quot;)}],&quot;gender&quot;:&quot;F&quot;},
+    {&quot;id&quot;:2,&quot;alias&quot;:&quot;Isbel&quot;,&quot;name&quot;:&quot;IsbelDull&quot;,&quot;nickname&quot;:&quot;Izzy&quot;,&quot;userSince&quot;:datetime(&quot;2011-01-22T10:10:00&quot;),&quot;friendIds&quot;:{{1,4}},&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;Hexviafind&quot;,&quot;startDate&quot;:date(&quot;2010-04-27&quot;)}]},
+    {&quot;id&quot;:3,&quot;alias&quot;:&quot;Emory&quot;,&quot;name&quot;:&quot;EmoryUnk&quot;,&quot;userSince&quot;:datetime(&quot;2012-07-10T10:10:00&quot;),&quot;friendIds&quot;:{{1,5,8,9}},&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;geomedia&quot;,&quot;startDate&quot;:date(&quot;2010-06-17&quot;),&quot;endDate&quot;:date(&quot;2010-01-26&quot;)}]},
+    {&quot;id&quot;:4,&quot;alias&quot;:&quot;Nicholas&quot;,&quot;name&quot;:&quot;NicholasStroh&quot;,&quot;userSince&quot;:datetime(&quot;2010-12-27T10:10:00&quot;),&quot;friendIds&quot;:{{2}},&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;Zamcorporation&quot;,&quot;startDate&quot;:date(&quot;2010-06-08&quot;)}]},
+    {&quot;id&quot;:5,&quot;alias&quot;:&quot;Von&quot;,&quot;name&quot;:&quot;VonKemble&quot;,&quot;userSince&quot;:datetime(&quot;2010-01-05T10:10:00&quot;),&quot;friendIds&quot;:{{3,6,10}},&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;Kongreen&quot;,&quot;startDate&quot;:date(&quot;2010-11-27&quot;)}]},
+    {&quot;id&quot;:6,&quot;alias&quot;:&quot;Willis&quot;,&quot;name&quot;:&quot;WillisWynne&quot;,&quot;userSince&quot;:datetime(&quot;2005-01-17T10:10:00&quot;),&quot;friendIds&quot;:{{1,3,7}},&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;jaydax&quot;,&quot;startDate&quot;:date(&quot;2009-05-15&quot;)}]},
+    {&quot;id&quot;:7,&quot;alias&quot;:&quot;Suzanna&quot;,&quot;name&quot;:&quot;SuzannaTillson&quot;,&quot;userSince&quot;:datetime(&quot;2012-08-07T10:10:00&quot;),&quot;friendIds&quot;:{{6}},&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;Labzatron&quot;,&quot;startDate&quot;:date(&quot;2011-04-19&quot;)}]},
+    {&quot;id&quot;:8,&quot;alias&quot;:&quot;Nila&quot;,&quot;name&quot;:&quot;NilaMilliron&quot;,&quot;userSince&quot;:datetime(&quot;2008-01-01T10:10:00&quot;),&quot;friendIds&quot;:{{3}},&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;Plexlane&quot;,&quot;startDate&quot;:date(&quot;2010-02-28&quot;)}]},
+    {&quot;id&quot;:9,&quot;alias&quot;:&quot;Woodrow&quot;,&quot;name&quot;:&quot;WoodrowNehling&quot;,&quot;nickname&quot;:&quot;Woody&quot;,&quot;userSince&quot;:datetime(&quot;2005-09-20T10:10:00&quot;),&quot;friendIds&quot;:{{3,10}},&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;Zuncan&quot;,&quot;startDate&quot;:date(&quot;2003-04-22&quot;),&quot;endDate&quot;:date(&quot;2009-12-13&quot;)}]},
+    {&quot;id&quot;:10,&quot;alias&quot;:&quot;Bram&quot;,&quot;name&quot;:&quot;BramHatch&quot;,&quot;userSince&quot;:datetime(&quot;2010-10-16T10:10:00&quot;),&quot;friendIds&quot;:{{1,5,9}},&quot;employment&quot;:[{&quot;organizationName&quot;:&quot;physcane&quot;,&quot;startDate&quot;:date(&quot;2007-06-05&quot;),&quot;endDate&quot;:date(&quot;2011-11-05&quot;)}]}
+    ]);
+</pre></div></div>
+
+<p><a href="../data/gbm.adm">Gleambook Messages</a></p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    INSERT INTO GleambookMessages
+    ([
+    {&quot;messageId&quot;:1,&quot;authorId&quot;:3,&quot;inResponseTo&quot;:2,&quot;senderLocation&quot;:point(&quot;47.16,77.75&quot;),&quot;message&quot;:&quot; love product-b its shortcut-menu is awesome:)&quot;},
+    {&quot;messageId&quot;:2,&quot;authorId&quot;:1,&quot;inResponseTo&quot;:4,&quot;senderLocation&quot;:point(&quot;41.66,80.87&quot;),&quot;message&quot;:&quot; dislike x-phone its touch-screen is horrible&quot;},
+    {&quot;messageId&quot;:3,&quot;authorId&quot;:2,&quot;inResponseTo&quot;:4,&quot;senderLocation&quot;:point(&quot;48.09,81.01&quot;),&quot;message&quot;:&quot; like product-y the plan is amazing&quot;},
+    {&quot;messageId&quot;:4,&quot;authorId&quot;:1,&quot;inResponseTo&quot;:2,&quot;senderLocation&quot;:point(&quot;37.73,97.04&quot;),&quot;message&quot;:&quot; can't stand acast the network is horrible:(&quot;},
+    {&quot;messageId&quot;:5,&quot;authorId&quot;:6,&quot;inResponseTo&quot;:2,&quot;senderLocation&quot;:point(&quot;34.7,90.76&quot;),&quot;message&quot;:&quot; love product-b the customization is mind-blowing&quot;},
+    {&quot;messageId&quot;:6,&quot;authorId&quot;:2,&quot;inResponseTo&quot;:1,&quot;senderLocation&quot;:point(&quot;31.5,75.56&quot;),&quot;message&quot;:&quot; like product-z its platform is mind-blowing&quot;},
+    {&quot;messageId&quot;:7,&quot;authorId&quot;:5,&quot;inResponseTo&quot;:15,&quot;senderLocation&quot;:point(&quot;32.91,85.05&quot;),&quot;message&quot;:&quot; dislike product-b the speed is horrible&quot;},
+    {&quot;messageId&quot;:8,&quot;authorId&quot;:1,&quot;inResponseTo&quot;:11,&quot;senderLocation&quot;:point(&quot;40.33,80.87&quot;),&quot;message&quot;:&quot; like ccast the 3G is awesome:)&quot;},
+    {&quot;messageId&quot;:9,&quot;authorId&quot;:3,&quot;inResponseTo&quot;:12,&quot;senderLocation&quot;:point(&quot;34.45,96.48&quot;),&quot;message&quot;:&quot; love ccast its wireless is good&quot;},
+    {&quot;messageId&quot;:10,&quot;authorId&quot;:1,&quot;inResponseTo&quot;:12,&quot;senderLocation&quot;:point(&quot;42.5,70.01&quot;),&quot;message&quot;:&quot; can't stand product-w the touch-screen is terrible&quot;},
+    {&quot;messageId&quot;:11,&quot;authorId&quot;:1,&quot;inResponseTo&quot;:1,&quot;senderLocation&quot;:point(&quot;38.97,77.49&quot;),&quot;message&quot;:&quot; can't stand acast its plan is terrible&quot;},
+    {&quot;messageId&quot;:12,&quot;authorId&quot;:10,&quot;inResponseTo&quot;:6,&quot;senderLocation&quot;:point(&quot;42.26,77.76&quot;),&quot;message&quot;:&quot; can't stand product-z its voicemail-service is OMG:(&quot;},
+    {&quot;messageId&quot;:13,&quot;authorId&quot;:10,&quot;inResponseTo&quot;:4,&quot;senderLocation&quot;:point(&quot;42.77,78.92&quot;),&quot;message&quot;:&quot; dislike x-phone the voice-command is bad:(&quot;},
+    {&quot;messageId&quot;:14,&quot;authorId&quot;:9,&quot;inResponseTo&quot;:12,&quot;senderLocation&quot;:point(&quot;41.33,85.28&quot;),&quot;message&quot;:&quot; love acast its 3G is good:)&quot;},
+    {&quot;messageId&quot;:15,&quot;authorId&quot;:7,&quot;inResponseTo&quot;:11,&quot;senderLocation&quot;:point(&quot;44.47,67.11&quot;),&quot;message&quot;:&quot; like x-phone the voicemail-service is awesome&quot;}
+    ]);
+</pre></div></div>
+</div>
+<div class="section">
+<h2><a name="SQL.2B.2B:_Querying_Your_AsterixDB_Data"></a>SQL++: Querying Your AsterixDB Data</h2>
+<p>Congratulations! You now have sample social data stored (and indexed) in AsterixDB. (You are part of an elite and adventurous group of individuals. :-)) Now that you have successfully loaded the provided sample data into the datasets that we defined, you can start running queries against them.</p>
+<p>AsterixDB currently supports two query languages. The first&#x2014;AsterixDB&#x2019;s original query language&#x2014;is AQL (the Asterix Query Language). The AQL language was inspired by XQuery, the W3C standard language for querying XML data. (There is a version of this tutorial for AQL if you would like to learn more about it.) The query language described in the remainder of this tutorial is SQL++, a SQL-inspired language designed (as AQL was) for working with semistructured data. SQL++ has much in common with SQL, but there are differences due to the data model that SQL++ is designed to serve. SQL was designed in the 1970s to interact with the flat, schema-ified world of relational databases. SQL++ is designed for the nested, schema-less (or schema-optional, in AsterixDB) world of NoSQL systems. While SQL++ has the same expressive power as AQL, it offers a more familiar paradigm for experienced SQL users to query and manipulate data in AsterixDB.</p>
+<p>In this section we introduce SQL++ via a set of example queries, along with their expected results, based on the data above, to help you get started. Many of the most important features of SQL++ are presented in this set of representative queries. You can find more details in the document on the <a href="datamodel.html">Asterix Data Model (ADM)</a>, in the <a href="manual-sqlpp.html">SQL++ Reference Manual</a>, and a complete list of built-in functions is available in the <a href="functions-sqlpp.html">SQL++ Functions</a> document.</p>
+<p>SQL++ is an expression language. Even the simple expression 1+1 is a valid SQL++ query that evaluates to 2. (Try it for yourself! Okay, maybe that&#x2019;s <i>not</i> the best use of a 512-node shared-nothing compute cluster.) But enough talk! Let&#x2019;s go ahead and try writing some queries and see about learning SQL++ by example. (Again, don&#x2019;t forget to choose SQL++ as the query language in the web interface!)</p>
+<div class="section">
+<h3><a name="Query_0-A_-_Exact-Match_Lookup"></a>Query 0-A - Exact-Match Lookup</h3>
+<p>For our first query, let&#x2019;s find a Gleambook user based on his or her user id. Suppose the user we want is the user whose id is 8:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT VALUE user
+    FROM GleambookUsers user
+    WHERE user.id = 8;
+</pre></div></div>
+
+<p>As in SQL, the query&#x2019;s <i>FROM</i> clause binds the variable <tt>user</tt> incrementally to the data instances residing in the dataset named GleambookUsers. Its <i>WHERE</i> clause selects only those bindings having a user id of interest, filtering out the rest. The <i>SELECT VALUE</i> clause returns the (entire) data value (a Gleambook user object in this case) for each binding that satisfies the predicate. Since this dataset is indexed on user id (its primary key), this query will be done via a quick index lookup.</p>
+<p>The expected result for our sample data is as follows:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;id&quot;: 8, &quot;alias&quot;: &quot;Nila&quot;, &quot;name&quot;: &quot;NilaMilliron&quot;, &quot;userSince&quot;: datetime(&quot;2008-01-01T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 3 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Plexlane&quot;, &quot;startDate&quot;: date(&quot;2010-02-28&quot;) } ] }
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Query_0-B_-_Range_Scan"></a>Query 0-B - Range Scan</h3>
+<p>SQL++, like SQL, supports a variety of different predicates. For example, for our next query, let&#x2019;s find the Gleambook users whose ids are in the range between 2 and 4:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT VALUE user
+    FROM GleambookUsers user
+    WHERE user.id &gt;= 2 AND user.id &lt;= 4;
+</pre></div></div>
+
+<p>This query&#x2019;s expected result, also evaluable using the primary index on user id, is:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;id&quot;: 2, &quot;alias&quot;: &quot;Isbel&quot;, &quot;name&quot;: &quot;IsbelDull&quot;, &quot;userSince&quot;: datetime(&quot;2011-01-22T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 4 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Hexviafind&quot;, &quot;startDate&quot;: date(&quot;2010-04-27&quot;) } ], &quot;nickname&quot;: &quot;Izzy&quot; }
+    { &quot;id&quot;: 3, &quot;alias&quot;: &quot;Emory&quot;, &quot;name&quot;: &quot;EmoryUnk&quot;, &quot;userSince&quot;: datetime(&quot;2012-07-10T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 5, 8, 9 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;geomedia&quot;, &quot;startDate&quot;: date(&quot;2010-06-17&quot;), &quot;endDate&quot;: date(&quot;2010-01-26&quot;) } ] }
+    { &quot;id&quot;: 4, &quot;alias&quot;: &quot;Nicholas&quot;, &quot;name&quot;: &quot;NicholasStroh&quot;, &quot;userSince&quot;: datetime(&quot;2010-12-27T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 2 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Zamcorporation&quot;, &quot;startDate&quot;: date(&quot;2010-06-08&quot;) } ] }
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Query_1_-_Other_Query_Filters"></a>Query 1 - Other Query Filters</h3>
+<p>SQL++ can do range queries on any data type that supports the appropriate set of comparators. As an example, this next query retrieves the Gleambook users who joined between July 22, 2010 and July 29, 2012:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT VALUE user
+    FROM GleambookUsers user
+    WHERE user.userSince &gt;= datetime('2010-07-22T00:00:00')
+      AND user.userSince &lt;= datetime('2012-07-29T23:59:59');
+</pre></div></div>
+
+<p>The expected result for this query, also an indexable query, is as follows:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;id&quot;: 10, &quot;alias&quot;: &quot;Bram&quot;, &quot;name&quot;: &quot;BramHatch&quot;, &quot;userSince&quot;: datetime(&quot;2010-10-16T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 5, 9 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;physcane&quot;, &quot;startDate&quot;: date(&quot;2007-06-05&quot;), &quot;endDate&quot;: date(&quot;2011-11-05&quot;) } ] }
+    { &quot;id&quot;: 2, &quot;alias&quot;: &quot;Isbel&quot;, &quot;name&quot;: &quot;IsbelDull&quot;, &quot;userSince&quot;: datetime(&quot;2011-01-22T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 4 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Hexviafind&quot;, &quot;startDate&quot;: date(&quot;2010-04-27&quot;) } ], &quot;nickname&quot;: &quot;Izzy&quot; }
+    { &quot;id&quot;: 3, &quot;alias&quot;: &quot;Emory&quot;, &quot;name&quot;: &quot;EmoryUnk&quot;, &quot;userSince&quot;: datetime(&quot;2012-07-10T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 5, 8, 9 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;geomedia&quot;, &quot;startDate&quot;: date(&quot;2010-06-17&quot;), &quot;endDate&quot;: date(&quot;2010-01-26&quot;) } ] }
+    { &quot;id&quot;: 4, &quot;alias&quot;: &quot;Nicholas&quot;, &quot;name&quot;: &quot;NicholasStroh&quot;, &quot;userSince&quot;: datetime(&quot;2010-12-27T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 2 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Zamcorporation&quot;, &quot;startDate&quot;: date(&quot;2010-06-08&quot;) } ] }
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Query_2-A_-_Equijoin"></a>Query 2-A - Equijoin</h3>
+<p>In addition to simply binding variables to data instances and returning them &#x201c;whole&#x201d;, an SQL++ query can construct new ADM instances to return based on combinations of its variable bindings. This gives SQL++ the power to do projections and joins much like those done using multi-table <i>FROM</i> clauses in SQL. For example, suppose we wanted a list of all Gleambook users paired with their associated messages, with the list enumerating the author name and the message text associated with each Gleambook message. We could do this as follows in SQL++:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT user.name AS uname, msg.message AS message
+    FROM GleambookUsers user, GleambookMessages msg
+    WHERE msg.authorId = user.id;
+</pre></div></div>
+
+<p>The result of this query is a sequence of new ADM instances, one for each author/message pair. Each instance in the result will be an ADM object containing two fields, &#x201c;uname&#x201d; and &#x201c;message&#x201d;, containing the user&#x2019;s name and the message text, respectively, for each author/message pair. Notice how the use of a traditional SQL-style <i>SELECT</i> clause, as opposed to the new SQL++ <i>SELECT VALUE</i> clause, automatically results in the construction of a new object value for each result.</p>
+<p>The expected result of this example SQL++ join query for our sample data set is:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;uname&quot;: &quot;WillisWynne&quot;, &quot;message&quot;: &quot; love product-b the customization is mind-blowing&quot; }
+    { &quot;uname&quot;: &quot;WoodrowNehling&quot;, &quot;message&quot;: &quot; love acast its 3G is good:)&quot; }
+    { &quot;uname&quot;: &quot;BramHatch&quot;, &quot;message&quot;: &quot; can't stand product-z its voicemail-service is OMG:(&quot; }
+    { &quot;uname&quot;: &quot;BramHatch&quot;, &quot;message&quot;: &quot; dislike x-phone the voice-command is bad:(&quot; }
+    { &quot;uname&quot;: &quot;MargaritaStoddard&quot;, &quot;message&quot;: &quot; like ccast the 3G is awesome:)&quot; }
+    { &quot;uname&quot;: &quot;MargaritaStoddard&quot;, &quot;message&quot;: &quot; can't stand product-w the touch-screen is terrible&quot; }
+    { &quot;uname&quot;: &quot;MargaritaStoddard&quot;, &quot;message&quot;: &quot; can't stand acast its plan is terrible&quot; }
+    { &quot;uname&quot;: &quot;MargaritaStoddard&quot;, &quot;message&quot;: &quot; dislike x-phone its touch-screen is horrible&quot; }
+    { &quot;uname&quot;: &quot;MargaritaStoddard&quot;, &quot;message&quot;: &quot; can't stand acast the network is horrible:(&quot; }
+    { &quot;uname&quot;: &quot;IsbelDull&quot;, &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot; }
+    { &quot;uname&quot;: &quot;IsbelDull&quot;, &quot;message&quot;: &quot; like product-y the plan is amazing&quot; }
+    { &quot;uname&quot;: &quot;EmoryUnk&quot;, &quot;message&quot;: &quot; love ccast its wireless is good&quot; }
+    { &quot;uname&quot;: &quot;EmoryUnk&quot;, &quot;message&quot;: &quot; love product-b its shortcut-menu is awesome:)&quot; }
+    { &quot;uname&quot;: &quot;VonKemble&quot;, &quot;message&quot;: &quot; dislike product-b the speed is horrible&quot; }
+    { &quot;uname&quot;: &quot;SuzannaTillson&quot;, &quot;message&quot;: &quot; like x-phone the voicemail-service is awesome&quot; }
+</pre></div></div>
+
+<p>If we were feeling lazy, we might use <i>SELECT *</i> in SQL++ to return all of the matching user/message data:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT *
+    FROM GleambookUsers user, GleambookMessages msg
+    WHERE msg.authorId = user.id;
+</pre></div></div>
+
+<p>In SQL++, this <i>SELECT *</i> query will produce a new nested object for each user/message pair. Each result object contains one field (named after the &#x201c;user&#x201d; variable) to hold the user object and another field (named after the &#x201c;msg&#x201d; variable) to hold the matching message object. Note that the nested nature of this SQL++ <i>SELECT *</i> result differs from what traditional SQL would return, as SQL was not designed to handle the richer, nested data model that underlies the design of SQL++.</p>
+<p>The expected result of this version of the SQL++ join query for our sample data set is:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;user&quot;: { &quot;id&quot;: 6, &quot;alias&quot;: &quot;Willis&quot;, &quot;name&quot;: &quot;WillisWynne&quot;, &quot;userSince&quot;: datetime(&quot;2005-01-17T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 3, 7 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;jaydax&quot;, &quot;startDate&quot;: date(&quot;2009-05-15&quot;) } ] }, &quot;msg&quot;: { &quot;messageId&quot;: 5, &quot;authorId&quot;: 6, &quot;inResponseTo&quot;: 2, &quot;senderLocation&quot;: point(&quot;34.7,90.76&quot;), &quot;message&quot;: &quot; love product-b the customization is mind-blowing&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 9, &quot;alias&quot;: &quot;Woodrow&quot;, &quot;name&quot;: &quot;WoodrowNehling&quot;, &quot;userSince&quot;: datetime(&quot;2005-09-20T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 3, 10 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Zuncan&quot;, &quot;startDate&quot;: date(&quot;2003-04-22&quot;), &quot;endDate&quot;: date(&quot;2009-12-13&quot;) } ], &quot;nickname&quot;: &quot;Woody&quot; }, &quot;msg&quot;: { &quot;messageId&quot;: 14, &quot;authorId&quot;: 9, &quot;inResponseTo&quot;: 12, &quot;senderLocation&quot;: point(&quot;41.33,85.28&quot;), &quot;message&quot;: &quot; love acast its 3G is good:)&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 10, &quot;alias&quot;: &quot;Bram&quot;, &quot;name&quot;: &quot;BramHatch&quot;, &quot;userSince&quot;: datetime(&quot;2010-10-16T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 5, 9 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;physcane&quot;, &quot;startDate&quot;: date(&quot;2007-06-05&quot;), &quot;endDate&quot;: date(&quot;2011-11-05&quot;) } ] }, &quot;msg&quot;: { &quot;messageId&quot;: 12, &quot;authorId&quot;: 10, &quot;inResponseTo&quot;: 6, &quot;senderLocation&quot;: point(&quot;42.26,77.76&quot;), &quot;message&quot;: &quot; can't stand product-z its voicemail-service is OMG:(&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 10, &quot;alias&quot;: &quot;Bram&quot;, &quot;name&quot;: &quot;BramHatch&quot;, &quot;userSince&quot;: datetime(&quot;2010-10-16T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 5, 9 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;physcane&quot;, &quot;startDate&quot;: date(&quot;2007-06-05&quot;), &quot;endDate&quot;: date(&quot;2011-11-05&quot;) } ] }, &quot;msg&quot;: { &quot;messageId&quot;: 13, &quot;authorId&quot;: 10, &quot;inResponseTo&quot;: 4, &quot;senderLocation&quot;: point(&quot;42.77,78.92&quot;), &quot;message&quot;: &quot; dislike x-phone the voice-command is bad:(&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 1, &quot;alias&quot;: &quot;Margarita&quot;, &quot;name&quot;: &quot;MargaritaStoddard&quot;, &quot;userSince&quot;: datetime(&quot;2012-08-20T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 2, 3, 6, 10 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Codetechno&quot;, &quot;startDate&quot;: date(&quot;2006-08-06&quot;) }, { &quot;organizationName&quot;: &quot;geomedia&quot;, &quot;startDate&quot;: date(&quot;2010-06-17&quot;), &quot;endDate&quot;: date(&quot;2010-01-26&quot;) } ], &quot;nickname&quot;: &quot;Mags&quot;, &quot;gender&quot;: &quot;F&quot; }, &quot;msg&quot;: { &quot;messageId&quot;: 8, &quot;authorId&quot;: 1, &quot;inResponseTo&quot;: 11, &quot;senderLocation&quot;: point(&quot;40.33,80.87&quot;), &quot;message&quot;: &quot; like ccast the 3G is awesome:)&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 1, &quot;alias&quot;: &quot;Margarita&quot;, &quot;name&quot;: &quot;MargaritaStoddard&quot;, &quot;userSince&quot;: datetime(&quot;2012-08-20T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 2, 3, 6, 10 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Codetechno&quot;, &quot;startDate&quot;: date(&quot;2006-08-06&quot;) }, { &quot;organizationName&quot;: &quot;geomedia&quot;, &quot;startDate&quot;: date(&quot;2010-06-17&quot;), &quot;endDate&quot;: date(&quot;2010-01-26&quot;) } ], &quot;nickname&quot;: &quot;Mags&quot;, &quot;gender&quot;: &quot;F&quot; }, &quot;msg&quot;: { &quot;messageId&quot;: 10, &quot;authorId&quot;: 1, &quot;inResponseTo&quot;: 12, &quot;senderLocation&quot;: point(&quot;42.5,70.01&quot;), &quot;message&quot;: &quot; can't stand product-w the touch-screen is terrible&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 1, &quot;alias&quot;: &quot;Margarita&quot;, &quot;name&quot;: &quot;MargaritaStoddard&quot;, &quot;userSince&quot;: datetime(&quot;2012-08-20T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 2, 3, 6, 10 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Codetechno&quot;, &quot;startDate&quot;: date(&quot;2006-08-06&quot;) }, { &quot;organizationName&quot;: &quot;geomedia&quot;, &quot;startDate&quot;: date(&quot;2010-06-17&quot;), &quot;endDate&quot;: date(&quot;2010-01-26&quot;) } ], &quot;nickname&quot;: &quot;Mags&quot;, &quot;gender&quot;: &quot;F&quot; }, &quot;msg&quot;: { &quot;messageId&quot;: 11, &quot;authorId&quot;: 1, &quot;inResponseTo&quot;: 1, &quot;senderLocation&quot;: point(&quot;38.97,77.49&quot;), &quot;message&quot;: &quot; can't stand acast its plan is terrible&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 1, &quot;alias&quot;: &quot;Margarita&quot;, &quot;name&quot;: &quot;MargaritaStoddard&quot;, &quot;userSince&quot;: datetime(&quot;2012-08-20T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 2, 3, 6, 10 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Codetechno&quot;, &quot;startDate&quot;: date(&quot;2006-08-06&quot;) }, { &quot;organizationName&quot;: &quot;geomedia&quot;, &quot;startDate&quot;: date(&quot;2010-06-17&quot;), &quot;endDate&quot;: date(&quot;2010-01-26&quot;) } ], &quot;nickname&quot;: &quot;Mags&quot;, &quot;gender&quot;: &quot;F&quot; }, &quot;msg&quot;: { &quot;messageId&quot;: 2, &quot;authorId&quot;: 1, &quot;inResponseTo&quot;: 4, &quot;senderLocation&quot;: point(&quot;41.66,80.87&quot;), &quot;message&quot;: &quot; dislike x-phone its touch-screen is horrible&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 1, &quot;alias&quot;: &quot;Margarita&quot;, &quot;name&quot;: &quot;MargaritaStoddard&quot;, &quot;userSince&quot;: datetime(&quot;2012-08-20T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 2, 3, 6, 10 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Codetechno&quot;, &quot;startDate&quot;: date(&quot;2006-08-06&quot;) }, { &quot;organizationName&quot;: &quot;geomedia&quot;, &quot;startDate&quot;: date(&quot;2010-06-17&quot;), &quot;endDate&quot;: date(&quot;2010-01-26&quot;) } ], &quot;nickname&quot;: &quot;Mags&quot;, &quot;gender&quot;: &quot;F&quot; }, &quot;msg&quot;: { &quot;messageId&quot;: 4, &quot;authorId&quot;: 1, &quot;inResponseTo&quot;: 2, &quot;senderLocation&quot;: point(&quot;37.73,97.04&quot;), &quot;message&quot;: &quot; can't stand acast the network is horrible:(&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 2, &quot;alias&quot;: &quot;Isbel&quot;, &quot;name&quot;: &quot;IsbelDull&quot;, &quot;userSince&quot;: datetime(&quot;2011-01-22T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 4 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Hexviafind&quot;, &quot;startDate&quot;: date(&quot;2010-04-27&quot;) } ], &quot;nickname&quot;: &quot;Izzy&quot; }, &quot;msg&quot;: { &quot;messageId&quot;: 6, &quot;authorId&quot;: 2, &quot;inResponseTo&quot;: 1, &quot;senderLocation&quot;: point(&quot;31.5,75.56&quot;), &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 2, &quot;alias&quot;: &quot;Isbel&quot;, &quot;name&quot;: &quot;IsbelDull&quot;, &quot;userSince&quot;: datetime(&quot;2011-01-22T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 4 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Hexviafind&quot;, &quot;startDate&quot;: date(&quot;2010-04-27&quot;) } ], &quot;nickname&quot;: &quot;Izzy&quot; }, &quot;msg&quot;: { &quot;messageId&quot;: 3, &quot;authorId&quot;: 2, &quot;inResponseTo&quot;: 4, &quot;senderLocation&quot;: point(&quot;48.09,81.01&quot;), &quot;message&quot;: &quot; like product-y the plan is amazing&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 3, &quot;alias&quot;: &quot;Emory&quot;, &quot;name&quot;: &quot;EmoryUnk&quot;, &quot;userSince&quot;: datetime(&quot;2012-07-10T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 5, 8, 9 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;geomedia&quot;, &quot;startDate&quot;: date(&quot;2010-06-17&quot;), &quot;endDate&quot;: date(&quot;2010-01-26&quot;) } ] }, &quot;msg&quot;: { &quot;messageId&quot;: 9, &quot;authorId&quot;: 3, &quot;inResponseTo&quot;: 12, &quot;senderLocation&quot;: point(&quot;34.45,96.48&quot;), &quot;message&quot;: &quot; love ccast its wireless is good&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 3, &quot;alias&quot;: &quot;Emory&quot;, &quot;name&quot;: &quot;EmoryUnk&quot;, &quot;userSince&quot;: datetime(&quot;2012-07-10T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 5, 8, 9 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;geomedia&quot;, &quot;startDate&quot;: date(&quot;2010-06-17&quot;), &quot;endDate&quot;: date(&quot;2010-01-26&quot;) } ] }, &quot;msg&quot;: { &quot;messageId&quot;: 1, &quot;authorId&quot;: 3, &quot;inResponseTo&quot;: 2, &quot;senderLocation&quot;: point(&quot;47.16,77.75&quot;), &quot;message&quot;: &quot; love product-b its shortcut-menu is awesome:)&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 5, &quot;alias&quot;: &quot;Von&quot;, &quot;name&quot;: &quot;VonKemble&quot;, &quot;userSince&quot;: datetime(&quot;2010-01-05T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 3, 6, 10 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Kongreen&quot;, &quot;startDate&quot;: date(&quot;2010-11-27&quot;) } ] }, &quot;msg&quot;: { &quot;messageId&quot;: 7, &quot;authorId&quot;: 5, &quot;inResponseTo&quot;: 15, &quot;senderLocation&quot;: point(&quot;32.91,85.05&quot;), &quot;message&quot;: &quot; dislike product-b the speed is horrible&quot; } }
+    { &quot;user&quot;: { &quot;id&quot;: 7, &quot;alias&quot;: &quot;Suzanna&quot;, &quot;name&quot;: &quot;SuzannaTillson&quot;, &quot;userSince&quot;: datetime(&quot;2012-08-07T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 6 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Labzatron&quot;, &quot;startDate&quot;: date(&quot;2011-04-19&quot;) } ] }, &quot;msg&quot;: { &quot;messageId&quot;: 15, &quot;authorId&quot;: 7, &quot;inResponseTo&quot;: 11, &quot;senderLocation&quot;: point(&quot;44.47,67.11&quot;), &quot;message&quot;: &quot; like x-phone the voicemail-service is awesome&quot; } }
+</pre></div></div>
+
+<p>Finally (for now :-)), another less lazy and more explicit SQL++ way of achieving the result shown above is:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT VALUE {&quot;user&quot;: user, &quot;message&quot;: msg}
+    FROM GleambookUsers user, GleambookMessages msg
+    WHERE msg.authorId = user.id;
+</pre></div></div>
+
+<p>This version of the query uses an explicit object constructor to build each result object. (Note that &#x201c;user&#x201d; and &#x201c;message&#x201d; are both simple SQL++ expressions themselves&#x2014;so in the most general case, even the resulting field names can be computed as part of the query, making SQL++ a very powerful tool for slicing and dicing semistructured data.)</p></div>
+<div class="section">
+<h3><a name="Query_2-B_-_Index_join"></a>Query 2-B - Index join</h3>
+<p>By default, AsterixDB evaluates equijoin queries using hash-based join methods that work well for doing ad hoc joins of very large data sets (<a class="externalLink" href="http://en.wikipedia.org/wiki/Hash_join">http://en.wikipedia.org/wiki/Hash_join</a>). On a cluster, hash partitioning is employed as AsterixDB&#x2019;s divide-and-conquer strategy for computing large parallel joins. AsterixDB includes other join methods, but in the absence of data statistics and selectivity estimates, it doesn&#x2019;t (yet) have the know-how to intelligently choose among its alternatives. We therefore asked ourselves the classic question&#x2014;WWOD?&#x2014;What Would Oracle Do?&#x2014;and in the interim, SQL++ includes a clunky (but useful) hint-based mechanism for addressing the occasional need to suggest to AsterixDB which join method it should use for a particular SQL++ query.</p>
+<p>The following query is similar to the first version of Query 2-A but includes a suggestion to AsterixDB that it should consider employing an index-based nested-loop join technique to process the query:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT user.name AS uname, msg.message AS message
+    FROM GleambookUsers user, GleambookMessages msg
+    WHERE msg.authorId /*+ indexnl */ = user.id;
+</pre></div></div>
+
+<p>In addition to illustrating the use of a hint, the query also shows how to achieve the same result object format using <i>SELECT</i> and <i>AS</i> instead of using an explicit object constructor. The expected result is (of course) the same as before, modulo the order of the instances. Result ordering is (intentionally) undefined in SQL++ in the absence of an <i>ORDER BY</i> clause. The query result for our sample data in this case is:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;uname&quot;: &quot;IsbelDull&quot;, &quot;message&quot;: &quot; like product-z its platform is mind-blowing&quot; }
+    { &quot;uname&quot;: &quot;MargaritaStoddard&quot;, &quot;message&quot;: &quot; like ccast the 3G is awesome:)&quot; }
+    { &quot;uname&quot;: &quot;EmoryUnk&quot;, &quot;message&quot;: &quot; love ccast its wireless is good&quot; }
+    { &quot;uname&quot;: &quot;MargaritaStoddard&quot;, &quot;message&quot;: &quot; can't stand product-w the touch-screen is terrible&quot; }
+    { &quot;uname&quot;: &quot;MargaritaStoddard&quot;, &quot;message&quot;: &quot; can't stand acast its plan is terrible&quot; }
+    { &quot;uname&quot;: &quot;BramHatch&quot;, &quot;message&quot;: &quot; can't stand product-z its voicemail-service is OMG:(&quot; }
+    { &quot;uname&quot;: &quot;WoodrowNehling&quot;, &quot;message&quot;: &quot; love acast its 3G is good:)&quot; }
+    { &quot;uname&quot;: &quot;EmoryUnk&quot;, &quot;message&quot;: &quot; love product-b its shortcut-menu is awesome:)&quot; }
+    { &quot;uname&quot;: &quot;MargaritaStoddard&quot;, &quot;message&quot;: &quot; dislike x-phone its touch-screen is horrible&quot; }
+    { &quot;uname&quot;: &quot;IsbelDull&quot;, &quot;message&quot;: &quot; like product-y the plan is amazing&quot; }
+    { &quot;uname&quot;: &quot;MargaritaStoddard&quot;, &quot;message&quot;: &quot; can't stand acast the network is horrible:(&quot; }
+    { &quot;uname&quot;: &quot;WillisWynne&quot;, &quot;message&quot;: &quot; love product-b the customization is mind-blowing&quot; }
+    { &quot;uname&quot;: &quot;VonKemble&quot;, &quot;message&quot;: &quot; dislike product-b the speed is horrible&quot; }
+    { &quot;uname&quot;: &quot;BramHatch&quot;, &quot;message&quot;: &quot; dislike x-phone the voice-command is bad:(&quot; }
+    { &quot;uname&quot;: &quot;SuzannaTillson&quot;, &quot;message&quot;: &quot; like x-phone the voicemail-service is awesome&quot; }
+</pre></div></div>
+
+<p>(It is worth knowing, with respect to influencing AsterixDB&#x2019;s query evaluation, that <i>FROM</i> clauses&#x2014;<i>a.k.a.</i> joins&#x2014;are currently evaluated in order, with the &#x201c;left&#x201d; clause probing the data of the &#x201c;right&#x201d; clause. SQL++ also supports SQL-style <i>JOIN</i> clauses, and the same is true for those.)</p></div>
+<div class="section">
+<h3><a name="Query_3_-_Nested_Outer_Join"></a>Query 3 - Nested Outer Join</h3>
+<p>In order to support joins between tables with missing/dangling join tuples, the designers of SQL ended up shoe-horning a subset of the relational algebra into SQL&#x2019;s <i>FROM</i> clause syntax&#x2014;and providing a variety of join types there for users to choose from (which SQL++ supports for SQL compatibility). Left outer joins are particularly important in SQL, e.g., to print a summary of customers and orders, grouped by customer, without omitting those customers who haven&#x2019;t placed any orders yet.</p>
+<p>The SQL++ language supports nesting, both of queries and of query results, and the combination allows for an arguably cleaner/more natural approach to such queries. As an example, suppose we wanted, for each Gleambook user, to produce an object that has his/her name plus a list of the messages written by that user. In SQL, this would involve a left outer join between users and messages, grouping by user, and having the user name repeated alongside each message. In SQL++, this sort of use case can be handled (more naturally) as follows:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT user.name AS uname,
+           (SELECT VALUE msg.message
+            FROM GleambookMessages msg
+            WHERE msg.authorId = user.id) AS messages
+    FROM GleambookUsers user;
+</pre></div></div>
+
+<p>This SQL++ query binds the variable <tt>user</tt> to the data instances in GleambookUsers; for each user, it constructs a result object containing a &#x201c;uname&#x201d; field with the user&#x2019;s name and a &#x201c;messages&#x201d; field with a nested collection of all messages for that user. The nested collection for each user is specified by using a correlated subquery. (Note: While it looks like nested loops could be involved in computing the result, AsterixDB recognizes the equivalence of such a query to an outer join, and it will use an efficient hash-based strategy when actually computing the query&#x2019;s result.)</p>
+<p>Here is this example query&#x2019;s expected output:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;uname&quot;: &quot;WillisWynne&quot;, &quot;messages&quot;: [ &quot; love product-b the customization is mind-blowing&quot; ] }
+    { &quot;uname&quot;: &quot;NilaMilliron&quot;, &quot;messages&quot;: [  ] }
+    { &quot;uname&quot;: &quot;WoodrowNehling&quot;, &quot;messages&quot;: [ &quot; love acast its 3G is good:)&quot; ] }
+    { &quot;uname&quot;: &quot;BramHatch&quot;, &quot;messages&quot;: [ &quot; dislike x-phone the voice-command is bad:(&quot;, &quot; can't stand product-z its voicemail-service is OMG:(&quot; ] }
+    { &quot;uname&quot;: &quot;MargaritaStoddard&quot;, &quot;messages&quot;: [ &quot; dislike x-phone its touch-screen is horrible&quot;, &quot; can't stand acast the network is horrible:(&quot;, &quot; like ccast the 3G is awesome:)&quot;, &quot; can't stand product-w the touch-screen is terrible&quot;, &quot; can't stand acast its plan is terrible&quot; ] }
+    { &quot;uname&quot;: &quot;IsbelDull&quot;, &quot;messages&quot;: [ &quot; like product-y the plan is amazing&quot;, &quot; like product-z its platform is mind-blowing&quot; ] }
+    { &quot;uname&quot;: &quot;EmoryUnk&quot;, &quot;messages&quot;: [ &quot; love product-b its shortcut-menu is awesome:)&quot;, &quot; love ccast its wireless is good&quot; ] }
+    { &quot;uname&quot;: &quot;NicholasStroh&quot;, &quot;messages&quot;: [  ] }
+    { &quot;uname&quot;: &quot;VonKemble&quot;, &quot;messages&quot;: [ &quot; dislike product-b the speed is horrible&quot; ] }
+    { &quot;uname&quot;: &quot;SuzannaTillson&quot;, &quot;messages&quot;: [ &quot; like x-phone the voicemail-service is awesome&quot; ] }
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Query_4_-_Theta_Join"></a>Query 4 - Theta Join</h3>
+<p>Not all joins are expressible as equijoins and computable using equijoin-oriented algorithms. The join predicates for some use cases involve predicates with functions; AsterixDB supports the expression of such queries and will still evaluate them as best it can using nested loop based techniques (and broadcast joins in the parallel case).</p>
+<p>As an example of such a use case, suppose that we wanted, for each chirp message C, to find all of the other chirp messages that originated from within a circle of radius 1 surrounding chirp C&#x2019;s location. In SQL++, this can be specified in a manner similar to the previous query using one of the built-in functions on the spatial data type instead of id equality in the correlated query&#x2019;s <i>WHERE</i> clause:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT cm1.messageText AS message,
+           (SELECT VALUE cm2.messageText
+            FROM ChirpMessages cm2
+            WHERE `spatial-distance`(cm1.senderLocation, cm2.senderLocation) &lt;= 1
+              AND cm2.chirpId &lt; cm1.chirpId) AS nearbyMessages
+    FROM ChirpMessages cm1;
+</pre></div></div>
+
+<p>Here is the expected result for this query:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;message&quot;: &quot; can't stand x-phone its platform is terrible&quot;, &quot;nearbyMessages&quot;: [  ] }
+    { &quot;message&quot;: &quot; like ccast its shortcut-menu is awesome:)&quot;, &quot;nearbyMessages&quot;: [  ] }
+    { &quot;message&quot;: &quot; like product-w the speed is good:)&quot;, &quot;nearbyMessages&quot;: [ &quot; hate ccast its voice-clarity is OMG:(&quot; ] }
+    { &quot;message&quot;: &quot; like product-b the voice-command is mind-blowing:)&quot;, &quot;nearbyMessages&quot;: [  ] }
+    { &quot;message&quot;: &quot; like x-phone the voice-clarity is good:)&quot;, &quot;nearbyMessages&quot;: [  ] }
+    { &quot;message&quot;: &quot; like product-y the platform is good&quot;, &quot;nearbyMessages&quot;: [  ] }
+    { &quot;message&quot;: &quot; love ccast its voicemail-service is awesome&quot;, &quot;nearbyMessages&quot;: [  ] }
+    { &quot;message&quot;: &quot; love product-z its customization is good:)&quot;, &quot;nearbyMessages&quot;: [  ] }
+    { &quot;message&quot;: &quot; hate ccast its voice-clarity is OMG:(&quot;, &quot;nearbyMessages&quot;: [  ] }
+    { &quot;message&quot;: &quot; like product-y the voice-command is amazing:)&quot;, &quot;nearbyMessages&quot;: [  ] }
+    { &quot;message&quot;: &quot; can't stand product-w its speed is terrible:(&quot;, &quot;nearbyMessages&quot;: [  ] }
+    { &quot;message&quot;: &quot; like product-z the shortcut-menu is awesome:)&quot;, &quot;nearbyMessages&quot;: [  ] }
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Query_5_-_Fuzzy_Join"></a>Query 5 - Fuzzy Join</h3>
+<p>As another example of a non-equijoin use case, we could ask AsterixDB to find, for each Gleambook user, all Chirp users with names &#x201c;similar&#x201d; to their name. AsterixDB supports a variety of &#x201c;fuzzy match&#x201d; functions for use with textual and set-based data. As one example, we could choose to use edit distance with a threshold of 3 as the definition of name similarity, in which case we could write the following query using SQL++&#x2019;s operator-based syntax (~=) for testing whether or not two values are similar:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+    SET simfunction &quot;edit-distance&quot;;
+    SET simthreshold &quot;3&quot;;
+
+    SELECT gbu.id AS id, gbu.name AS name,
+           (SELECT cm.user.screenName AS chirpScreenname,
+                   cm.user.name AS chirpName
+            FROM ChirpMessages cm
+            WHERE cm.user.name ~= gbu.name) AS similarUsers
+    FROM GleambookUsers gbu;
+</pre></div></div>
+
+<p>The expected result for this query against our sample data is:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;id&quot;: 6, &quot;name&quot;: &quot;WillisWynne&quot;, &quot;similarUsers&quot;: [  ] }
+    { &quot;id&quot;: 8, &quot;name&quot;: &quot;NilaMilliron&quot;, &quot;similarUsers&quot;: [ { &quot;chirpScreenname&quot;: &quot;NilaMilliron_tw&quot;, &quot;chirpName&quot;: &quot;Nila Milliron&quot; } ] }
+    { &quot;id&quot;: 9, &quot;name&quot;: &quot;WoodrowNehling&quot;, &quot;similarUsers&quot;: [  ] }
+    { &quot;id&quot;: 10, &quot;name&quot;: &quot;BramHatch&quot;, &quot;similarUsers&quot;: [  ] }
+    { &quot;id&quot;: 1, &quot;name&quot;: &quot;MargaritaStoddard&quot;, &quot;similarUsers&quot;: [  ] }
+    { &quot;id&quot;: 2, &quot;name&quot;: &quot;IsbelDull&quot;, &quot;similarUsers&quot;: [  ] }
+    { &quot;id&quot;: 3, &quot;name&quot;: &quot;EmoryUnk&quot;, &quot;similarUsers&quot;: [  ] }
+    { &quot;id&quot;: 4, &quot;name&quot;: &quot;NicholasStroh&quot;, &quot;similarUsers&quot;: [  ] }
+    { &quot;id&quot;: 5, &quot;name&quot;: &quot;VonKemble&quot;, &quot;similarUsers&quot;: [  ] }
+    { &quot;id&quot;: 7, &quot;name&quot;: &quot;SuzannaTillson&quot;, &quot;similarUsers&quot;: [  ] }
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Query_6_-_Existential_Quantification"></a>Query 6 - Existential Quantification</h3>
+<p>The expressive power of SQL++ includes support for queries involving &#x201c;some&#x201d; (existentially quantified) and &#x201c;all&#x201d; (universally quantified) query semantics. As an example of an existential SQL++ query, here we show a query to list the Gleambook users who are currently employed. Such employees will have an employment history containing an object in which the end-date field is <i>MISSING</i> (or it could be there but have the value <i>NULL</i>, as JSON unfortunately provides two ways to represent unknown values). This leads us to the following SQL++ query:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT VALUE gbu
+    FROM GleambookUsers gbu
+    WHERE (SOME e IN gbu.employment SATISFIES e.endDate IS UNKNOWN);
+</pre></div></div>
+
+<p>The expected result in this case is:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;id&quot;: 6, &quot;alias&quot;: &quot;Willis&quot;, &quot;name&quot;: &quot;WillisWynne&quot;, &quot;userSince&quot;: datetime(&quot;2005-01-17T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 3, 7 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;jaydax&quot;, &quot;startDate&quot;: date(&quot;2009-05-15&quot;) } ] }
+    { &quot;id&quot;: 8, &quot;alias&quot;: &quot;Nila&quot;, &quot;name&quot;: &quot;NilaMilliron&quot;, &quot;userSince&quot;: datetime(&quot;2008-01-01T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 3 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Plexlane&quot;, &quot;startDate&quot;: date(&quot;2010-02-28&quot;) } ] }
+    { &quot;id&quot;: 1, &quot;alias&quot;: &quot;Margarita&quot;, &quot;name&quot;: &quot;MargaritaStoddard&quot;, &quot;userSince&quot;: datetime(&quot;2012-08-20T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 2, 3, 6, 10 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Codetechno&quot;, &quot;startDate&quot;: date(&quot;2006-08-06&quot;) }, { &quot;organizationName&quot;: &quot;geomedia&quot;, &quot;startDate&quot;: date(&quot;2010-06-17&quot;), &quot;endDate&quot;: date(&quot;2010-01-26&quot;) } ], &quot;nickname&quot;: &quot;Mags&quot;, &quot;gender&quot;: &quot;F&quot; }
+    { &quot;id&quot;: 2, &quot;alias&quot;: &quot;Isbel&quot;, &quot;name&quot;: &quot;IsbelDull&quot;, &quot;userSince&quot;: datetime(&quot;2011-01-22T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 4 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Hexviafind&quot;, &quot;startDate&quot;: date(&quot;2010-04-27&quot;) } ], &quot;nickname&quot;: &quot;Izzy&quot; }
+    { &quot;id&quot;: 4, &quot;alias&quot;: &quot;Nicholas&quot;, &quot;name&quot;: &quot;NicholasStroh&quot;, &quot;userSince&quot;: datetime(&quot;2010-12-27T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 2 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Zamcorporation&quot;, &quot;startDate&quot;: date(&quot;2010-06-08&quot;) } ] }
+    { &quot;id&quot;: 5, &quot;alias&quot;: &quot;Von&quot;, &quot;name&quot;: &quot;VonKemble&quot;, &quot;userSince&quot;: datetime(&quot;2010-01-05T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 3, 6, 10 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Kongreen&quot;, &quot;startDate&quot;: date(&quot;2010-11-27&quot;) } ] }
+    { &quot;id&quot;: 7, &quot;alias&quot;: &quot;Suzanna&quot;, &quot;name&quot;: &quot;SuzannaTillson&quot;, &quot;userSince&quot;: datetime(&quot;2012-08-07T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 6 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Labzatron&quot;, &quot;startDate&quot;: date(&quot;2011-04-19&quot;) } ] }
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Query_7_-_Universal_Quantification"></a>Query 7 - Universal Quantification</h3>
+<p>As an example of a universal SQL++ query, here we show a query to list the Gleambook users who are currently unemployed. Such users will have an employment history containing no objects with unknown end-date field values, leading us to the following SQL++ query:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT VALUE gbu
+    FROM GleambookUsers gbu
+    WHERE (EVERY e IN gbu.employment SATISFIES e.endDate IS NOT UNKNOWN);
+</pre></div></div>
+
+<p>Here is the expected result for our sample data:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;id&quot;: 9, &quot;alias&quot;: &quot;Woodrow&quot;, &quot;name&quot;: &quot;WoodrowNehling&quot;, &quot;userSince&quot;: datetime(&quot;2005-09-20T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 3, 10 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;Zuncan&quot;, &quot;startDate&quot;: date(&quot;2003-04-22&quot;), &quot;endDate&quot;: date(&quot;2009-12-13&quot;) } ], &quot;nickname&quot;: &quot;Woody&quot; }
+    { &quot;id&quot;: 10, &quot;alias&quot;: &quot;Bram&quot;, &quot;name&quot;: &quot;BramHatch&quot;, &quot;userSince&quot;: datetime(&quot;2010-10-16T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 5, 9 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;physcane&quot;, &quot;startDate&quot;: date(&quot;2007-06-05&quot;), &quot;endDate&quot;: date(&quot;2011-11-05&quot;) } ] }
+    { &quot;id&quot;: 3, &quot;alias&quot;: &quot;Emory&quot;, &quot;name&quot;: &quot;EmoryUnk&quot;, &quot;userSince&quot;: datetime(&quot;2012-07-10T10:10:00.000Z&quot;), &quot;friendIds&quot;: {{ 1, 5, 8, 9 }}, &quot;employment&quot;: [ { &quot;organizationName&quot;: &quot;geomedia&quot;, &quot;startDate&quot;: date(&quot;2010-06-17&quot;), &quot;endDate&quot;: date(&quot;2010-01-26&quot;) } ] }
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Query_8_-_Simple_Aggregation"></a>Query 8 - Simple Aggregation</h3>
+<p>Like SQL, the SQL++ language of AsterixDB provides support for computing aggregates over large amounts of data. As a very simple example, the following SQL++ query computes the total number of Gleambook users in a SQL-like way:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT COUNT(gbu) AS numUsers FROM GleambookUsers gbu;
+</pre></div></div>
+
+<p>This query&#x2019;s result will be:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;numUsers&quot;: 10 }
+</pre></div></div>
+
+<p>If an &#x201c;unwrapped&#x201d; value is preferred, the following variant could be used instead:</p>
+
+<div>
+<div>
+<pre class="source">    SELECT VALUE COUNT(gbu) FROM GleambookUsers gbu;
+</pre></div></div>
+
+<p>This time the result will simply be:</p>
+
+<div>
+<div>
+<pre class="source">    10
+</pre></div></div>
+
+<p>In SQL++, aggregate functions can be applied to arbitrary collections, including subquery results. To illustrate, here is a less SQL-like&#x2014;and also more explicit&#x2014;way to express the query above:</p>
+
+<div>
+<div>
+<pre class="source">    SELECT VALUE ARRAY_COUNT((SELECT gbu FROM GleambookUsers gbu));
+</pre></div></div>
+
+<p>For each traditional SQL aggregate function <i>F</i>, SQL++ has a corresponding function <i>ARRAY_F</i> that can be used to perform the desired aggregate calculation. Each such function is a regular function that takes a collection-valued argument to aggregate over. Thus, the query above counts the results produced by the GleambookUsers subquery, and the previous, more SQL-like versions are just syntactic sugar for SQL++ queries that use <i>ARRAY_COUNT</i>. (Note: Subqueries in SQL++ must always be parenthesized.)</p></div>
+<div class="section">
+<h3><a name="Query_9-A_-_Grouping_and_Aggregation"></a>Query 9-A - Grouping and Aggregation</h3>
+<p>Also like SQL, SQL++ supports grouped aggregation. For every Chirp user, the following group-by/aggregate query counts the number of chirps sent by that user:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT uid AS user, COUNT(cm) AS count
+    FROM ChirpMessages cm
+    GROUP BY cm.user.screenName AS uid;
+</pre></div></div>
+
+<p>The <i>FROM</i> clause incrementally binds the variable <i>cm</i> to chirps, and the <i>GROUP BY</i> clause groups the chirps by their issuer&#x2019;s Chirp screen-name. Unlike SQL, where data is tabular&#x2014;flat&#x2014;the data model underlying SQL++ allows for nesting. Thus, due to the <i>GROUP BY</i> clause, the <i>SELECT</i> clause in this query sees a sequence of <i>cm</i> groups, with each such group having an associated <i>uid</i> variable value (i.e., the chirping user&#x2019;s screen name). In the context of the <i>SELECT</i> clause, <i>uid</i> is bound to the chirper&#x2019;s id and <i>cm</i> is now re-bound (due to grouping) to the <i>set</i> of chirps issued by that chirper. The <i>SELECT</i> clause yields a result object containing the chirper&#x2019;s user id and the count of the items in the associated chirp set. The query result will contain one such object per screen name. This query also illustrates another feature of SQL++; notice how each user&#x2019;s screen name is accessed via a path syntax that traverses each chirp&#x2019;s nested object structure.</p>
+<p>Here is the expected result for this query over the sample data:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;user&quot;: &quot;ChangEwing_573&quot;, &quot;count&quot;: 1 }
+    { &quot;user&quot;: &quot;OliJackson_512&quot;, &quot;count&quot;: 1 }
+    { &quot;user&quot;: &quot;ColineGeyer@63&quot;, &quot;count&quot;: 3 }
+    { &quot;user&quot;: &quot;NathanGiesen@211&quot;, &quot;count&quot;: 6 }
+    { &quot;user&quot;: &quot;NilaMilliron_tw&quot;, &quot;count&quot;: 1 }
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Query_9-B_-_.28Hash-Based.29_Grouping_and_Aggregation"></a>Query 9-B - (Hash-Based) Grouping and Aggregation</h3>
+<p>As for joins, AsterixDB has multiple evaluation strategies available for processing grouped aggregate queries. For grouped aggregation, the system knows how to employ both sort-based and hash-based aggregation methods, with sort-based methods being used by default and a hint being available to suggest that a different approach be used in processing a particular SQL++ query.</p>
+<p>The following query is similar to Query 9-A, but adds a hash-based aggregation hint:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT uid AS user, COUNT(cm) AS count
+    FROM ChirpMessages cm
+     /*+ hash */
+    GROUP BY cm.user.screenName AS uid;
+</pre></div></div>
+
+<p>Here is the expected result (the same result, but in a slightly different order):</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;user&quot;: &quot;OliJackson_512&quot;, &quot;count&quot;: 1 }
+    { &quot;user&quot;: &quot;ChangEwing_573&quot;, &quot;count&quot;: 1 }
+    { &quot;user&quot;: &quot;ColineGeyer@63&quot;, &quot;count&quot;: 3 }
+    { &quot;user&quot;: &quot;NathanGiesen@211&quot;, &quot;count&quot;: 6 }
+    { &quot;user&quot;: &quot;NilaMilliron_tw&quot;, &quot;count&quot;: 1 }
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Query_10_-_Grouping_and_Limits"></a>Query 10 - Grouping and Limits</h3>
+<p>In some use cases it is not necessary to compute the entire answer to a query. In some cases, just having the first <i>N</i> or top <i>N</i> results is sufficient. This is expressible in SQL++ using the <i>LIMIT</i> clause combined with the <i>ORDER BY</i> clause.</p>
+<p>The following SQL++ query returns the top 3 Chirp users based on who has issued the most chirps:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    SELECT uid AS user, c AS count
+    FROM ChirpMessages cm
+    GROUP BY cm.user.screenName AS uid WITH c AS count(cm)
+    ORDER BY c DESC
+    LIMIT 3;
+</pre></div></div>
+
+<p>The expected result for this query is:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;user&quot;: &quot;NathanGiesen@211&quot;, &quot;count&quot;: 6 }
+    { &quot;user&quot;: &quot;ColineGeyer@63&quot;, &quot;count&quot;: 3 }
+    { &quot;user&quot;: &quot;ChangEwing_573&quot;, &quot;count&quot;: 1 }
+</pre></div></div>
+</div>
+<div class="section">
+<h3><a name="Query_11_-_Left_Outer_Fuzzy_Join"></a>Query 11 - Left Outer Fuzzy Join</h3>
+<p>As a last example of SQL++ and its query power, the following query, for each chirp, finds all of the chirps that are similar based on the topics that they refer to:</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+    SET simfunction &quot;jaccard&quot;;
+    SET simthreshold &quot;0.3&quot;;
+
+    SELECT cm1 AS chirp,
+           (SELECT VALUE cm2.chirpId
+            FROM ChirpMessages cm2
+            WHERE cm2.referredTopics ~= cm1.referredTopics
+              AND cm2.chirpId &gt; cm1.chirpId) AS similarChirps
+    FROM ChirpMessages cm1;
+</pre></div></div>
+
+<p>This query illustrates several things worth knowing in order to write fuzzy queries in SQL++. First, as mentioned earlier, SQL++ offers an operator-based syntax (as well as a functional approach, not shown) for seeing whether two values are &#x201c;similar&#x201d; to one another or not. Second, recall that the referredTopics field of objects of datatype ChirpMessageType is a bag of strings. This query sets the context for its similarity join by requesting that Jaccard-based similarity semantics (<a class="externalLink" href="http://en.wikipedia.org/wiki/Jaccard_index">http://en.wikipedia.org/wiki/Jaccard_index</a>) be used for the query&#x2019;s similarity operator and that a similarity index of 0.3 be used as its similarity threshold.</p>
+<p>The expected result for this fuzzy join query is:</p>
+
+<div>
+<div>
+<pre class="source">    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;11&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;NilaMilliron_tw&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 445, &quot;statusesCount&quot;: 164, &quot;name&quot;: &quot;Nila Milliron&quot;, &quot;followersCount&quot;: 22649 }, &quot;senderLocation&quot;: point(&quot;37.59,68.42&quot;), &quot;sendTime&quot;: datetime(&quot;2008-03-09T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;x-phone&quot;, &quot;platform&quot; }}, &quot;messageText&quot;: &quot; can't stand x-phone its platform is terrible&quot; }, &quot;similarChirps&quot;: [ &quot;6&quot;, &quot;7&quot; ] }
+    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;2&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;ColineGeyer@63&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 121, &quot;statusesCount&quot;: 362, &quot;name&quot;: &quot;Coline Geyer&quot;, &quot;followersCount&quot;: 17159 }, &quot;senderLocation&quot;: point(&quot;32.84,67.14&quot;), &quot;sendTime&quot;: datetime(&quot;2010-05-13T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;ccast&quot;, &quot;shortcut-menu&quot; }}, &quot;messageText&quot;: &quot; like ccast its shortcut-menu is awesome:)&quot; }, &quot;similarChirps&quot;: [ &quot;9&quot;, &quot;8&quot; ] }
+    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;3&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;NathanGiesen@211&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 39339, &quot;statusesCount&quot;: 473, &quot;name&quot;: &quot;Nathan Giesen&quot;, &quot;followersCount&quot;: 49416 }, &quot;senderLocation&quot;: point(&quot;29.72,75.8&quot;), &quot;sendTime&quot;: datetime(&quot;2006-11-04T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;product-w&quot;, &quot;speed&quot; }}, &quot;messageText&quot;: &quot; like product-w the speed is good:)&quot; }, &quot;similarChirps&quot;: [ &quot;5&quot; ] }
+    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;4&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;NathanGiesen@211&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 39339, &quot;statusesCount&quot;: 473, &quot;name&quot;: &quot;Nathan Giesen&quot;, &quot;followersCount&quot;: 49416 }, &quot;senderLocation&quot;: point(&quot;39.28,70.48&quot;), &quot;sendTime&quot;: datetime(&quot;2011-12-26T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;product-b&quot;, &quot;voice-command&quot; }}, &quot;messageText&quot;: &quot; like product-b the voice-command is mind-blowing:)&quot; }, &quot;similarChirps&quot;: [  ] }
+    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;6&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;ColineGeyer@63&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 121, &quot;statusesCount&quot;: 362, &quot;name&quot;: &quot;Coline Geyer&quot;, &quot;followersCount&quot;: 17159 }, &quot;senderLocation&quot;: point(&quot;47.51,83.99&quot;), &quot;sendTime&quot;: datetime(&quot;2010-05-07T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;x-phone&quot;, &quot;voice-clarity&quot; }}, &quot;messageText&quot;: &quot; like x-phone the voice-clarity is good:)&quot; }, &quot;similarChirps&quot;: [  ] }
+    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;7&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;ChangEwing_573&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 182, &quot;statusesCount&quot;: 394, &quot;name&quot;: &quot;Chang Ewing&quot;, &quot;followersCount&quot;: 32136 }, &quot;senderLocation&quot;: point(&quot;36.21,72.6&quot;), &quot;sendTime&quot;: datetime(&quot;2011-08-25T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;product-y&quot;, &quot;platform&quot; }}, &quot;messageText&quot;: &quot; like product-y the platform is good&quot; }, &quot;similarChirps&quot;: [  ] }
+    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;9&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;NathanGiesen@211&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 39339, &quot;statusesCount&quot;: 473, &quot;name&quot;: &quot;Nathan Giesen&quot;, &quot;followersCount&quot;: 49416 }, &quot;senderLocation&quot;: point(&quot;36.86,74.62&quot;), &quot;sendTime&quot;: datetime(&quot;2012-07-21T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;ccast&quot;, &quot;voicemail-service&quot; }}, &quot;messageText&quot;: &quot; love ccast its voicemail-service is awesome&quot; }, &quot;similarChirps&quot;: [  ] }
+    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;1&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;NathanGiesen@211&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 39339, &quot;statusesCount&quot;: 473, &quot;name&quot;: &quot;Nathan Giesen&quot;, &quot;followersCount&quot;: 49416 }, &quot;senderLocation&quot;: point(&quot;47.44,80.65&quot;), &quot;sendTime&quot;: datetime(&quot;2008-04-26T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;product-z&quot;, &quot;customization&quot; }}, &quot;messageText&quot;: &quot; love product-z its customization is good:)&quot; }, &quot;similarChirps&quot;: [ &quot;8&quot; ] }
+    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;10&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;ColineGeyer@63&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 121, &quot;statusesCount&quot;: 362, &quot;name&quot;: &quot;Coline Geyer&quot;, &quot;followersCount&quot;: 17159 }, &quot;senderLocation&quot;: point(&quot;29.15,76.53&quot;), &quot;sendTime&quot;: datetime(&quot;2008-01-26T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;ccast&quot;, &quot;voice-clarity&quot; }}, &quot;messageText&quot;: &quot; hate ccast its voice-clarity is OMG:(&quot; }, &quot;similarChirps&quot;: [ &quot;2&quot;, &quot;6&quot;, &quot;9&quot; ] }
+    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;12&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;OliJackson_512&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 445, &quot;statusesCount&quot;: 164, &quot;name&quot;: &quot;Oli Jackson&quot;, &quot;followersCount&quot;: 22649 }, &quot;senderLocation&quot;: point(&quot;24.82,94.63&quot;), &quot;sendTime&quot;: datetime(&quot;2010-02-13T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;product-y&quot;, &quot;voice-command&quot; }}, &quot;messageText&quot;: &quot; like product-y the voice-command is amazing:)&quot; }, &quot;similarChirps&quot;: [ &quot;4&quot;, &quot;7&quot; ] }
+    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;5&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;NathanGiesen@211&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 39339, &quot;statusesCount&quot;: 473, &quot;name&quot;: &quot;Nathan Giesen&quot;, &quot;followersCount&quot;: 49416 }, &quot;senderLocation&quot;: point(&quot;40.09,92.69&quot;), &quot;sendTime&quot;: datetime(&quot;2006-08-04T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;product-w&quot;, &quot;speed&quot; }}, &quot;messageText&quot;: &quot; can't stand product-w its speed is terrible:(&quot; }, &quot;similarChirps&quot;: [  ] }
+    { &quot;chirp&quot;: { &quot;chirpId&quot;: &quot;8&quot;, &quot;user&quot;: { &quot;screenName&quot;: &quot;NathanGiesen@211&quot;, &quot;lang&quot;: &quot;en&quot;, &quot;friendsCount&quot;: 39339, &quot;statusesCount&quot;: 473, &quot;name&quot;: &quot;Nathan Giesen&quot;, &quot;followersCount&quot;: 49416 }, &quot;senderLocation&quot;: point(&quot;46.05,93.34&quot;), &quot;sendTime&quot;: datetime(&quot;2005-10-14T10:10:00.000Z&quot;), &quot;referredTopics&quot;: {{ &quot;product-z&quot;, &quot;shortcut-menu&quot; }}, &quot;messageText&quot;: &quot; like product-z the shortcut-menu is awesome:)&quot; }, &quot;similarChirps&quot;: [  ] }
+</pre></div></div>
+</div></div>
+<div class="section">
+<h2><a name="Inserting_New_Data"></a>Inserting New Data</h2>
+<p>In addition to loading and querying data, AsterixDB supports incremental additions to datasets via the SQL++ <i>INSERT</i> statement.</p>
+<p>The following example adds a new chirp by user &#x201c;NathanGiesen@211&#x201d; to the ChirpMessages dataset. (An astute reader may notice that this chirp was issued half an hour after his last chirp, so his counts have all gone up in the interim, although he appears not to have moved in the last half hour.)</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+
+    INSERT INTO ChirpMessages
+    (
+       {&quot;chirpId&quot;: &quot;13&quot;,
+        &quot;user&quot;:
+            {&quot;screenName&quot;: &quot;NathanGiesen@211&quot;,
+             &quot;lang&quot;: &quot;en&quot;,
+             &quot;friendsCount&quot;: 39345,
+             &quot;statusesCount&quot;: 479,
+             &quot;name&quot;: &quot;Nathan Giesen&quot;,
+             &quot;followersCount&quot;: 49420
+            },
+        &quot;senderLocation&quot;: point(&quot;47.44,80.65&quot;),
+        &quot;sendTime&quot;: datetime(&quot;2008-04-26T10:10:35&quot;),
+        &quot;referredTopics&quot;: {{&quot;chirping&quot;}},
+        &quot;messageText&quot;: &quot;chirpy chirp, my fellow chirpers!&quot;
+       }
+    );
+</pre></div></div>
+
+<p>In general, the data to be inserted may be specified using any valid SQL++ query expression. The insertion of a single object instance, as in this example, is just a special case where the query expression happens to be an object constructor involving only constants.</p>
+<div class="section">
+<h3><a name="Deleting_Existing_Data"></a>Deleting Existing Data</h3>
+<p>In addition to inserting new data, AsterixDB supports deletion from datasets via the SQL++ <i>DELETE</i> statement. The statement supports &#x201c;searched delete&#x201d; semantics, and its <i>WHERE</i> clause can involve any valid SQL++ expression.</p>
+<p>The following example deletes the chirp that we just added from user &#x201c;NathanGiesen@211&#x201d;.  (Easy come, easy go. :-))</p>
+
+<div>
+<div>
+<pre class="source">    USE TinySocial;
+    DELETE FROM ChirpMessages cm WHERE cm.chirpId = &quot;13&quot;;
+</pre></div></div>
+
+<p>It should be noted that one form of data change not yet supported by AsterixDB is in-place data modification (<i>update</i>). Currently, only insert and delete operations are supported in SQL++; updates are not. To achieve the effect of an update, two SQL++ statements are currently needed&#x2014;one to delete the old object from the dataset where it resides, and another to insert the new replacement object (with the same primary key but with different field values for some of the associated data content). AQL additionally supports an upsert operation to either insert an object, if no object with its primary key is currently present in the dataset, or to replace the existing object if one already exists with the primary key value being upserted. SQL++ will soon have <i>UPSERT</i> as well.</p></div>
+<div class="section">
+<h3><a name="Transaction_Support"></a>Transaction Support</h3>
+<p>AsterixDB supports object-level ACID transactions that begin and terminate implicitly for each object inserted, deleted, or searched while a given SQL++ statement is being executed. This is quite similar to the level of transaction support found in today&#x2019;s NoSQL stores. AsterixDB does not support multi-statement transactions, and in fact an SQL++ statement that involves multiple objects can itself involve multiple independent object-level transactions. An example consequence of this is that, when an SQL++ statement attempts to insert 1000 objects, it is possible that the first 800 objects could end up being committed while the remaining 200 objects fail to be inserted. This situation could happen, for example, if a duplicate key exception occurs as the 801st insertion is attempted. If this happens, AsterixDB will report the error (e.g., a duplicate key exception) as the result of the offending SQL++ <i>INSERT</i> statement, and the application logic above will need to take the appropriate action(s) needed to assess the resulting state and to clean up and/or continue as appropriate.</p></div>
+<div class="section">
+<h3><a name="Loading_New_Data_in_Bulk"></a>Loading New Data in Bulk</h3>
+<p>In addition to incremental additions to datasets via the SQL++ <i>insert</i> statement, the <i>load</i> statement can be used to take a file from a given node and load it in a more efficient fashion. Note however that a dataset can currently only be loaded if it is empty.</p>
+<p>The following example loads a file in ADM format from &#x201c;/home/user/gbu.adm&#x201d; on the node named &#x201c;nc1&#x201d; into the GleambookUsers dataset.</p>
+
+<div>
+<div>
+<pre class="source">USE TinySocial;
+
+LOAD DATASET GleambookUsers USING localfs
+    ((&quot;path&quot;=&quot;nc1://home/user/gbu.adm&quot;),(&quot;format&quot;=&quot;adm&quot;));
+</pre></div></div>
+</div></div>
+<div class="section">
+<h2><a name="Further_Help"></a>Further Help</h2>
+<p>That&#x2019;s it! You are now armed and dangerous with respect to semistructured data management using AsterixDB via SQL++. More information about SQL++ is available in the SQL++ Query Language (SQL++) reference document as well as in its companion SQL++ Functions document.</p>
+<p>AsterixDB is a powerful new BDMS&#x2014;Big Data Management System&#x2014;that we hope may usher in a new era of much more declarative Big Data management. AsterixDB is powerful, so use it wisely, and remember: &#x201c;With great power comes great responsibility&#x2026;&#x201d; :-)</p>
+<p>Please e-mail the AsterixDB user group (users (at) asterixdb.apache.org) if you run into any problems or simply have further questions about the AsterixDB system, its features, or their proper use.</p></div>
+        </div>
+      </div>
+    </div>
+    <hr/>
+    <footer>
+      <div class="container-fluid">
+        <div class="row-fluid">
+<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
+        feather logo, and the Apache AsterixDB project logo are either
+        registered trademarks or trademarks of The Apache Software
+        Foundation in the United States and other countries.
+        All other marks mentioned may be trademarks or registered
+        trademarks of their respective owners.
+      </div>
+        </div>
+      </div>
+    </footer>
+  </body>
+</html>

diff --git a/content/docs/0.9.9/site/sqlpp/similarity.html b/content/docs/0.9.9/site/sqlpp/similarity.html
new file mode 100644
index 0000000..fbc6370
--- /dev/null
+++ b/content/docs/0.9.9/site/sqlpp/similarity.html

@@ -0,0 +1,299 @@
+<!DOCTYPE html>
+<!--
+ | Generated by Apache Maven Doxia Site Renderer 1.8.1 from src/site/markdown/sqlpp/similarity.md at 2024-04-01
+ | Rendered using Apache Maven Fluido Skin 1.7
+-->
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <meta name="Date-Revision-yyyymmdd" content="20240401" />
+    <meta http-equiv="Content-Language" content="en" />
+    <title>AsterixDB &#x2013; AsterixDB  Support of Similarity Queries</title>
+    <link rel="stylesheet" href="../css/apache-maven-fluido-1.7.min.css" />
+    <link rel="stylesheet" href="../css/site.css" />
+    <link rel="stylesheet" href="../css/print.css" media="print" />
+    <script type="text/javascript" src="../js/apache-maven-fluido-1.7.min.js"></script>
+
+  </head>
+  <body class="topBarDisabled">
+    <div class="container-fluid">
+      <div id="banner">
+        <div class="pull-left"><a href=".././" id="bannerLeft"><img src="../images/asterixlogo.png"  alt="AsterixDB"/></a></div>
+        <div class="pull-right"></div>
+        <div class="clear"><hr/></div>
+      </div>
+
+      <div id="breadcrumbs">
+        <ul class="breadcrumb">
+        <li id="publishDate">Last Published: 2024-04-01</li>
+      <li id="projectVersion" class="pull-right">Version: 0.9.9</li>
+      <li class="pull-right"><a href="../index.html" title="Documentation Home">Documentation Home</a></li>
+        </ul>
+      </div>
+      <div class="row-fluid">
+        <div id="leftColumn" class="span2">
+          <div class="well sidebar-nav">
+    <ul class="nav nav-list">
+      <li class="nav-header">Get Started - Installation</li>
+    <li><a href="../ncservice.html" title="Option 1: using NCService"><span class="none"></span>Option 1: using NCService</a></li>
+    <li><a href="../ansible.html" title="Option 2: using Ansible"><span class="none"></span>Option 2: using Ansible</a></li>
+    <li><a href="../aws.html" title="Option 3: using Amazon Web Services"><span class="none"></span>Option 3: using Amazon Web Services</a></li>
+      <li class="nav-header">AsterixDB Primer</li>
+    <li><a href="../sqlpp/primer-sqlpp.html" title="Using SQL++"><span class="none"></span>Using SQL++</a></li>
+      <li class="nav-header">Data Model</li>
+    <li><a href="../datamodel.html" title="The Asterix Data Model"><span class="none"></span>The Asterix Data Model</a></li>
+      <li class="nav-header">Queries</li>
+    <li><a href="../sqlpp/manual.html" title="The SQL++ Query Language"><span class="none"></span>The SQL++ Query Language</a></li>
+    <li><a href="../SQLPP.html" title="Raw SQL++ Grammar"><span class="none"></span>Raw SQL++ Grammar</a></li>
+    <li><a href="../sqlpp/builtins.html" title="Builtin Functions"><span class="none"></span>Builtin Functions</a></li>
+      <li class="nav-header">API/SDK</li>
+    <li><a href="../api.html" title="HTTP API"><span class="none"></span>HTTP API</a></li>
+    <li><a href="../csv.html" title="CSV Output"><span class="none"></span>CSV Output</a></li>
+      <li class="nav-header">Advanced Features</li>
+    <li><a href="../aql/externaldata.html" title="Accessing External Data"><span class="none"></span>Accessing External Data</a></li>
+    <li><a href="../feeds.html" title="Data Ingestion with Feeds"><span class="none"></span>Data Ingestion with Feeds</a></li>
+    <li><a href="../udf.html" title="User Defined Functions"><span class="none"></span>User Defined Functions</a></li>
+    <li><a href="../sqlpp/filters.html" title="Filter-Based LSM Index Acceleration"><span class="none"></span>Filter-Based LSM Index Acceleration</a></li>
+    <li><a href="../sqlpp/fulltext.html" title="Support of Full-text Queries"><span class="none"></span>Support of Full-text Queries</a></li>
+    <li class="active"><a href="#"><span class="none"></span>Support of Similarity Queries</a></li>
+    <li><a href="../geo/quickstart.html" title="GIS Support Overview"><span class="none"></span>GIS Support Overview</a></li>
+    <li><a href="../geo/functions.html" title="GIS Functions"><span class="none"></span>GIS Functions</a></li>
+    <li><a href="../interval_join.html" title="Support of Interval Joins"><span class="none"></span>Support of Interval Joins</a></li>
+    <li><a href="../spatial_join.html" title="Support of Spatial Joins"><span class="none"></span>Support of Spatial Joins</a></li>
+    <li><a href="../sqlpp/arrayindex.html" title="Support of Array Indexes"><span class="none"></span>Support of Array Indexes</a></li>
+      <li class="nav-header">Deprecated</li>
+    <li><a href="../aql/primer.html" title="AsterixDB Primer: Using AQL"><span class="none"></span>AsterixDB Primer: Using AQL</a></li>
+    <li><a href="../aql/manual.html" title="Queries: The Asterix Query Language (AQL)"><span class="none"></span>Queries: The Asterix Query Language (AQL)</a></li>
+    <li><a href="../aql/builtins.html" title="Queries: Builtin Functions (AQL)"><span class="none"></span>Queries: Builtin Functions (AQL)</a></li>
+</ul>
+          <hr />
+          <div id="poweredBy">
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+            <div class="clear"></div>
+<a href=".././" title="AsterixDB" class="builtBy"><img class="builtBy"  alt="AsterixDB" src="../images/asterixlogo.png"    /></a>
+            </div>
+          </div>
+        </div>
+        <div id="bodyColumn"  class="span10" >
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+<h1>AsterixDB  Support of Similarity Queries</h1>
+<div class="section">
+<h2><a name="Table_of_Contents"></a><a name="toc" id="toc">Table of Contents</a></h2>
+<ul>
+
+<li><a href="#Motivation">Motivation</a></li>
+<li><a href="#DataTypesAndSimilarityFunctions">Data Types and Similarity Functions</a></li>
+<li><a href="#SimilaritySelectionQueries">Similarity Selection Queries</a></li>
+<li><a href="#SimilarityJoinQueries">Similarity Join Queries</a></li>
+<li><a href="#UsingIndexesToSupportSimilarityQueries">Using Indexes to Support Similarity Queries</a></li>
+</ul></div>
+<div class="section">
+<h2><a name="Motivation_.5BBack_to_TOC.5D"></a><a name="Motivation" id="Motivation">Motivation</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>Similarity queries are widely used in applications where users need to find objects that satisfy a similarity predicate, while exact matching is not sufficient. These queries are especially important for social and Web applications, where errors, abbreviations, and inconsistencies are common.  As an example, we may want to find all the movies starring Schwarzenegger, while we don&#x2019;t know the exact spelling of his last name (despite his popularity in both the movie industry and politics :-)). As another example, we want to find all the Facebook users who have similar friends. To meet these kinds of needs, AsterixDB supports similarity queries using efficient indexes and algorithms.</p></div>
+<div class="section">
+<h2><a name="Data_Types_and_Similarity_Functions_.5BBack_to_TOC.5D"></a><a name="DataTypesAndSimilarityFunctions" id="DataTypesAndSimilarityFunctions">Data Types and Similarity Functions</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>AsterixDB supports <a class="externalLink" href="http://en.wikipedia.org/wiki/Levenshtein_distance">edit distance</a> (on strings) and <a class="externalLink" href="http://en.wikipedia.org/wiki/Jaccard_index">Jaccard</a> (on sets).  For instance, in our <a href="../sqlpp/primer-sqlpp.html#ADM:_Modeling_Semistructured_Data_in_AsterixDB">TinySocial</a> example, the <tt>friendIds</tt> of a Gleambook user forms a set of friends, and we can define a similarity between the sets of friends of two users. We can also convert a string to a set of grams of a length &#x201c;n&#x201d; (called &#x201c;n-grams&#x201d;) and define the Jaccard similarity between the two gram sets of the two strings. Formally, the &#x201c;n-grams&#x201d; of a string are its substrings of length &#x201c;n&#x201d;. For instance, the 3-grams of the string <tt>schwarzenegger</tt> are <tt>sch</tt>, <tt>chw</tt>, <tt>hwa</tt>, &#x2026;, <tt>ger</tt>.</p>
+<p>AsterixDB provides <a href="../sqlpp/builtins.html#Tokenizing_Functions">tokenization functions</a> to convert strings to sets, and the <a href="../sqlpp/builtins.html#Similarity_Functions">similarity functions</a>.</p></div>
+<div class="section">
+<h2><a name="Similarity_Selection_Queries_.5BBack_to_TOC.5D"></a><a name="SimilaritySelectionQueries" id="SimilaritySelectionQueries">Similarity Selection Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>The following query asks for all the Gleambook users whose name is similar to <tt>Suzanna Tilson</tt>, i.e., their edit distance is at most 2.</p>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    select u
+    from GleambookUsers u
+    where edit_distance(u.name, &quot;Suzanna Tilson&quot;) &lt;= 2;
+</pre></div></div>
+
+<p>The following query asks for all the Gleambook users whose set of friend ids is similar to <tt>[1,5,9,10]</tt>, i.e., their Jaccard similarity is at least 0.6.</p>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    select u
+    from GleambookUsers u
+    where similarity_jaccard(u.friendIds, [1,5,9,10]) &gt;= 0.6f;
+</pre></div></div>
+
+<p>AsterixDB allows a user to use a similarity operator <tt>~=</tt> to express a condition by defining the similarity function and threshold using &#x201c;set&#x201d; statements earlier. For instance, the above query can be equivalently written as:</p>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    set simfunction &quot;jaccard&quot;;
+    set simthreshold &quot;0.6f&quot;;
+
+    select u
+    from GleambookUsers u
+    where u.friendIds ~= [1,5,9,10];
+</pre></div></div>
+
+<p>In this query, we first declare Jaccard as the similarity function using <tt>simfunction</tt> and then specify the threshold <tt>0.6f</tt> using <tt>simthreshold</tt>.</p></div>
+<div class="section">
+<h2><a name="Similarity_Join_Queries_.5BBack_to_TOC.5D"></a><a name="SimilarityJoinQueries" id="SimilarityJoinQueries">Similarity Join Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>AsterixDB supports fuzzy joins between two sets. The following <a href="../sqlpp/primer-sqlpp.html#Query_5_-_Fuzzy_Join">query</a> finds, for each Gleambook user, all Chirp users with names similar to their name based on the edit distance.</p>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    set simfunction &quot;edit-distance&quot;;
+    set simthreshold &quot;3&quot;;
+
+    select gbu.id, gbu.name, (select cu.screenName, cu.name
+                              from ChirpUsers cu
+                              where cu.name ~= gbu.name) as similar_users
+    from GleambookUsers gbu;
+</pre></div></div>
+</div>
+<div class="section">
+<h2><a name="Using_Indexes_to_Support_Similarity_Queries_.5BBack_to_TOC.5D"></a><a name="UsingIndexesToSupportSimilarityQueries" id="UsingIndexesToSupportSimilarityQueries">Using Indexes to Support Similarity Queries</a> <font size="4"><a href="#toc">[Back to TOC]</a></font></h2>
+<p>AsterixDB uses two types of indexes to support similarity queries, namely &#x201c;ngram index&#x201d; and &#x201c;keyword index&#x201d;.</p>
+<div class="section">
+<h3><a name="NGram_Index"></a>NGram Index</h3>
+<p>An &#x201c;ngram index&#x201d; is constructed on a set of strings.  We generate n-grams for each string, and build an inverted list for each n-gram that includes the ids of the strings with this gram.  A similarity query can be answered efficiently by accessing the inverted lists of the grams in the query and counting the number of occurrences of the string ids on these inverted lists.  A similar idea can be used to answer queries with Jaccard similarity.  A detailed description of these techniques is available in this <a class="externalLink" href="http://www.ics.uci.edu/~chenli/pub/icde2009-memreducer.pdf">paper</a>.</p>
+<p>For instance, the following DDL statements create an ngram index on the <tt>GleambookUsers.name</tt> attribute using an inverted index of 3-grams.</p>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    create index gbUserIdx on GleambookUsers(name) type ngram(3);
+</pre></div></div>
+
+<p>The number &#x201c;3&#x201d; in &#x201c;ngram(3)&#x201d; is the length &#x201c;n&#x201d; in the grams. This index can be used to optimize similarity queries on this attribute using <a href="../sqlpp/builtins.html#edit_distance">edit_distance</a>, <a href="../sqlpp/builtins.html#edit_distance_check">edit_distance_check</a>, <a href="../sqlpp/builtins.html#similarity_jaccard">similarity_jaccard</a>, or <a href="../sqlpp/builtins.html#similarity_jaccard_check">similarity_jaccard_check</a>, where the similarity is defined on sets of 3-grams.  This index can also be used to optimize queries with the &#x201c;<a href="../sqlpp/builtins.html#contains">contains()</a>&#x201d; predicate (i.e., substring matching), since such a predicate can also be evaluated by counting on the inverted lists of the grams in the query string.</p>
+<div class="section">
+<h4><a name="NGram_Index_usage_case_-_edit_distance"></a>NGram Index usage case - <a href="../sqlpp/builtins.html#edit_distance">edit_distance</a></h4>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    select u
+    from GleambookUsers u
+    where edit_distance(u.name, &quot;Suzanna Tilson&quot;) &lt;= 2;
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="NGram_Index_usage_case_-_edit_distance_check"></a>NGram Index usage case - <a href="../sqlpp/builtins.html#edit_distance_check">edit_distance_check</a></h4>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    select u
+    from GleambookUsers u
+    where edit_distance_check(u.name, &quot;Suzanna Tilson&quot;, 2)[0];
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="NGram_Index_usage_case_-_contains.28.29"></a>NGram Index usage case - <a href="../sqlpp/builtins.html#contains">contains()</a></h4>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    select m
+    from GleambookMessages m
+    where contains(m.message, &quot;phone&quot;);
+</pre></div></div>
+</div></div>
+<div class="section">
+<h3><a name="Keyword_Index"></a>Keyword Index</h3>
+<p>A &#x201c;keyword index&#x201d; is constructed on a set of strings or sets (e.g., array, multiset). Instead of generating grams as in an ngram index, we generate tokens (e.g., words) and for each token, construct an inverted list that includes the ids of the objects with this token.  The following two examples show how to create a keyword index on two different types:</p>
+<div class="section">
+<h4><a name="Keyword_Index_on_String_Type"></a>Keyword Index on String Type</h4>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    drop index GleambookMessages.gbMessageIdx if exists;
+    create index gbMessageIdx on GleambookMessages(message) type keyword;
+
+    select m
+    from GleambookMessages m
+    where similarity_jaccard_check(word_tokens(m.message), word_tokens(&quot;love like ccast&quot;), 0.2f)[0];
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="Keyword_Index_on_Multiset_Type"></a>Keyword Index on Multiset Type</h4>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    create index gbUserIdxFIds on GleambookUsers(friendIds) type keyword;
+
+    select u
+    from GleambookUsers u
+    where similarity_jaccard_check(u.friendIds, {{3,10}}, 0.5f)[0];
+</pre></div></div>
+
+<p>As shown above, a keyword index can be used to optimize queries with token-based similarity predicates, including <a href="../sqlpp/builtins.html#similarity_jaccard">similarity_jaccard</a> and <a href="../sqlpp/builtins.html#similarity_jaccard_check">similarity_jaccard_check</a>.</p></div>
+<div class="section">
+<h4><a name="Keyword_Index_usage_case_-_similarity_jaccard"></a>Keyword Index usage case - <a href="../sqlpp/builtins.html#similarity_jaccard">similarity_jaccard</a></h4>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    select u
+    from GleambookUsers u
+    where similarity_jaccard(u.friendIds, [1,5,9,10]) &gt;= 0.6f;
+</pre></div></div>
+</div>
+<div class="section">
+<h4><a name="Keyword_Index_usage_case_-_similarity_jaccard_check"></a>Keyword Index usage case - <a href="../sqlpp/builtins.html#similarity_jaccard_check">similarity_jaccard_check</a></h4>
+
+<div>
+<div>
+<pre class="source">    use TinySocial;
+
+    select u
+    from GleambookUsers u
+    where similarity_jaccard_check(u.friendIds, [1,5,9,10], 0.6f)[0];
+</pre></div></div></div></div></div>
+        </div>
+      </div>
+    </div>
+    <hr/>
+    <footer>
+      <div class="container-fluid">
+        <div class="row-fluid">
+<div class="row-fluid">Apache AsterixDB, AsterixDB, Apache, the Apache
+        feather logo, and the Apache AsterixDB project logo are either
+        registered trademarks or trademarks of The Apache Software
+        Foundation in the United States and other countries.
+        All other marks mentioned may be trademarks or registered
+        trademarks of their respective owners.
+      </div>
+        </div>
+      </div>
+    </footer>
+  </body>
+</html>
commit	bf8620b3274a2fa6ce777a8500edb07f9804aeb6	[log] [tgz]
author	Ian Maxon <ian@maxons.email>	Mon Apr 01 16:09:18 2024 -0700
committer	Ian Maxon <imaxon@uci.edu>	Mon Apr 01 23:10:11 2024 +0000
tree	3320fb6810643ba0a83f54ffca8557c102b767db
parent	1c2dec1b731f72f4ec7e89a08597d2f55edf6b91 [diff]