[ASTERIXDB-2792][DOC]Automate railroad diagram gen - Use -Pgenerate.rr to make the diagrams. Requires a headed JDK - Also add Simion's updates to the docs and grammar Change-Id: I0c466d9a28cba9a5e07f65339969271d09160289 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/8603 Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu> Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu> Reviewed-by: Dmitry Lychagin <dmitry.lychagin@couchbase.com> Contrib: Ian Maxon <imaxon@uci.edu>

commit: 32f4269ccd91b892eb5fad250b8a4ab217525123 [log] [tgz]
author: Ian Maxon <ian@maxons.email> Thu Nov 05 11:05:09 2020 -0800
committer: Ian Maxon <imaxon@uci.edu> Fri Nov 06 06:48:25 2020 +0000
tree: e67b5ebcbc4501beef36064038f9040d418c8da3
parent: 638dbd8bdb2bfa8c98d9f08f1e6f6bded63aa80d [diff]
diff --git a/README.md b/README.md
index 483b108..c996c75 100644
--- a/README.md
+++ b/README.md

@@ -90,7 +90,13 @@
 
 ## Documentation
 
+To generate the documentation, build asterix-doc with the `generate.rr` Maven profile, e.g. `mvn -Pgenerate.rr ...`.
+Be sure to run `mvn package` beforehand, or run `mvn site` in asterix-lang-sqlpp, because some resources used by
+the documentation are generated directly from the grammar.
+
 * [master](https://ci.apache.org/projects/asterixdb/index.html) |
+  [0.9.6](http://asterixdb.apache.org/docs/0.9.6/index.html) |
+  [0.9.5](http://asterixdb.apache.org/docs/0.9.5/index.html) |
   [0.9.4.1](http://asterixdb.apache.org/docs/0.9.4.1/index.html) |
   [0.9.4](http://asterixdb.apache.org/docs/0.9.4/index.html) |
   [0.9.3](http://asterixdb.apache.org/docs/0.9.3/index.html) |

diff --git a/asterixdb/asterix-doc/pom.xml b/asterixdb/asterix-doc/pom.xml
index 88cc8cf..7c21486 100644
--- a/asterixdb/asterix-doc/pom.xml
+++ b/asterixdb/asterix-doc/pom.xml

@@ -52,7 +52,7 @@
             <configuration>
               <target>
                 <concat destfile="${project.build.directory}/generated-site/markdown/sqlpp/manual.md">
-                  <filelist dir="${project.basedir}/src/main/markdown/sqlpp" files="0_toc.md,1_intro.md,2_expr_title.md,2_expr.md,3_query_title.md,3_declare_dataverse.md,3_declare_function.md,3_query.md,4_error_title.md,4_error.md,5_ddl_head.md,5_ddl_dataset_index.md,5_ddl_function_removal.md,5_ddl_dml.md,appendix_1_title.md,appendix_1_keywords.md,appendix_2_title.md,appendix_2_parameters.md,appendix_2_parallel_sort.md,appendix_2_index_only.md,appendix_2_hints.md,appendix_2_interval_joins.md,appendix_3_title.md,appendix_3_resolution.md" />
+                  <filelist dir="${project.basedir}/src/main/markdown/sqlpp" files="0_toc.md,1_intro.md,2_expr_title.md,2_expr.md,3_query_title.md,3_query.md,4_windowfunctions_title.md,4_windowfunctions.md,5_error_title.md,5_error.md,6_sql_diff_title.md,6_sql_diff.md,7_ddl_head.md,7_ddl_dml.md,appendix_1_title.md,appendix_1_keywords.md,appendix_2_title.md,appendix_2_parameters.md,appendix_2_parallel_sort.md,appendix_2_index_only.md,appendix_2_hints.md,appendix_3_title.md,appendix_3_resolution.md,appendix_4_title.md,appendix_4_manual_data.md" />
                 </concat>
                 <concat destfile="${project.build.directory}/generated-site/markdown/sqlpp/builtins.md">
                   <filelist dir="${project.basedir}/src/main/markdown/builtins" files="0_toc.md,0_toc_sqlpp.md,0_toc_common.md,1_numeric_common.md,1_numeric_delta.md,2_string_common.md,2_string_delta.md,3_binary.md,4_spatial.md,5_similarity.md,6_tokenizing.md,7_temporal.md,7_allens.md,8_record.md,9_aggregate_sql.md,10_comparison.md,11_type.md,13_conditional.md,12_misc.md,15_bitwise.md,14_window.md" />
@@ -75,24 +75,28 @@
                 <concat destfile="${project.build.directory}/generated-site/markdown/udf.md">
                   <filelist dir="${project.basedir}/src/main/user-defined_function/" files="udf_title.md,udf.md" />
                 </concat>
+                <concat destfile="${project.build.directory}/generated-site/markdown/interval_join.md">
+                  <filelist dir="${project.basedir}/src/main/interval_join/" files="interval_join_title.md,interval_join.md" />
+                </concat>
               </target>
             </configuration>
             <goals>
               <goal>run</goal>
             </goals>
           </execution>
-		  <execution>
-			  <id>sqlpp</id>
-			  <phase>prepare-package</phase>
-			  <configuration>
-				  <target>
-					  <copy file="${project.basedir}/../asterix-lang-sqlpp/target/site/jjdoc/SQLPP.html" tofile="${project.build.directory}/site/SQLPP.html"/>
-				  </target>
-			  </configuration>
-			  <goals>
-				  <goal>run</goal>
-			  </goals>
-		  </execution>
+          <execution>
+              <!-- TODO: this is gross, we should consume this as part of the asterix-lang-sqlpp dependency's build, not copy -->
+              <id>sqlpp</id>
+              <phase>prepare-package</phase>
+              <configuration>
+                  <target>
+                      <copy file="${project.basedir}/../asterix-lang-sqlpp/target/site/jjdoc/SQLPP.html" tofile="${project.build.directory}/site/SQLPP.html"/>
+                  </target>
+              </configuration>
+              <goals>
+                  <goal>run</goal>
+              </goals>
+          </execution>
         </executions>
       </plugin>
       <plugin>
@@ -123,11 +127,101 @@
         <configuration>
           <excludes combine.children="append">
             <exclude>src/site/resources/data/lineitem.tbl</exclude>
+            <exclude>src/main/grammar/sqlpp.ebnf</exclude>
           </excludes>
         </configuration>
       </plugin>
     </plugins>
   </build>
+  <profiles>
+    <!-- Opt-in profile (enable with -Pgenerate.rr) that renders PNG railroad diagrams
+         from src/main/grammar/sqlpp.ebnf with the rr tool. The steps are spread over
+         consecutive lifecycle phases: download (initialize), unpack rr (generate-sources),
+         run rr (generate-resources), unpack and publish the diagrams (process-resources). -->
+    <profile>
+      <id>generate.rr</id>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>exec-maven-plugin</artifactId>
+            <executions>
+              <!-- Runs rr.war on the grammar from inside the build directory and writes railroads.zip there.
+                   NOTE(review): the id "venv" does not describe this step; it is kept unchanged in case
+                   something invokes the execution by id. -->
+              <execution>
+                <id>venv</id>
+                <phase>generate-resources</phase>
+                <goals>
+                  <goal>exec</goal>
+                </goals>
+                <configuration>
+                  <!--suppress UnresolvedMavenProperty -->
+                  <executable>${java.home}/bin/java</executable>
+                  <workingDirectory>${project.build.directory}</workingDirectory>
+                  <arguments>
+                    <argument>-jar</argument>
+                    <argument>rr.war</argument>
+                    <argument>-png</argument>
+                    <argument>-out:railroads.zip</argument>
+                    <argument>-color:#f7f7f7</argument>
+                    <argument>-width:1280</argument>
+                    <argument>../src/main/grammar/sqlpp.ebnf</argument>
+                  </arguments>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+          <plugin>
+            <groupId>com.googlecode.maven-download-plugin</groupId>
+            <artifactId>download-maven-plugin</artifactId>
+            <version>1.4.2</version>
+            <executions>
+              <!-- Fetches the rr release archive into the build directory. The archive name must stay
+                   in sync with the unzip source in the extract-rr execution below. -->
+              <execution>
+                <id>install-rr</id>
+                <phase>initialize</phase>
+                <goals>
+                  <goal>wget</goal>
+                </goals>
+                <configuration>
+                  <url>https://github.com/GuntherRademacher/rr/releases/download/v1.62/rr-1.62-java8.zip</url>
+                  <outputDirectory>${project.build.directory}</outputDirectory>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+          <plugin>
+            <groupId>org.apache.maven.plugins</groupId>
+            <artifactId>maven-antrun-plugin</artifactId>
+            <executions>
+              <!-- Unpacks the downloaded rr archive; the exec step above expects rr.war in the build directory. -->
+              <execution>
+                <id>extract-rr</id>
+                <phase>generate-sources</phase>
+                <configuration>
+                  <!-- "target" replaces the deprecated "tasks" element and matches the other antrun executions in this POM. -->
+                  <target>
+                    <echo message="Extracting rr" />
+                    <unzip src="${project.build.directory}/rr-1.62-java8.zip" dest="${project.build.directory}" />
+                  </target>
+                </configuration>
+                <goals>
+                  <goal>run</goal>
+                </goals>
+              </execution>
+              <!-- Unpacks railroads.zip and copies the extracted diagram/ directory into the site image folder. -->
+              <execution>
+                <id>extract-diagrams</id>
+                <phase>process-resources</phase>
+                <configuration>
+                  <target>
+                    <echo message="Extracting diagrams" />
+                    <unzip src="${project.build.directory}/railroads.zip" dest="${project.build.directory}" />
+                    <copy todir="${project.build.directory}/site/images/diagrams/">
+                      <fileset dir="${project.build.directory}/diagram/"/>
+                    </copy>
+                  </target>
+                </configuration>
+                <goals>
+                  <goal>run</goal>
+                </goals>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+    </profile>
+  </profiles>
 
   <distributionManagement>
     <site>

diff --git a/asterixdb/asterix-doc/src/main/grammar/sqlpp.ebnf b/asterixdb/asterix-doc/src/main/grammar/sqlpp.ebnf
new file mode 100644
index 0000000..aaf6761
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/grammar/sqlpp.ebnf

@@ -0,0 +1,243 @@
+Expr ::= OperatorExpr | QuantifiedExpr
+
+OperatorExpr ::= PathExpr
+                 | Operator OperatorExpr
+                 | OperatorExpr Operator OperatorExpr?
+                 | OperatorExpr "BETWEEN" OperatorExpr "AND" OperatorExpr
+
+QuantifiedExpr ::= ( "SOME" | "EVERY" ) Variable "IN" Expr ( "," Variable "IN" Expr )*
+                         "SATISFIES" Expr ("END")?
+
+PathExpr ::= PrimaryExpr ("." Identifier | "[" Expr (":" (Expr)? )? "]")*
+
+PrimaryExpr ::=  Literal
+                |VariableRef
+                |ParameterRef
+                |ParenthesizedExpr
+                |FunctionCall
+                |CaseExpr
+                |Constructor
+
+Literal ::=  StringLiteral
+            |IntegerLiteral
+            |FloatLiteral
+            |DoubleLiteral
+            |"NULL"
+            |"MISSING"
+            |"TRUE"
+            |"FALSE"
+
+ParenthesizedExpr ::= ("(" Expr ")") | Subquery
+
+Subquery ::= ("(" Selection ")")
+
+FunctionCall ::= OrdinaryFunctionCall | AggregateFunctionCall | WindowFunctionCall
+
+OrdinaryFunctionCall ::= Identifier "(" Expr ("," Expr)* ")"
+
+AggregateFunctionCall ::= Identifier "(" ("DISTINCT")? Expr ")"
+
+CaseExpr ::= SimpleCaseExpr | SearchedCaseExpr
+
+SimpleCaseExpr ::= "CASE" Expr ("WHEN" Expr "THEN" Expr)+ ("ELSE" Expr)? "END"
+
+SearchedCaseExpr ::= "CASE"("WHEN" Expr "THEN" Expr)+ ("ELSE" Expr)? "END"
+
+Constructor ::= ObjectConstructor | ArrayConstructor | MultisetConstructor
+
+ObjectConstructor ::= "{" ( Expr ( ":" Expr )?( ","Expr ( ":" Expr )? )* )? "}"
+
+ArrayConstructor ::= "[" Expr ("," Expr)* "]"
+
+MultisetConstructor ::= "{{" Expr ("," Expr)* "}}"
+
+Query ::= (Expr | Selection) ";"
+
+Selection ::= WithClause? QueryBlock UnionOption* OrderByClause? LimitClause?
+
+QueryBlock ::=  SelectClause StreamGenerator?
+               |StreamGenerator SelectClause
+
+StreamGenerator::= FromClause LetClause? WhereClause? (GroupByClause LetClause? HavingClause?)?
+
+SelectClause ::= "SELECT" ("DISTINCT" | "ALL")? "VALUE" Expr
+               | "SELECT" ("DISTINCT" | "ALL")? ((Expr ("AS"? Identifier)?) | "*" | Identifier "." "*") ("," ((Expr ("AS"? Identifier)?) | "*" | Identifier "." "*"))*
+
+FromClause ::= "FROM" FromTerm ("," FromTerm)*
+
+FromTerm ::= NamedExpr JoinStep*
+
+NamedExpr ::= Expr
+             |Expr "AS"? Variable
+
+JoinStep ::= ("INNER" | ("LEFT" "OUTER"?))? ("JOIN" NamedExpr "ON" Expr | "UNNEST" NamedExpr)
+
+LetClause ::= "LET" Variable "=" Expr ("," Variable "=" Expr)*
+
+WhereClause ::= "WHERE" Expr
+
+GroupByClause ::= "GROUP BY" Expr ("AS"? Identifier)? ( "," Expr ("AS"? Identifier)?)* GroupAsClause?
+
+HavingClause ::= "HAVING" Expr
+
+GroupAsClause ::= "GROUP AS" Identifier
+
+Selection ::= WithClause? QueryBlock UnionOption* OrderByClause? LimitClause?
+
+UnionOption ::= "UNION ALL" (QueryBlock | Subquery)
+
+WithClause ::= "WITH" Identifier "AS" Expr
+                       ("," Identifier "AS" Expr)*
+
+OrderbyClause ::= "ORDER BY" Expr ( "ASC" | "DESC" )?
+                       ( "," Expr ( "ASC" | "DESC" )? )*
+
+LimitClause ::= "LIMIT" Expr ("OFFSET" Expr)?
+
+Subquery ::= "(" Selection ")"
+
+WindowFunctionCall ::= WindowFunctionType "(" WindowFunctionArguments ")" WindowFunctionOptions? "OVER" (Variable "AS")? "(" WindowDefinition")"
+
+WindowFunctionType ::= AggregateFunction
+                     | WindowFunction
+
+WindowFunctionArguments ::=  ( ("DISTINCT")? Expr | (Expr ("," Expr ("," Expr)? )? )? )
+
+WindowFunctionOptions ::= ("FROM" ( "FIRST" | "LAST" ))? (( "RESPECT" | "IGNORE" ) "NULLS")?
+
+WindowDefinition ::= WindowPartitionClause? (WindowOrderClause (WindowFrameClause WindowFrameExclusion?)?)?
+
+WindowPartitionClause ::= "PARTITION" "BY" Expr ("," Expr)*
+
+WindowOrderClause ::= "ORDER" "BY" Expr ("ASC"|"DESC")? ("," Expr ("ASC" | "DESC")?)*
+
+WindowFrameClause ::= ("ROWS" | "RANGE" | "GROUPS") WindowFrameExtent
+
+WindowFrameExtent ::= ( ( "UNBOUNDED" | Expr ) "PRECEDING" | "CURRENT" "ROW" ) |
+"BETWEEN"( "UNBOUNDED" "PRECEDING" | "CURRENT" "ROW" | Expr ( "PRECEDING" | "FOLLOWING" ) )
+"AND" ( "UNBOUNDED" "FOLLOWING" | "CURRENT" "ROW" | Expr ( "PRECEDING" | "FOLLOWING" ) )
+
+WindowFrameExclusion ::= "EXCLUDE" ( "CURRENT" "ROW" | "GROUP" | "TIES" |
+"NO" "OTHERS" )
+
+Stmnt::= (SingleStmnt ";")+ "EOF"
+
+SingleStmnt ::= UseStmnt
+               |SetStmnt
+               |FunctionDeclaration
+               |Query
+               |CreateStmnt
+               |DropStmnt
+               |LoadStmnt
+               |InsertStmnt
+               |UpsertStmnt
+               |DeleteStmnt
+
+UseStmnt ::= "USE" Identifier
+
+FunctionDeclaration ::= "DECLARE" "FUNCTION" Identifier ParameterList "{" Expr "}"
+
+ParameterList ::= "(" ("VARIABLE" ("," "VARIABLE")*)?")"
+
+CreateStmnt ::= CreateDataverse
+              | CreateType
+              | CreateDataset
+              | CreateIndex
+              | CreateSynonym
+              | CreateFunction
+
+QualifiedName ::= Identifier ("." Identifier)?
+
+DoubleQualifiedName ::= Identifier "." Identifier ("." Identifier)?
+
+CreateDataverse ::= "CREATE" "DATAVERSE" Identifier ("IF" "NOT" "EXISTS")?
+
+CreateType ::= "CREATE" "TYPE" QualifiedName ("IF" "NOT" "EXISTS")? "AS" ObjectTypeDef
+
+ObjectTypeDef ::= ("CLOSED" | "OPEN")? "{" ObjectField ("," ObjectField)* "}"
+
+ObjectField ::= Identifier ":" Identifier "?"?
+
+TypeExpr ::= ObjectTypeDef
+            |ArrayTypeDef
+            |MultisetTypeDef
+            |TypeRef
+
+ArrayTypeDef ::= "[" TypeExpr "]"
+
+MultisetTypeDef ::= "{{" TypeExpr "}}"
+
+TypeRef ::= Identifier
+
+CreateDataset ::= CreateInternalDataset | CreateExternalDataset
+
+CreateInternalDataset ::= "CREATE" ( "INTERNAL" )? "DATASET" QualifiedName "(" QualifiedName ")" ("IF" "NOT" "EXISTS")?
+                           PrimaryKey ( "ON" Identifier )? ( "HINTS" Properties )?
+                           ( "USING" "COMPACTION" "POLICY" CompactionPolicy ( Configuration )? )?
+                           ( "WITH" "FILTER" "ON" Identifier )?
+
+CreateExternalDataset ::= "CREATE" "EXTERNAL" "DATASET" QualifiedName "(" QualifiedName ")" ("IF" "NOT" "EXISTS")? "USING" AdapterName
+                           Configuration ( "HINTS" Properties )?
+                           ( "USING" "COMPACTION" "POLICY" CompactionPolicy ( Configuration )? )?
+
+AdapterName ::= Identifier
+
+Configuration::= "(" (KeyValuePair ("," KeyValuePair)*)? ")"
+
+KeyValuePair ::= "(" StringLiteral "=" StringLiteral ")"
+
+Properties ::= ( "(" Identifier "=" ( StringLiteral | IntegerLiteral ) ( "," Identifier "=" ( StringLiteral | IntegerLiteral ) )* ")" )?
+
+PrimaryKey ::= "PRIMARY" "KEY" NestedField ( "," NestedField )* ("AUTOGENERATED")?
+
+NestedField ::= Identifier ( "." Identifier )*
+
+CompactionPolicy ::= Identifier
+
+CreateIndex ::= CreateSecondaryIndex | CreatePrimaryKeyIndex
+
+CreateSecondaryIndex ::= "CREATE" ("INDEX" Identifier ("IF" "NOT" "EXISTS")? "ON" QualifiedName
+                       "(" ( IndexField ) ( "," IndexField )* ")" ("TYPE" IndexType)? ("ENFORCED")?)
+
+CreatePrimaryKeyIndex ::=  "CREATE" "PRIMARY" "INDEX" Identifier? ("IF" "NOT" "EXISTS")? "ON" QualifiedName ("TYPE" "BTREE")?
+
+IndexField ::= NestedField (":" TypeRef)?
+
+IndexType ::= "BTREE"
+             |"RTREE"
+             |"KEYWORD"
+             |"NGRAM" "(" IntegerLiteral ")"
+
+CreateSynonym ::= "CREATE" "SYNONYM" QualifiedName "FOR" QualifiedName ("IF" "NOT" "EXISTS")?
+
+FunctionParameters ::= "(" ("VARIABLE" (":" TypeExpr)? ("," "VARIABLE" (":" TypeExpr)? )* )? ")"
+
+
+CreateFunction ::= "CREATE" ("OR" "REPLACE")? "FUNCTION" FunctionOrTypeName ("IF" "NOT" "EXISTS")? FunctionParameters ( "RETURNS" TypeExpr)?
+                   ( ("{" Expr "}") | ("AS" FunctionExternalIdentifier "AT" QualifiedName ("WITH" ObjectConstructor)?))
+
+DropStmnt ::= "DROP" ("DATAVERSE" Identifier
+                     | "TYPE" FunctionOrTypeName
+                     | ("DATASET" | "SYNONYM") QualifiedName
+                     | "INDEX" DoubleQualifiedName
+                     | "FUNCTION" FunctionSignature ) ("IF" "EXISTS")?
+
+FunctionSignature ::= FunctionOrTypeName "@" IntegerLiteral
+
+LoadStmnt ::= "LOAD" "DATASET" QualifiedName "USING" AdapterName Configuration ("PRE-SORTED")?
+
+InsertStmnt ::= "INSERT" "INTO" QualifiedName Query
+
+UpsertStmnt ::= "UPSERT" "INTO" QualifiedName Query
+
+DeleteStmnt ::= "DELETE" "FROM" QualifiedName (("AS")? Variable)? ("WHERE" Expr)?
+
+SetStmnt ::= "SET" Identifier StringLiteral
+
+
+
+
+
+
+
+

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_interval_joins.md b/asterixdb/asterix-doc/src/main/interval_join/interval_join.md
similarity index 97%
rename from asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_interval_joins.md
rename to asterixdb/asterix-doc/src/main/interval_join/interval_join.md
index 45f13f3..bde7869 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_interval_joins.md
+++ b/asterixdb/asterix-doc/src/main/interval_join/interval_join.md

@@ -17,6 +17,7 @@
  ! under the License.
  !-->
 
+## <a id="Interval_joins">Interval Joins</a>
 This system allows for the 13 types of Allen's interval-join relations.
 The default, when using these joins, is either Nested Loop, or Hybrid Hash Join.
 The optimal algorithm will be automatically selected based on the query.
@@ -27,7 +28,6 @@
 To use interval merge join you must include a range hint.
 Adding a range hint allows for the system to pick interval merge join.
 
-## <a id="Interval_joins">Types of Interval Joins</a>
 The 13 interval functions are `interval_after()`, `interval_before()`, `interval_covers()`, `interval_covered_by()`,
 `interval_ends()`, `interval_ended_by()`, `interval_meets()`, `interval_met_by()`, `interval_overlaps()`,
 `interval_overlapping()`, `interval_overlapped_by()`, `interval_starts()`, and `interval_started_by()`.
@@ -52,7 +52,7 @@
 | Overlapping(A, B)| (A.start >= B.start and B.start < A.end) or (B.end <= A.end and B.end < A.start)|
 | Starts(A, B) and Started_by(B, A) | A.start = B.start and A.end <= B.end |
 
-## <a id="Range_hint">Using a Range Hint</a>
+### <a id="Range_hint"> Using a Range Hint </a>
 
 To use an efficient interval join the data must be partitioned with the details in a range hint.
 Interval joins with a range hint currently work for intervals types of date, datetime, or time;

diff --git a/asterixdb/asterix-doc/src/main/interval_join/interval_join_title.md b/asterixdb/asterix-doc/src/main/interval_join/interval_join_title.md
new file mode 100644
index 0000000..827609e
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/interval_join/interval_join_title.md

@@ -0,0 +1,24 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+# Interval Joins
+
+## <a id="#toc">Table of Contents</a> ##
+* [Introduction](#Interval_joins)
+* [Range Hints](#Range_hint)

diff --git a/asterixdb/asterix-doc/src/main/markdown/builtins/14_window.md b/asterixdb/asterix-doc/src/main/markdown/builtins/14_window.md
index d88d5e8..e0e5948 100644
--- a/asterixdb/asterix-doc/src/main/markdown/builtins/14_window.md
+++ b/asterixdb/asterix-doc/src/main/markdown/builtins/14_window.md

@@ -28,13 +28,10 @@
 separate in the query output.
 
 All window functions must be used with an OVER clause.
-Refer to [OVER Clauses](manual.html#Over_clauses) for details.
+Refer to [Window Queries](manual.html#Over_clauses) for details.
 
 Window functions cannot appear in the FROM clause clause or LIMIT clause.
 
-The examples in this section use the `GleambookMessages` dataset,
-described in the section on [SELECT Statements](manual.html#SELECT_statements).
-
 ### cume_dist ###
 
 * Syntax:

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md
index ca1ca19..1ac6ab0 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md

@@ -17,7 +17,7 @@
  ! under the License.
  !-->
 
-# The Query Language
+# The SQL++ Query Language
 
 * [1. Introduction](#Introduction)
 * [2. Expressions](#Expressions)
@@ -30,76 +30,70 @@
       * [Path Expressions](#Path_expressions)
       * [Primary Expressions](#Primary_expressions)
            * [Literals](#Literals)
-           * [Variable References](#Variable_references)
+           * [Identifiers and Variable References](#Variable_references)
+           * [Parameter References](#Parameter_references)
            * [Parenthesized Expressions](#Parenthesized_expressions)
-           * [Function call Expressions](#Function_call_expressions)
+           * [Function Calls](#Function_call_expressions)
            * [Case Expressions](#Case_expressions)
            * [Constructors](#Constructors)
 * [3. Queries](#Queries)
-      * [Declarations](#Declarations)
-      * [SELECT Statements](#SELECT_statements)
       * [SELECT Clauses](#Select_clauses)
-           * [Select Element/Value/Raw](#Select_element)
+           * [Select Value](#Select_element)
            * [SQL-style Select](#SQL_select)
            * [Select *](#Select_star)
            * [Select Distinct](#Select_distinct)
            * [Unnamed Projections](#Unnamed_projections)
            * [Abbreviated Field Access Expressions](#Abbreviated_field_access_expressions)
-      * [UNNEST Clauses](#Unnest_clauses)
-           * [Inner Unnests](#Inner_unnests)
-           * [Left Outer Unnests](#Left_outer_unnests)
-           * [Expressing Joins Using Unnests](#Expressing_joins_using_unnests)
       * [FROM clauses](#From_clauses)
-           * [Binding Expressions](#Binding_expressions)
-           * [Multiple From Terms](#Multiple_from_terms)
-           * [Expressing Joins Using From Terms](#Expressing_joins_using_from_terms)
-           * [Implicit Binding Variables](#Implicit_binding_variables)
-      * [JOIN Clauses](#Join_clauses)
-           * [Inner Joins](#Inner_joins)
-           * [Left Outer Joins](#Left_outer_joins)
-      * [GROUP BY Clauses](#Group_By_clauses)
-           * [Group Variables](#Group_variables)
-           * [Implicit Group Key Variables](#Implicit_group_key_variables)
-           * [Implicit Group Variables](#Implicit_group_variables)
-           * [Aggregation Functions](#Aggregation_functions)
-           * [SQL-92 Aggregation Functions](#SQL-92_aggregation_functions)
-           * [SQL-92 Compliant GROUP BY Aggregations](#SQL-92_compliant_gby)
-           * [Column Aliases](#Column_aliases)
-      * [WHERE Clauses and HAVING Clauses](#Where_having_clauses)
-      * [ORDER BY Clauses](#Order_By_clauses)
-      * [LIMIT Clauses](#Limit_clauses)
-      * [WITH Clauses](#With_clauses)
-      * [LET Clauses](#Let_clauses)
-      * [UNION ALL](#Union_all)
-      * [OVER Clauses](#Over_clauses)
-           * [Window Function Call](#Window_function_call)
+           * [Joins](#Joins)
+      * [LET Clauses](#Let_clauses)
+      * [WHERE Clause](#WHERE_Clause)
+      * [Grouping](#Grouping)
+           * [GROUP BY Clause](#GROUP_BY_Clause)
+           * [HAVING Clause](#HAVING_Clause)
+           * [Aggregation Pseudo-functions](#Aggregation_PseudoFunctions)
+           * [GROUP AS Clause](#GROUP_AS_Clause)
+      * [Selection and UNION ALL](#Union_all)
+      * [WITH Clauses](#With_clauses)
+      * [ORDER BY and LIMIT Clauses](#Order_By_clauses)
+      * [Subqueries](#Subqueries)
+* [4. Window Functions](#Over_clauses)
+      * [Window Function Call](#Window_function_call)
+           * [Window Function Arguments](#Window_function_arguments)
            * [Window Function Options](#Window_function_options)
            * [Window Frame Variable](#Window_frame_variable)
            * [Window Definition](#Window_definition)
-      * [Differences from SQL-92](#Vs_SQL-92)
-* [4. Errors](#Errors)
+* [5. Errors](#Errors)
       * [Syntax Errors](#Syntax_errors)
       * [Identifier Resolution Errors](#Identifier_resolution_errors)
       * [Type Errors](#Type_errors)
       * [Resource Errors](#Resource_errors)
-* [5. DDL and DML Statements](#DDL_and_DML_statements)
+* [6. Differences from SQL-92](#Vs_SQL-92)
+* [7. DDL and DML Statements](#DDL_and_DML_statements)
       * [Lifecycle Management Statements](#Lifecycle_management_statements)
-           * [Dataverses](#Dataverses)
-           * [Types](#Types)
-           * [Datasets](#Datasets)
-           * [Indices](#Indices)
-           * [Functions](#Functions)
-           * [Synonyms](#Synonyms)
-           * [Removal](#Removal)
-           * [Load Statement](#Load_statement)
+           * [Use Statement](#Use)
+           * [Set Statement](#Sets)
+           * [Function Declaration](#Functions)
+           * [Create Statement](#Create)
+                * [Create Dataverse](#Dataverses)
+                * [Create Type](#Types)
+                * [Create Dataset](#Datasets)
+                * [Create Index](#Indices)
+                * [Create Synonym](#Synonyms)
+                * [Create Function](#Create_function)
+           * [Drop Statement](#Removal)
+           * [Load Statement](#Load_statement)
       * [Modification Statements](#Modification_statements)
-           * [Inserts](#Inserts)
-           * [Upserts](#Upserts)
-           * [Deletes](#Deletes)
+           * [Insert Statement](#Inserts)
+           * [Upsert Statement](#Upserts)
+           * [Delete Statement](#Deletes)
 * [Appendix 1. Reserved Keywords](#Reserved_keywords)
 * [Appendix 2. Performance Tuning](#Performance_tuning)
       * [Parallelism Parameter](#Parallelism_parameter)
-      * [Interval Joins](#Interval_joins)
       * [Memory Parameters](#Memory_parameters)
       * [Query Hints](#Query_hints)
 * [Appendix 3. Variable Bindings and Name Resolution](#Variable_bindings_and_name_resolution)
+* [Appendix 4. Example Data](#Manual_data)
+      * [Data Definitions](#definition_statements)
+      * [Customers Dataset](#customers_data)
+      * [Orders Dataset](#orders_data)

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/1_intro.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/1_intro.md
index 8590c2e..7b56e12 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/1_intro.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/1_intro.md

@@ -30,14 +30,16 @@
 databases, while SQL++ generalizes SQL to also handle nested data formats (like JSON) and
 the schema-optional (or even schema-less) data models of modern NoSQL and BigData systems.
 
-In the context of Apache AsterixDB, the query language is intended for working with the Asterix Data Model
+In the context of Apache AsterixDB, SQL++ is intended for working with the Asterix Data Model
 ([ADM](../datamodel.html)), a data model based on a superset of JSON with an enriched and flexible type system.
 New AsterixDB users are encouraged to read and work through the (much friendlier) guide
 "[AsterixDB 101: An ADM and SQL++ Primer](primer-sqlpp.html)" before attempting to make use of this document.
 In addition, readers are advised to read through the [Asterix Data Model (ADM) reference guide](../datamodel.html)
-first as well, as an understanding of the data model is a prerequisite to understanding the query language.
+first as well, as an understanding of the data model is a prerequisite to understanding SQL++.
 
-In what follows, we detail the features of the query language in a grammar-guided manner.
+In what follows, we detail the features of the SQL++ language in a grammar-guided manner.
 We list and briefly explain each of the productions in the query grammar, offering examples
-(and results) for clarity.
+(and results) for clarity. In this manual, we will explain how to use the various features of SQL++
+using two datasets named `customers` and `orders`. Each dataset is a collection of objects.
+The contents of the example datasets can be found at the end of this manual in [Appendix 4](#Manual_data).
 

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/2_expr.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/2_expr.md
index 2e0b526..9979570 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/2_expr.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/2_expr.md

@@ -17,27 +17,31 @@
  ! under the License.
  !-->
 
-The query language is a highly composable expression language.
-Each expression in the query language returns zero or more data model instances.
-There are three major kinds of expressions.
-At the topmost level, an expression can be an OperatorExpression (similar to a mathematical expression) or a
-QuantifiedExpression (which yields a boolean value).
-Each will be detailed as we explore the full grammar of the language.
+An expression is a language fragment that can be evaluated to return a value. For example, the expression 2 + 3 returns the value 5. Expressions are the building blocks from which queries are constructed. SQL++ supports nearly all of the kinds of expressions in SQL, and adds some new kinds as well.
 
-    Expression ::= OperatorExpression | QuantifiedExpression
+SQL++ is an orthogonal language, which means that expressions can serve as operands of higher level expressions. By nesting expressions inside other expressions, complex queries can be built up. Any expression can be enclosed in parentheses to establish operator precedence.
 
-Note that in the following text, words enclosed in angle brackets denote keywords that are not case-sensitive.
+In this section, we'll discuss the various kinds of SQL++ expressions.
 
+---
+
+### Expr
+**![](../images/diagrams/Expr.png)**
+
+
+---
 
 ## <a id="Operator_expressions">Operator Expressions</a>
 
 Operators perform a specific operation on the input values or expressions.
 The syntax of an operator expression is as follows:
 
-    OperatorExpression ::= PathExpression
-                           | Operator OperatorExpression
-                           | OperatorExpression Operator (OperatorExpression)?
-                           | OperatorExpression <BETWEEN> OperatorExpression <AND> OperatorExpression
+---
+
+### OperatorExpr
+**![](../images/diagrams/OperatorExpr.png)**
+
+---
 
 The language provides a full set of operators that you can use within its statements.
 Here are the categories of operators:
@@ -64,7 +68,7 @@
 | OR                                                                          | Disjunction |
 
 In general, if any operand evaluates to a `MISSING` value, the enclosing operator will return `MISSING`;
-if none of operands evaluates to a `MISSING` value but there is an operand evaluates to a `NULL` value,
+if none of the operands evaluates to a `MISSING` value but there is an operand which evaluates to a `NULL` value,
 the enclosing operator will return `NULL`. However, there are a few exceptions listed in
 [comparison operators](#Comparison_operators) and [logical operators](#Logical_operators).
 
@@ -89,15 +93,15 @@
 
 | Operator   |  Purpose                                     | Example    |
 |------------|----------------------------------------------|------------|
-| IN         |  Membership test                             | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.lang IN ["en", "de"]; |
-| NOT IN     |  Non-membership test                         | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.lang NOT IN ["en"]; |
-| EXISTS     |  Check whether a collection is not empty     | SELECT * FROM ChirpMessages cm <br/>WHERE EXISTS cm.referredTopics; |
-| NOT EXISTS |  Check whether a collection is empty         | SELECT * FROM ChirpMessages cm <br/>WHERE NOT EXISTS cm.referredTopics; |
+| IN         |  Membership test                             | FROM customers AS c <br/>WHERE c.address.zipcode IN ["02340", "02115"] <br/> SELECT *; |
+| NOT IN     |  Non-membership test                         | FROM customers AS c <br/>WHERE c.address.zipcode NOT IN ["02340", "02115"] <br/> SELECT *;|
+| EXISTS     |  Check whether a collection is not empty     | FROM orders AS o <br/>WHERE EXISTS  o.items <br/> SELECT *;|
+| NOT EXISTS |  Check whether a collection is empty         | FROM orders AS o <br/>WHERE NOT EXISTS  o.items <br/> SELECT *; |
 
 ### <a id="Comparison_operators">Comparison Operators</a>
 Comparison operators are used to compare values.
 The comparison operators fall into one of two sub-categories: missing value comparisons and regular value comparisons.
-The query language (and JSON) has two ways of representing missing information in a object - the presence of the field
+SQL++ (and JSON) has two ways of representing missing information in an object - the presence of the field
 with a NULL for its value (as in SQL), and the absence of the field (which JSON permits).
 For example, the first of the following objects represents Jack, whose friend is Jill.
 In the other examples, Jake is friendless a la SQL, with a friend field that is NULL, while Joe is friendless in a more
@@ -110,32 +114,32 @@
 
 {"name": "Joe"}
 
-The following table enumerates all of the query language's comparison operators.
+The following table enumerates all of the comparison operators available in SQL++.
 
 | Operator       |  Purpose                                       | Example    |
 |----------------|------------------------------------------------|------------|
-| IS NULL        |  Test if a value is NULL                       | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.name IS NULL; |
-| IS NOT NULL    |  Test if a value is not NULL                   | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.name IS NOT NULL; |
-| IS MISSING     |  Test if a value is MISSING                    | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.name IS MISSING; |
-| IS NOT MISSING |  Test if a value is not MISSING                | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.name IS NOT MISSING;|
-| IS UNKNOWN     |  Test if a value is NULL or MISSING            | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.name IS UNKNOWN; |
-| IS NOT UNKNOWN |  Test if a value is neither NULL nor MISSING   | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.name IS NOT UNKNOWN;|
-| IS KNOWN (IS VALUED) |  Test if a value is neither NULL nor MISSING | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.name IS KNOWN; |
-| IS NOT KNOWN (IS NOT VALUED) |  Test if a value is NULL or MISSING | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.name IS NOT KNOWN; |
-| BETWEEN        |  Test if a value is between a start value and <br/>a end value. The comparison is inclusive <br/>to both start and end values. |  SELECT * FROM ChirpMessages cm <br/>WHERE cm.chirpId BETWEEN 10 AND 20;|
-| =              |  Equality test                                 | SELECT * FROM ChirpMessages cm <br/>WHERE cm.chirpId=10; |
-| !=             |  Inequality test                               | SELECT * FROM ChirpMessages cm <br/>WHERE cm.chirpId!=10;|
-| <>             |  Inequality test                               | SELECT * FROM ChirpMessages cm <br/>WHERE cm.chirpId<>10;|
-| <              |  Less than                                     | SELECT * FROM ChirpMessages cm <br/>WHERE cm.chirpId<10; |
-| >              |  Greater than                                  | SELECT * FROM ChirpMessages cm <br/>WHERE cm.chirpId>10; |
-| <=             |  Less than or equal to                         | SELECT * FROM ChirpMessages cm <br/>WHERE cm.chirpId<=10; |
-| >=             |  Greater than or equal to                      | SELECT * FROM ChirpMessages cm <br/>WHERE cm.chirpId>=10; |
-| LIKE           |  Test if the left side matches a<br/> pattern defined on the right<br/> side; in the pattern,  "%" matches  <br/>any string while "&#95;" matches <br/> any character. | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.name LIKE "%Giesen%";|
-| NOT LIKE       |  Test if the left side does not <br/>match a pattern defined on the right<br/> side; in the pattern,  "%" matches <br/>any string while "&#95;" matches <br/> any character. | SELECT * FROM ChirpMessages cm <br/>WHERE cm.user.name NOT LIKE "%Giesen%";|
+| IS NULL        |  Test if a value is NULL                       |FROM customers AS c <br/>WHERE c.name IS NULL <br/> SELECT *; |
+| IS NOT NULL    |  Test if a value is not NULL                   | FROM customers AS c <br/>WHERE c.name IS NOT NULL <br/> SELECT *; |
+| IS MISSING     |  Test if a value is MISSING                    | FROM customers AS c <br/>WHERE c.name IS MISSING <br/> SELECT *;  |
+| IS NOT MISSING |  Test if a value is not MISSING                | FROM customers AS c <br/>WHERE c.name IS NOT MISSING <br/> SELECT *; |
+| IS UNKNOWN     |  Test if a value is NULL or MISSING            | FROM customers AS c <br/>WHERE c.name IS UNKNOWN <br/> SELECT *; |
+| IS NOT UNKNOWN |  Test if a value is neither NULL nor MISSING   | FROM customers AS c <br/>WHERE c.name IS NOT UNKNOWN <br/> SELECT *; |
+| IS KNOWN (IS VALUED) |  Test if a value is neither NULL nor MISSING | FROM customers AS c <br/>WHERE c.name IS KNOWN <br/> SELECT *;  |
+| IS NOT KNOWN (IS NOT VALUED) |  Test if a value is NULL or MISSING | FROM customers AS c <br/>WHERE c.name IS NOT KNOWN <br/> SELECT *;  |
+| BETWEEN        |  Test if a value is between a start value and an end value. The comparison is inclusive of both the start and end values. |  FROM customers AS c WHERE c.rating BETWEEN 600 AND 700 SELECT *;|
+| =              |  Equality test                                 | FROM customers AS c <br/> WHERE c.rating = 640 <br/> SELECT *; |
+| !=             |  Inequality test                               | FROM customers AS c <br/> WHERE c.rating != 640 <br/> SELECT *;|
+| <>             |  Inequality test                               | FROM customers AS c <br/> WHERE c.rating <> 640 <br/> SELECT *;|
+| <              |  Less than                                     | FROM customers AS c <br/> WHERE c.rating < 640 <br/> SELECT *; |
+| >              |  Greater than                                  | FROM customers AS c <br/> WHERE c.rating > 640 <br/> SELECT *; |
+| <=             |  Less than or equal to                         | FROM customers AS c <br/> WHERE c.rating <= 640 <br/> SELECT *; |
+| >=             |  Greater than or equal to                      | FROM customers AS c <br/> WHERE c.rating >= 640 <br/> SELECT *; |
+| LIKE           |  Test if the left side matches a pattern defined on the right side; in the pattern,  "%" matches any string while "&#95;" matches any character. | FROM customers AS c WHERE c.name LIKE "%Dodge%" SELECT *;|
+| NOT LIKE       |  Test if the left side does not match a pattern defined on the right side; in the pattern, "%" matches any string while "&#95;" matches any character. | FROM customers AS c WHERE c.name NOT LIKE "%Dodge%" SELECT *;|
 
 The following table summarizes how the missing value comparison operators work.
 
-| Operator | Non-NULL/Non-MISSING value | NULL | MISSING |
+| Operator | Non-NULL/Non-MISSING value | NULL value| MISSING value|
 |----------|----------------|------|---------|
 | IS NULL  | FALSE | TRUE | MISSING |
 | IS NOT NULL | TRUE | FALSE | MISSING |
@@ -151,9 +155,9 @@
 
 | Operator |  Purpose                                   | Example    |
 |----------|-----------------------------------------------------------------------------|------------|
-| NOT      |  Returns true if the following condition is false, otherwise returns false  | SELECT VALUE NOT TRUE;  |
-| AND      |  Returns true if both branches are true, otherwise returns false            | SELECT VALUE TRUE AND FALSE; |
-| OR       |  Returns true if one branch is true, otherwise returns false                | SELECT VALUE FALSE OR FALSE; |
+| NOT      |  Returns true if the following condition is false, otherwise returns false  | SELECT VALUE NOT 1 = 1; <br/> Returns FALSE  |
+| AND      |  Returns true if both branches are true, otherwise returns false            | SELECT VALUE 1 = 2 AND 1 = 1; <br/> Returns FALSE|
+| OR       |  Returns true if one branch is true, otherwise returns false                | SELECT VALUE 1 = 2 OR 1 = 1; <br/> Returns TRUE |
 
 The following table is the truth table for `AND` and `OR`.
 
@@ -182,45 +186,50 @@
 
 ## <a id="Quantified_expressions">Quantified Expressions</a>
 
-    QuantifiedExpression ::= ( (<ANY>|<SOME>) | <EVERY> ) Variable <IN> Expression ( "," Variable "in" Expression )*
-                             <SATISFIES> Expression (<END>)?
+---
 
-Quantified expressions are used for expressing existential or universal predicates involving the elements of a
-collection.
+### QuantifiedExpr
+**![](../images/diagrams/QuantifiedExpr.png)**
+##### Synonym for `SOME`: `ANY` 
+ 
+---
+
+Quantified expressions are used for expressing existential or universal predicates involving the elements of a collection.
 
 The following pair of examples illustrate the use of a quantified expression to test that every (or some) element in the
 set [1, 2, 3] of integers is less than three. The first example yields `FALSE` and second example yields `TRUE`.
 
-It is useful to note that if the set were instead the empty set, the first expression would yield `TRUE` ("every" value in an
-empty set satisfies the condition) while the second expression would yield `FALSE` (since there isn't "some" value, as there are
-no values in the set, that satisfies the condition).
+It is useful to note that if the set were instead the empty set, the first expression would yield `TRUE` ("every" value in an empty set satisfies the condition) while the second expression would yield `FALSE` (since there isn't "some" value, as there are no values in the set, that satisfies the condition).
 
 A quantified expression will return a `NULL` (or `MISSING`) if the first expression in it evaluates to `NULL` (or `MISSING`).
-A type error will be raised if the first expression in a quantified expression does not return a collection.
+Otherwise, a type error will be raised if the first expression in a quantified expression does not return a collection.
 
 ##### Examples
 
-    EVERY x IN [ 1, 2, 3 ] SATISFIES x < 3
-    SOME x IN [ 1, 2, 3 ] SATISFIES x < 3
+    EVERY x IN [ 1, 2, 3 ] SATISFIES x < 3		Returns FALSE
+    SOME x IN [ 1, 2, 3 ] SATISFIES x < 3		Returns TRUE	
 
 
 ## <a id="Path_expressions">Path Expressions</a>
 
-    PathExpression  ::= PrimaryExpression ( Field | Index )*
-    Field           ::= "." Identifier
-    Index           ::= "[" Expression (":" ( Expression )? )? "]"
+---
+
+### PathExpr
+**![](../images/diagrams/PathExpr.png)**
+
+---
 
 Components of complex types in the data model are accessed via path expressions. Path access can be applied to the
 result of a query expression that yields an instance of a complex type, for example, an object or an array instance.
 
 For objects, path access is based on field names, and it accesses the field whose name was specified.<br/>
-For arrays, path access is based on (zero-based) array-style indexing. Array indexes can be used to retrieve either a
-single element from an array, or a whole subset of an array. Accessing a single element is achieved by
-providing a single index argument (zero-based element position), while obtaining a subset of an array is achieved by
+
+For arrays, path access is based on (zero-based) array-style indexing. Array indices can be used to retrieve either a single element from an array, or a whole subset of an array. Accessing a single element is achieved by providing a single index argument (zero-based element position), while obtaining a subset of an array is achieved by
 providing the `start` and `end` (zero-based) index positions; the returned subset is from position `start` to position
 `end - 1`; the `end` position argument is optional. If a position argument is negative then the element position is
-counted from the end of the array (`-1` addresses the last element, `-2` next to last, and so on). Multisets have
-similar behavior to arrays, except for retrieving arbitrary items as the order of items is not fixed in multisets.
+counted from the end of the array (`-1` addresses the last element, `-2` next to last, and so on).
+
+Multisets have similar behavior to arrays, except for retrieving arbitrary items as the order of items is not fixed in multisets.
 
 Attempts to access non-existent fields or out-of-bound array elements produce the special value `MISSING`. Type errors
 will be raised for inappropriate use of a path expression, such as applying a field accessor to a numeric value.
@@ -230,145 +239,113 @@
 
 ##### Examples
 
-    ({"name": "MyABCs", "array": [ "a", "b", "c"]}).array
+    ({"name": "MyABCs", "array": [ "a", "b", "c"]}).array						Returns [["a", "b", "c"]]
 
-    (["a", "b", "c"])[2]
+    (["a", "b", "c"])[2]										Returns ["c"]
     
-    (["a", "b", "c"])[-1]
+    (["a", "b", "c"])[-1]										Returns ["c"]
 
-    ({"name": "MyABCs", "array": [ "a", "b", "c"]}).array[2]
+    ({"name": "MyABCs", "array": [ "a", "b", "c"]}).array[2]					Returns ["c"]
 
-    (["a", "b", "c"])[0:2]
+    (["a", "b", "c"])[0:2]										Returns [["a", "b"]]
 
-    (["a", "b", "c"])[0:]
+    (["a", "b", "c"])[0:]										Returns [["a", "b", "c"]]
     
-    (["a", "b", "c"])[-2:-1]
+    (["a", "b", "c"])[-2:-1]									Returns [["b"]]
 
 
 ## <a id="Primary_expressions">Primary Expressions</a>
 
-    PrimaryExpr ::= Literal
-                  | VariableReference
-                  | ParameterReference
-                  | ParenthesizedExpression
-                  | FunctionCallExpression
-                  | CaseExpression
-                  | Constructor
+---
 
-The most basic building block for any expression in the query language is PrimaryExpression.
+### PrimaryExpr
+**![](../images/diagrams/PrimaryExpr.png)**
+
+---
+
+The most basic building block for any expression in SQL++ is PrimaryExpression.
 This can be a simple literal (constant) value, a reference to a query variable that is in scope, a parenthesized
 expression, a function call, or a newly constructed instance of the data model (such as a newly constructed object,
 array, or multiset of data model instances).
 
-## <a id="Literals">Literals</a>
+### <a id="Literals">Literals</a>
 
-    Literal        ::= StringLiteral
-                       | IntegerLiteral
-                       | FloatLiteral
-                       | DoubleLiteral
-                       | <NULL>
-                       | <MISSING>
-                       | <TRUE>
-                       | <FALSE>
-    StringLiteral  ::= "\"" (
-                                 <EscapeQuot>
-                               | <EscapeBslash>
-                               | <EscapeSlash>
-                               | <EscapeBspace>
-                               | <EscapeFormf>
-                               | <EscapeNl>
-                               | <EscapeCr>
-                               | <EscapeTab>
-                               | ~["\"","\\"])*
-                        "\""
-                        | "\'"(
-                                 <EscapeApos>
-                               | <EscapeBslash>
-                               | <EscapeSlash>
-                               | <EscapeBspace>
-                               | <EscapeFormf>
-                               | <EscapeNl>
-                               | <EscapeCr>
-                               | <EscapeTab>
-                               | ~["\'","\\"])*
-                          "\'"
-    <ESCAPE_Apos>  ::= "\\\'"
-    <ESCAPE_Quot>  ::= "\\\""
-    <EscapeBslash> ::= "\\\\"
-    <EscapeSlash>  ::= "\\/"
-    <EscapeBspace> ::= "\\b"
-    <EscapeFormf>  ::= "\\f"
-    <EscapeNl>     ::= "\\n"
-    <EscapeCr>     ::= "\\r"
-    <EscapeTab>    ::= "\\t"
+---
 
-    IntegerLiteral ::= <DIGITS>
-    <DIGITS>       ::= ["0" - "9"]+
-    FloatLiteral   ::= <DIGITS> ( "f" | "F" )
-                     | <DIGITS> ( "." <DIGITS> ( "f" | "F" ) )?
-                     | "." <DIGITS> ( "f" | "F" )
-    DoubleLiteral  ::= <DIGITS> "." <DIGITS>
-                       | "." <DIGITS>
+### Literal
+**![](../images/diagrams/Literal.png)**
 
-Literals (constants) in a query can be strings, integers, floating point values, double values, boolean constants, or
-special constant values like `NULL` and `MISSING`.
-The `NULL` value is like a `NULL` in SQL; it is used to represent an unknown field value.
-The special value `MISSING` is only meaningful in the context of field accesses; it occurs when the accessed field
-simply does not exist at all in a object being accessed.
+---
 
-The following are some simple examples of literals.
+The simplest kind of expression is a literal that directly represents a value in JSON format. Here are some examples:
 
-##### Examples
+  
 
-    'a string'
-    "test string"
-    42
+	-42
+	"Hello"
+	true
+	false
+	null
 
-Different from standard SQL, double quotes play the same role as single quotes and may be used for string literals in queries as well.
+ 
+Numeric literals may include a sign and an optional decimal point. They may also be written in exponential notation, like this:
 
-### <a id="Variable_references">Variable References</a>
+  
+	5e2
+	-4.73E-2
 
-    VariableReference     ::= <IDENTIFIER> | <DelimitedIdentifier>
-    <IDENTIFIER>          ::= (<LETTER> | "_") (<LETTER> | <DIGIT> | "_" | "$")*
-    <LETTER>              ::= ["A" - "Z", "a" - "z"]
-    DelimitedIdentifier   ::= "`" (<EscapeQuot>
-                                    | <EscapeBslash>
-                                    | <EscapeSlash>
-                                    | <EscapeBspace>
-                                    | <EscapeFormf>
-                                    | <EscapeNl>
-                                    | <EscapeCr>
-                                    | <EscapeTab>
-                                    | ~["`","\\"])*
-                              "`"
+  
 
-A variable in a query can be bound to any legal data model value.
-A variable reference refers to the value to which an in-scope variable is bound.
-(E.g., a variable binding may originate from one of the `FROM`, `WITH` or `LET` clauses of a `SELECT` statement or from
-an input parameter in the context of a function body.)
-Backticks, for example, \`id\`, are used for delimited identifiers.
-Delimiting is needed when a variable's desired name clashes with a keyword or includes characters not allowed in regular
-identifiers.
-More information on exactly how variable references are resolved can be found in the appendix section on Variable
-Resolution.
+String literals may be enclosed in either single quotes or double quotes. Inside a string literal, the delimiter character for that string must be "escaped" by a backward slash, as in these examples:
 
-##### Examples
+  
 
-    tweet
-    id
-    `SELECT`
-    `my-function`
+	"I read \"War and Peace\" today."
+	'I don\'t believe everything I read.'
+
+The table below shows how to escape characters in SQL++.
+
+|Character Name |Escape Method|
+|----------|----------------|
+|Single Quote| `\'`|
+|Double Quote|`\"`|
+|Backslash|`\\`|
+|Slash|`\/`|
+|Backspace|`\b`|
+|Formfeed|`\f`|
+|Newline|`\n`|
+|CarriageReturn|`\r`|
+|Tab|`\t`|
+
+
+
+### <a id="Variable_references">Identifiers and Variable References</a>
+
+ 
+Like SQL, SQL++ makes use of a language construct called an *identifier*. An identifier starts with an alphabetic character or the underscore character _ , and contains only case-sensitive alphabetic characters, numeric digits, or the special characters _ and $. It is also possible for an identifier to include other special characters, or to be the same as a reserved word, by enclosing the identifier in back-ticks (it's then called a *delimited identifier*). Identifiers are used in variable names and in certain other places in SQL++ syntax, such as in path expressions, which we'll discuss soon. Here are some examples of identifiers:
+
+	X
+	customer_name
+	`SELECT`
+	`spaces in here`
+	`@&#`
+
+ 
+A very simple kind of SQL++ expression is a variable, which is simply an identifier. As in SQL, a variable can be bound to a value, which may be an input dataset, some intermediate result during processing of a query, or the final result of a query. We'll learn more about variables when we discuss queries.
+
+Note that the SQL++ rules for delimiting strings and identifiers are different from the SQL rules. In SQL, strings are always enclosed in single quotes, and double quotes are used for delimited identifiers.
 
 ### <a id="Parameter_references">Parameter References</a>
 
-    ParameterReference              ::= NamedParameterReference | PositionalParameterReference
-    NamedParameterReference         ::= "$" (<IDENTIFIER> | <DelimitedIdentifier>)
-    PositionalParameterReference    ::= ("$" <DIGITS>) | "?"
+A parameter reference is an external variable. Its value is provided using the [statement execution API](../api.html#queryservice).
 
-A statement parameter is an external variable which value is provided through the [statement execution API](../api.html#queryservice).
-An error will be raised if the parameter is not bound at the query execution time.
-Positional parameter numbering starts at 1.
-"?" parameters are interpreted as $1, .. $N in the order in which they appear in the statement.
+Parameter references come in two forms, *Named Parameter References* and *Positional Parameter References.*
+
+Named parameter references consist of the "$" symbol followed by an identifier or delimited identifier.
+
+Positional parameter references can be either a "$" symbol followed by one or more digits or a "?" symbol. If numbered, positional parameters start at 1. "?" parameters are interpreted as $1 to $N based on the order in which they appear in the statement.
+
+Parameter references may appear as shown in the examples below:
 
 ##### Examples
 
@@ -376,12 +353,23 @@
     $1
     ?
 
+An error will be raised if the parameter is not bound at query execution time.
+
 ### <a id="Parenthesized_expressions">Parenthesized Expressions</a>
 
-    ParenthesizedExpression ::= "(" Expression ")" | Subquery
+---
+
+### ParenthesizedExpr
+**![](../images/diagrams/ParenthesizedExpr.png)**
+
+### Subquery
+**![](../images/diagrams/Subquery.png)**
+
+---
 
 An expression can be parenthesized to control the precedence order or otherwise clarify a query.
-For composability, a subquery is also an parenthesized expression.
+A [subquery](#Subqueries) (nested [selection](#Union_all)) may also be enclosed in parentheses. For more on these topics please see their respective sections.
+
 
 The following expression evaluates to the value 2.
 
@@ -389,20 +377,30 @@
 
     ( 1 + 1 )
 
-### <a id="Function_call_expressions">Function Call Expressions</a>
+### <a id="Function_call_expressions">Function Calls</a>
 
-    FunctionCallExpression ::= ( FunctionName "(" ( Expression ( "," Expression )* )? ")" ) | WindowFunctionCall
+---
 
-Functions are included in the query language, like most languages, as a way to package useful functionality or to
+### FunctionCall
+**![](../images/diagrams/FunctionCall.png)**
+
+### OrdinaryFunctionCall
+**![](../images/diagrams/OrdinaryFunctionCall.png)**
+
+### AggregateFunctionCall
+**![](../images/diagrams/AggregateFunctionCall.png)**
+
+---
+
+Functions are included in SQL++, like most languages, as a way to package useful functionality or to
 componentize complicated or reusable computations.
 A function call is a legal query expression that represents the value resulting from the evaluation of its body
-expression with the given parameter bindings; the parameter value bindings can themselves be any expressions in the
-query language.
+expression with the given parameter bindings; the parameter value bindings can themselves be any expressions in SQL++.
 
 Note that Window functions, and aggregate functions used as window functions, have a more complex syntax.
-Window function calls are described in the section on [OVER Clauses](#Over_clauses).
+Window function calls are described in the section on [Window Queries](#Over_clauses).
 
-The following example is a (built-in) function call expression whose value is 8.
+The following example is a function call expression whose value is 8.
 
 ##### Example
 
@@ -410,9 +408,18 @@
 
 ## <a id="Case_expressions">Case Expressions</a>
 
-    CaseExpression ::= SimpleCaseExpression | SearchedCaseExpression
-    SimpleCaseExpression ::= <CASE> Expression ( <WHEN> Expression <THEN> Expression )+ ( <ELSE> Expression )? <END>
-    SearchedCaseExpression ::= <CASE> ( <WHEN> Expression <THEN> Expression )+ ( <ELSE> Expression )? <END>
+---
+
+### CaseExpr
+**![](../images/diagrams/CaseExpr.png)**
+
+### SimpleCaseExpr
+**![](../images/diagrams/SimpleCaseExpr.png)**
+
+### SearchedCaseExpr
+**![](../images/diagrams/SearchedCaseExpr.png)**
+
+---
 
 In a simple `CASE` expression, the query evaluator searches for the first `WHEN` ... `THEN` pair in which the `WHEN` expression is equal to the expression following `CASE` and returns the expression following `THEN`. If none of the `WHEN` ... `THEN` pairs meet this condition, and an `ELSE` branch exists, it returns the `ELSE` expression. Otherwise, `NULL` is returned.
 
@@ -427,56 +434,86 @@
 
 ### <a id="Constructors">Constructors</a>
 
-    Constructor              ::= ArrayConstructor | MultisetConstructor | ObjectConstructor
-    ArrayConstructor         ::= "[" ( Expression ( "," Expression )* )? "]"
-    MultisetConstructor      ::= "{{" ( Expression ( "," Expression )* )? "}}"
-    ObjectConstructor        ::= "{" ( FieldBinding ( "," FieldBinding )* )? "}"
-    FieldBinding             ::= Expression ( ":" Expression )?
+---
 
-A major feature of the query language is its ability to construct new data model instances. This is accomplished using
-its constructors for each of the model's complex object structures, namely arrays, multisets, and objects.
-Arrays are like JSON arrays, while multisets have bag semantics.
-Objects are built from fields that are field-name/field-value pairs, again like JSON.
+### Constructor
+**![](../images/diagrams/Constructor.png)**
 
-The following examples illustrate how to construct a new array with 4 items and a new object with 2 fields respectively.
-Array elements can be homogeneous (as in the first example),
-which is the common case, or they may be heterogeneous (as in the second example). The data values and field name values
-used to construct arrays, multisets, and objects in constructors are all simply query expressions. Thus, the collection
-elements, field names, and field values used in constructors can be simple literals or they can come from query variable
-references or even arbitrarily complex query expressions (subqueries).
-Type errors will be raised if the field names in an object are not strings, and
-duplicate field errors will be raised if they are not distinct.
+### ObjectConstructor
+**![](../images/diagrams/ObjectConstructor.png)**
 
-##### Examples
+### ArrayConstructor
+**![](../images/diagrams/ArrayConstructor.png)**
 
-    [ 'a', 'b', 'c', 'c' ]
+### MultisetConstructor
+**![](../images/diagrams/MultisetConstructor.png)**
 
-    [ 42, "forty-two!", { "rank" : "Captain", "name": "America" }, 3.14159 ]
+---
 
-    {
-      'project name': 'Hyracks',
-      'project members': [ 'vinayakb', 'dtabass', 'chenli', 'tsotras', 'tillw' ]
-    }
+Structured JSON values can be represented by constructors, as in these examples:
+
+	An object: { "name": "Bill", "age": 42 }
+	An array: [ 1, 2, "Hello", null ]  
+  
+In a constructed object, the names of the fields must be strings (either literal strings or computed strings), and an object may not contain any duplicate names. Of course, structured literals can be nested, as in this example:
+
+  
+
+	[ {"name": "Bill",
+	   "address":
+	      {"street": "25 Main St.",
+	       "city": "Cincinnati, OH"  
+	      }
+	  },
+	  {"name": "Mary",
+	   "address":
+	      {"street": "107 Market St.",
+	       "city": "St. Louis, MO"
+	      }
+	   }
+	]
+
+  
+
+The array items in an array constructor, and the field-names and field-values in an object constructor, may be represented by expressions. For example, suppose that the variables firstname, lastname, salary, and bonus are bound to appropriate values. Then structured values might be constructed by the following expressions:
+
+  
+
+An object:
+
+	{ 
+	  "name": firstname || " " || lastname,  
+	  "income": salary + bonus  
+	}
+
+  
+An array:
+
+	["1984", lastname, salary + bonus, null]
 
 
 If only one expression is specified instead of the field-name/field-value pair in an object constructor then this
 expression is supposed to provide the field value. The field name is then automatically generated based on the 
-kind of the value expression:
+kind of the value expression, as in Q2.1:
 
-  * If it is a variable reference expression then generated field name is the name of that variable.
-  * If it is a field access expression then generated field name is the last identifier in that expression.
+  * If it is a variable reference expression then the generated field name is the name of that variable.
+  * If it is a field access expression then the generated field name is the last identifier in that expression.
   * For all other cases, a compilation error will be raised.
  
-##### Example
 
-    SELECT VALUE { user.alias, user.userSince }
-    FROM GleambookUsers user
-    WHERE user.id = 1;
+##### Example
+(Q2.1)
+
+	FROM customers AS c
+	WHERE c.custid = "C47"
+	SELECT VALUE {c.name, c.rating}
 
 This query outputs:
 
-    [ {
-        "alias": "Margarita",
-        "userSince": "2012-08-20T10:10:00"
-    } ]
 
+	[
+	    {
+	        "name": "S. Logan",
+	        "rating": 625
+	    }
+	]

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/3_query.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/3_query.md
index d2ec756..5e4358f 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/3_query.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/3_query.md

@@ -17,491 +17,241 @@
  ! under the License.
  !-->
 
-##  <a id="SELECT_statements">SELECT Statements</a>
-
-The following shows the (rich) grammar for the `SELECT` statement in the query language.
-
-    SelectStatement    ::= ( WithClause )?
-                           SelectSetOperation (OrderbyClause )? ( LimitClause )?
-    SelectSetOperation ::= SelectBlock (<UNION> <ALL> ( SelectBlock | Subquery ) )*
-    Subquery           ::= "(" SelectStatement ")"
-
-    SelectBlock        ::= SelectClause
-                           ( FromClause ( LetClause )?)?
-                           ( WhereClause )?
-                           ( GroupbyClause ( LetClause )? ( HavingClause )? )?
-                           |
-                           FromClause ( LetClause )?
-                           ( WhereClause )?
-                           ( GroupbyClause ( LetClause )? ( HavingClause )? )?
-                           SelectClause
-
-    SelectClause       ::= <SELECT> ( <ALL> | <DISTINCT> )? ( SelectRegular | SelectValue )
-    SelectRegular      ::= Projection ( "," Projection )*
-    SelectValue        ::= ( <VALUE> | <ELEMENT> | <RAW> ) Expression
-    Projection         ::= ( Expression ( <AS> )? Identifier | "*" | Identifier "." "*" )
-
-    FromClause         ::= <FROM> FromTerm ( "," FromTerm )*
-    FromTerm           ::= Expression (( <AS> )? Variable)?
-                           ( ( JoinType )? ( JoinClause | UnnestClause ) )*
-
-    JoinClause         ::= <JOIN> Expression (( <AS> )? Variable)? <ON> Expression
-    UnnestClause       ::= ( <UNNEST> ) Expression
-                           ( <AS> )? Variable ( <AT> Variable )?
-    JoinType           ::= ( <INNER> | <LEFT> ( <OUTER> )? )
-
-    WithClause         ::= <WITH> WithElement ( "," WithElement )*
-    LetClause          ::= (<LET> | <LETTING>) LetElement ( "," LetElement )*
-    LetElement         ::= Variable "=" Expression
-    WithElement        ::= Variable <AS> Expression
-
-    WhereClause        ::= <WHERE> Expression
-
-    GroupbyClause      ::= <GROUP> <BY> Expression ( ( (<AS>)? Variable )?
-                           ( "," Expression ( (<AS>)? Variable )? )* )
-                           ( <GROUP> <AS> Variable
-                             ("(" VariableReference <AS> Identifier
-                             ("," VariableReference <AS> Identifier )* ")")?
-                           )?
-    HavingClause       ::= <HAVING> Expression
-
-    OrderbyClause      ::= <ORDER> <BY> Expression ( <ASC> | <DESC> )?
-                           ( "," Expression ( <ASC> | <DESC> )? )*
-    LimitClause        ::= <LIMIT> Expression ( <OFFSET> Expression )?
-
-In this section, we will make use of two stored collections of objects (datasets), `GleambookUsers` and `GleambookMessages`, in a series of running examples to explain `SELECT` queries. The contents of the example collections are as follows:
-
-`GleambookUsers` collection (or, dataset):
-
-    [ {
-      "id":1,
-      "alias":"Margarita",
-      "name":"MargaritaStoddard",
-      "nickname":"Mags",
-      "userSince":"2012-08-20T10:10:00",
-      "friendIds":[2,3,6,10],
-      "employment":[{
-                      "organizationName":"Codetechno",
-                      "start-date":"2006-08-06"
-                    },
-                    {
-                      "organizationName":"geomedia",
-                      "start-date":"2010-06-17",
-                      "end-date":"2010-01-26"
-                    }],
-      "gender":"F"
-    },
-    {
-      "id":2,
-      "alias":"Isbel",
-      "name":"IsbelDull",
-      "nickname":"Izzy",
-      "userSince":"2011-01-22T10:10:00",
-      "friendIds":[1,4],
-      "employment":[{
-                      "organizationName":"Hexviafind",
-                      "startDate":"2010-04-27"
-                   }]
-    },
-    {
-      "id":3,
-      "alias":"Emory",
-      "name":"EmoryUnk",
-      "userSince":"2012-07-10T10:10:00",
-      "friendIds":[1,5,8,9],
-      "employment":[{
-                      "organizationName":"geomedia",
-                      "startDate":"2010-06-17",
-                      "endDate":"2010-01-26"
-                   }]
-    } ]
-
-`GleambookMessages` collection (or, dataset):
-
-    [ {
-      "messageId":2,
-      "authorId":1,
-      "inResponseTo":4,
-      "senderLocation":[41.66,80.87],
-      "message":" dislike x-phone its touch-screen is horrible"
-    },
-    {
-      "messageId":3,
-      "authorId":2,
-      "inResponseTo":4,
-      "senderLocation":[48.09,81.01],
-      "message":" like product-y the plan is amazing"
-    },
-    {
-      "messageId":4,
-      "authorId":1,
-      "inResponseTo":2,
-      "senderLocation":[37.73,97.04],
-      "message":" can't stand acast the network is horrible:("
-    },
-    {
-      "messageId":6,
-      "authorId":2,
-      "inResponseTo":1,
-      "senderLocation":[31.5,75.56],
-      "message":" like product-z its platform is mind-blowing"
-    }
-    {
-      "messageId":8,
-      "authorId":1,
-      "inResponseTo":11,
-      "senderLocation":[40.33,80.87],
-      "message":" like ccast the 3G is awesome:)"
-    },
-    {
-      "messageId":10,
-      "authorId":1,
-      "inResponseTo":12,
-      "senderLocation":[42.5,70.01],
-      "message":" can't stand product-w the touch-screen is terrible"
-    },
-    {
-      "messageId":11,
-      "authorId":1,
-      "inResponseTo":1,
-      "senderLocation":[38.97,77.49],
-      "message":" can't stand acast its plan is terrible"
-    } ]
-
 ## <a id="Select_clauses">SELECT Clause</a>
-The `SELECT` clause always returns a collection value as its result (even if the result is empty or a singleton).
+---
 
-### <a id="Select_element">Select Element/Value/Raw</a>
-The `SELECT VALUE` clause returns an array or multiset that contains the results of evaluating the `VALUE`
-expression, with one evaluation being performed per "binding tuple" (i.e., per `FROM` clause item) satisfying
-the statement's selection criteria.
-For historical reasons the query language also allows the keywords `ELEMENT` or `RAW` to be used in place of `VALUE`
-(not recommended).
+### SelectClause
+**![](../images/diagrams/SelectClause.png)**
 
-If there is no FROM clause, the expression after `VALUE` is evaluated once with no binding tuples
+
+##### Synonyms for `VALUE`: `ELEMENT`, `RAW`
+---
+
+In a query block, the `FROM`, `WHERE`, `GROUP BY`, and `HAVING` clauses (if present) are collectively called the Stream Generator. All these clauses, taken together, generate a stream of tuples of bound variables. The `SELECT` clause then uses these bound variables to generate the output of the query block.
+
+For example, the clause `FROM customers AS c` scans over the `customers` collection, binding the variable `c` to each `customer` object in turn, producing a stream of bindings.
+
+Here's a slightly more complex example of a stream generator:
+
+##### Example
+	FROM customers AS c, orders AS o
+	WHERE c.custid = o.custid
+
+In this example, the `FROM` clause scans over the customers and orders collections, producing a stream of variable pairs (`c`, `o`) in which `c` is bound to a `customer` object and `o` is bound to an `order` object. The `WHERE` clause then retains only those pairs in which the custid values of the two objects match.
+
+The output of the query block is a collection containing one output item for each tuple produced by the stream generator. If the stream generator produces no tuples, the output of the query block is an empty collection. Depending on the `SELECT` clause, each output item may be an object or some other kind of value.
+
+In addition to using the variables bound by previous clauses, the `SELECT` clause may create and bind some additional variables. For example, the clause `SELECT salary + bonus AS pay` creates the variable `pay` and binds it to the value of `salary + bonus`. This variable may then be used in a later `ORDER BY` clause.
+
+In SQL++, the `SELECT` clause may appear either at the beginning or at the end of a query block. Since the `SELECT` clause depends on variables that are bound in the other clauses, the examples in this section place `SELECT` at the end of the query blocks.
+
+### <a id="Select_element">SELECT VALUE</a>
+
+	 
+The `SELECT VALUE` clause returns an array or multiset that contains the results of evaluating the `VALUE` expression, with one evaluation being performed per "binding tuple" (i.e., per `FROM` clause item) satisfying the statement's selection criteria.
+If there is no `FROM` clause, the expression after `VALUE` is evaluated once with no binding tuples
 (except those inherited from an outer environment).
 
 ##### Example
+(Q3.1)
 
     SELECT VALUE 1;
 
-This query returns:
+Result:
 
     [
-      1
+       1
     ]
 
-The following example shows a query that selects one user from the GleambookUsers collection.
-
 ##### Example
+(Q3.2) The following query returns the names of all customers whose rating is above 650.
 
-    SELECT VALUE user
-    FROM GleambookUsers user
-    WHERE user.id = 1;
+    FROM customers AS c
+    WHERE c.rating > 650
+    SELECT VALUE name;
 
-This query returns:
+Result:
 
-    [{
-        "userSince": "2012-08-20T10:10:00.000Z",
-        "friendIds": [
-            2,
-            3,
-            6,
-            10
-        ],
-        "gender": "F",
-        "name": "MargaritaStoddard",
-        "nickname": "Mags",
-        "alias": "Margarita",
-        "id": 1,
-        "employment": [
-            {
-                "organizationName": "Codetechno",
-                "start-date": "2006-08-06"
-            },
-            {
-                "end-date": "2010-01-26",
-                "organizationName": "geomedia",
-                "start-date": "2010-06-17"
-            }
-        ]
-    } ]
+    RESULT:
+    [
+        "T. Cody",
+        "M. Sinclair",
+        "T. Henry"
+    ]
+
 
 ### <a id="SQL_select">SQL-style SELECT</a>
-The traditional SQL-style `SELECT` syntax is also supported in the query language.
-This syntax can also be reformulated in a `SELECT VALUE` based manner.
-(E.g., `SELECT expA AS fldA, expB AS fldB` is syntactic sugar for `SELECT VALUE { 'fldA': expA, 'fldB': expB }`.)
-Unlike in SQL, the result of a query does not preserve the order of expressions in the `SELECT` clause.
+
+Traditional SQL-style `SELECT` syntax is also supported in SQL++; however, the result of a query is not guaranteed to preserve the order of expressions in the `SELECT` clause.
 
 ##### Example
-    SELECT user.alias user_alias, user.name user_name
-    FROM GleambookUsers user
-    WHERE user.id = 1;
+(Q3.3) The following query returns the names and customer ids of any customers whose rating is 750.
+
+    FROM customers AS c
+    WHERE c.rating = 750
+    SELECT c.name AS customer_name, c.custid AS customer_id;
 
 Returns:
 
-    [ {
-        "user_name": "MargaritaStoddard",
-        "user_alias": "Margarita"
-    } ]
-
+    [
+        {
+            "customer_id": "C13",
+            "customer_name": "T. Cody"
+        },
+        {
+            "customer_id": "C37",
+            "customer_name": "T. Henry"
+        }
+    ]
+    
 ### <a id="Select_star">SELECT *</a>
-`SELECT *` returns an object with a nested field for each input tuple.
-Each field has as its field name the name of a binding variable generated by either the `FROM` clause or `GROUP BY`
-clause in the current enclosing `SELECT` statement, and its field value is the value of that binding variable.
 
-Note that the result of `SELECT *` is different from the result of query that selects all the fields of an object.
+As in SQL, the phrase `SELECT *` suggests, "select everything."
+
+
+For each binding tuple in the stream, `SELECT *` produces an output object. For each variable in the binding tuple, the output object contains a field: the name of the field is the name of the variable, and the value of the field is the value of the variable. Essentially, `SELECT *` means, "return all the bound variables, with their names and values."
+
+
+The effect of `SELECT *` can be illustrated by an example based on two small collections named `ages` and `eyes`. The contents of the two collections are as follows:
+
+`ages`:
+
+	[
+	    { "name": "Bill", "age": 21 },
+	    { "name": "Sue", "age": 32 }
+	]
+
+`eyes`:
+
+	[
+	    { "name": "Bill", "eyecolor": "brown" },
+	    { "name": "Sue", "eyecolor": "blue" }
+	]
+
+The following example applies `SELECT *` to a single collection.
 
 ##### Example
 
-    SELECT *
-    FROM GleambookUsers user;
+(Q3.4a) Return all the information in the `ages` collection.
 
-Since `user` is the only binding variable generated in the `FROM` clause, this query returns:
+	FROM ages AS a
+	SELECT * ;
+	
+Result:
 
-    [ {
-        "user": {
-            "userSince": "2012-08-20T10:10:00.000Z",
-            "friendIds": [
-                2,
-                3,
-                6,
-                10
-            ],
-            "gender": "F",
-            "name": "MargaritaStoddard",
-            "nickname": "Mags",
-            "alias": "Margarita",
-            "id": 1,
-            "employment": [
-                {
-                    "organizationName": "Codetechno",
-                    "start-date": "2006-08-06"
-                },
-                {
-                    "end-date": "2010-01-26",
-                    "organizationName": "geomedia",
-                    "start-date": "2010-06-17"
-                }
-            ]
-        }
-    }, {
-        "user": {
-            "userSince": "2011-01-22T10:10:00.000Z",
-            "friendIds": [
-                1,
-                4
-            ],
-            "name": "IsbelDull",
-            "nickname": "Izzy",
-            "alias": "Isbel",
-            "id": 2,
-            "employment": [
-                {
-                    "organizationName": "Hexviafind",
-                    "startDate": "2010-04-27"
-                }
-            ]
-        }
-    }, {
-        "user": {
-            "userSince": "2012-07-10T10:10:00.000Z",
-            "friendIds": [
-                1,
-                5,
-                8,
-                9
-            ],
-            "name": "EmoryUnk",
-            "alias": "Emory",
-            "id": 3,
-            "employment": [
-                {
-                    "organizationName": "geomedia",
-                    "endDate": "2010-01-26",
-                    "startDate": "2010-06-17"
-                }
-            ]
-        }
-    } ]
+	[
+	    { "a": { "name": "Bill", "age": 21 }
+	    },
+	    { "a": { "name": "Sue", "age": 32 }
+	    }
+	]
 
+Note that the variable-name `a` appears in the query result. If the `FROM` clause had been simply `FROM ages` (omitting `AS a`), the variable-name in the query result would have been `ages`.
+
+The next example applies `SELECT *` to a join of two collections.
 
 ##### Example
 
-    SELECT *
-    FROM GleambookUsers u, GleambookMessages m
-    WHERE m.authorId = u.id and u.id = 2;
+(Q3.4b) Return all the information in a join of `ages` and `eyes` on matching name fields.
 
-This query does an inner join that we will discuss in [multiple from terms](#Multiple_from_terms).
-Since both `u` and `m` are binding variables generated in the `FROM` clause, this query returns:
+	FROM ages AS a, eyes AS e
+	WHERE a.name = e.name
+	SELECT * ;
 
-    [ {
-        "u": {
-            "userSince": "2011-01-22T10:10:00",
-            "friendIds": [
-                1,
-                4
-            ],
-            "name": "IsbelDull",
-            "nickname": "Izzy",
-            "alias": "Isbel",
-            "id": 2,
-            "employment": [
-                {
-                    "organizationName": "Hexviafind",
-                    "startDate": "2010-04-27"
-                }
-            ]
-        },
-        "m": {
-            "senderLocation": [
-                31.5,
-                75.56
-            ],
-            "inResponseTo": 1,
-            "messageId": 6,
-            "authorId": 2,
-            "message": " like product-z its platform is mind-blowing"
-        }
-    }, {
-        "u": {
-            "userSince": "2011-01-22T10:10:00",
-            "friendIds": [
-                1,
-                4
-            ],
-            "name": "IsbelDull",
-            "nickname": "Izzy",
-            "alias": "Isbel",
-            "id": 2,
-            "employment": [
-                {
-                    "organizationName": "Hexviafind",
-                    "startDate": "2010-04-27"
-                }
-            ]
-        },
-        "m": {
-            "senderLocation": [
-                48.09,
-                81.01
-            ],
-            "inResponseTo": 4,
-            "messageId": 3,
-            "authorId": 2,
-            "message": " like product-y the plan is amazing"
-        }
-    } ]
+Result:
+
+	[
+	    { "a": { "name": "Bill", "age": 21 },
+	      "e": { "name": "Bill", "eyecolor": "brown" }
+	    },
+	    { "a": { "name": "Sue", "age": 32 },
+	      "e": { "name": "Sue", "eyecolor": "blue" }
+	    }
+	]
+
+Note that the result of `SELECT *` in SQL++ is more complex than the result of `SELECT *` in SQL.
 
 ### <a id="Select_variable_star">SELECT _variable_.*</a>
 
-Whereas `SELECT *` returns all the fields bound to all the variables which are currently defined,
-the notation `SELECT c.*` returns all the fields of the object bound to variable `c`.
-The variable `c` must be bound to an object for this to work.
+SQL++ has an alternative version of `SELECT *` in which the star is preceded by a variable. Whereas `SELECT *` means, "return all the bound variables, with their names and values," `SELECT` *variable* `.*` means "return only the named variable, and return only its value, not its name."
+
+The following example can be compared with (Q3.4a) to see the difference between the two versions of `SELECT *`:
 
 ##### Example
 
-    SELECT user.*
-    FROM GleambookUsers user;
+(Q3.4c) Return all information in the `ages` collection.
 
-Compare this query with the first example given under [SELECT *](#Select_star).
-This query returns all users from the `GleambookUsers` dataset,
-but the `user` variable name is omitted from the results:
+	FROM ages AS a
+	SELECT a.*
+
+Result:
+
+	[
+	    { "name": "Bill", "age": 21 },
+	    { "name": "Sue", "age": 32 }
+	]
+	
+Note that, for queries over a single collection,  `SELECT` *variable* `.*` returns a simpler result and therefore may be preferable to `SELECT *`. In fact,  `SELECT` *variable* `.*`, like `SELECT *` in SQL, is equivalent to a `SELECT` clause that enumerates all the fields of the collection, as in (Q3.4d):
+
+##### Example
+
+(Q3.4d) Return all the information in the `ages` collection. 
+
+	FROM ages AS a
+	SELECT a.name, a.age
+
+(same result as (Q3.4c))
+
+ `SELECT` *variable* `.*` has an additional application. It can be used to return all the fields of a nested object. To illustrate this use, we will use the `customers` dataset in the example database (see Appendix 4).
+
+##### Example
+(Q3.4e) In the `customers` dataset, return all the fields of the `address` objects that have zipcode "02340".
+
+    FROM customers AS c
+    WHERE c.address.zipcode = "02340"
+    SELECT address.*  ;
+
+
+Result:
 
     [
-      {
-        "id": 1,
-        "alias": "Margarita",
-        "name": "MargaritaStoddard",
-        "nickname": "Mags",
-        "userSince": "2012-08-20T10:10:00",
-        "friendIds": [
-          2,
-          3,
-          6,
-          10
-        ],
-        "employment": [
-          {
-            "organizationName": "Codetechno",
-            "start-date": "2006-08-06"
-          },
-          {
-            "organizationName": "geomedia",
-            "start-date": "2010-06-17",
-            "end-date": "2010-01-26"
-          }
-        ],
-        "gender": "F"
-      },
-      {
-        "id": 2,
-        "alias": "Isbel",
-        "name": "IsbelDull",
-        "nickname": "Izzy",
-        "userSince": "2011-01-22T10:10:00",
-        "friendIds": [
-          1,
-          4
-        ],
-        "employment": [
-          {
-            "organizationName": "Hexviafind",
-            "startDate": "2010-04-27"
-          }
-        ]
-      },
-      {
-        "id": 3,
-        "alias": "Emory",
-        "name": "EmoryUnk",
-        "userSince": "2012-07-10T10:10:00",
-        "friendIds": [
-          1,
-          5,
-          8,
-          9
-        ],
-        "employment": [
-          {
-            "organizationName": "geomedia",
-            "startDate": "2010-06-17",
-            "endDate": "2010-01-26"
-          }
-        ]
-      }
+        {
+            "street": "690 River St.",
+            "city": "Hanover, MA",
+            "zipcode": "02340"
+        }
     ]
 
+
 ### <a id="Select_distinct">SELECT DISTINCT</a>
-The `DISTINCT` keyword is used to eliminate duplicate items in results. The following example shows how it works.
+The `DISTINCT` keyword is used to eliminate duplicate items from the results of a query block. 
 
 ##### Example
 
-    SELECT DISTINCT * FROM [1, 2, 2, 3] AS foo;
+(Q3.5) Returns all of the different cities in the `customers` dataset.
 
-This query returns:
+    FROM customers AS c
+    SELECT DISTINCT c.address.city;
 
-    [ {
-        "foo": 1
-    }, {
-        "foo": 2
-    }, {
-        "foo": 3
-    } ]
-
-##### Example
-
-    SELECT DISTINCT VALUE foo FROM [1, 2, 2, 3] AS foo;
-
-This version of the query returns:
-
-    [ 1
-    , 2
-    , 3
-     ]
+Result:
+    
+    [
+        {
+            "city": "Boston, MA"
+        },
+        {
+            "city": "Hanover, MA"
+        },
+        {
+            "city": "St. Louis, MO"
+        },
+        {
+            "city": "Rome, Italy"
+        }
+    ]   
 
 ### <a id="Unnamed_projections">Unnamed Projections</a>
-Similar to standard SQL, the query language  supports unnamed projections (a.k.a, unnamed `SELECT` clause items), for which names are generated.
+Similar to standard SQL, the query language supports unnamed projections (a.k.a, unnamed `SELECT` clause items), for which names are generated rather than user-provided.
 Name generation has three cases:
 
   * If a projection expression is a variable reference expression, its generated name is the name of the variable.
@@ -510,1605 +260,1195 @@
 
 ##### Example
 
-    SELECT substr(user.name, 10), user.alias
-    FROM GleambookUsers user
-    WHERE user.id = 1;
+(Q3.6) Returns the last digit and the order date of all orders for the customer whose ID is "C41".
 
-This query outputs:
+    FROM orders AS o
+    WHERE o.custid = "C41"
+    SELECT o.orderno % 1000,  o.order_date;
 
-    [ {
-        "alias": "Margarita",
-        "$1": "Stoddard"
-    } ]
-
-In the result, `$1` is the generated name for `substr(user.name, 1)`, while `alias` is the generated name for `user.alias`.
-
-### <a id="Abbreviated_field_access_expressions">Abbreviated Field Access Expressions</a>
-As in standard SQL, field access expressions can be abbreviated (not recommended!) when there is no ambiguity. In the next example, the variable `user` is the only possible variable reference for fields `id`, `name` and `alias` and thus could be omitted in the query. More information on abbbreviated field access can be found in the appendix section on Variable Resolution.
-
-##### Example
-
-    SELECT substr(name, 10) AS lname, alias
-    FROM GleambookUsers user
-    WHERE id = 1;
-
-Outputs:
-
-    [ {
-        "lname": "Stoddard",
-        "alias": "Margarita"
-    } ]
-
-## <a id="Unnest_clauses">UNNEST Clause</a>
-For each of its input tuples, the `UNNEST` clause flattens a collection-valued expression into individual items, producing multiple tuples, each of which is one of the expression's original input tuples augmented with a flattened item from its collection.
-
-### <a id="Inner_unnests">Inner UNNEST</a>
-The following example is a query that retrieves the names of the organizations that a selected user has worked for. It uses the `UNNEST` clause to unnest the nested collection `employment` in the user's object.
-
-##### Example
-
-    SELECT u.id AS userId, e.organizationName AS orgName
-    FROM GleambookUsers u
-    UNNEST u.employment e
-    WHERE u.id = 1;
-
-This query returns:
-
-    [ {
-        "orgName": "Codetechno",
-        "userId": 1
-    }, {
-        "orgName": "geomedia",
-        "userId": 1
-    } ]
-
-Note that `UNNEST` has SQL's inner join semantics --- that is, if a user has no employment history, no tuple corresponding to that user will be emitted in the result.
-
-### <a id="Left_outer_unnests">Left Outer UNNEST</a>
-As an alternative, the `LEFT OUTER UNNEST` clause offers SQL's left outer join semantics. For example, no collection-valued field named `hobbies` exists in the object for the user whose id is 1, but the following query's result still includes user 1.
-
-##### Example
-
-    SELECT u.id AS userId, h.hobbyName AS hobby
-    FROM GleambookUsers u
-    LEFT OUTER UNNEST u.hobbies h
-    WHERE u.id = 1;
-
-Returns:
-
-    [ {
-        "userId": 1
-    } ]
-
-Note that if `u.hobbies` is an empty collection or leads to a `MISSING` (as above) or `NULL` value for a given input tuple, there is no corresponding binding value for variable `h` for an input tuple. A `MISSING` value will be generated for `h` so that the input tuple can still be propagated.
-
-### <a id="Expressing_joins_using_unnests">Expressing Joins Using UNNEST</a>
-The `UNNEST` clause is similar to SQL's `JOIN` clause except that it allows its right argument to be correlated to its left argument, as in the examples above --- i.e., think "correlated cross-product".
-The next example shows this via a query that joins two data sets, GleambookUsers and GleambookMessages, returning user/message pairs. The results contain one object per pair, with result objects containing the user's name and an entire message. The query can be thought of as saying "for each Gleambook user, unnest the `GleambookMessages` collection and filter the output with the condition `message.authorId = user.id`".
-
-##### Example
-
-    SELECT u.name AS uname, m.message AS message
-    FROM GleambookUsers u
-    UNNEST GleambookMessages m
-    WHERE m.authorId = u.id;
-
-This returns:
-
-    [ {
-        "uname": "MargaritaStoddard",
-        "message": " can't stand acast its plan is terrible"
-    }, {
-        "uname": "MargaritaStoddard",
-        "message": " dislike x-phone its touch-screen is horrible"
-    }, {
-        "uname": "MargaritaStoddard",
-        "message": " can't stand acast the network is horrible:("
-    }, {
-        "uname": "MargaritaStoddard",
-        "message": " like ccast the 3G is awesome:)"
-    }, {
-        "uname": "MargaritaStoddard",
-        "message": " can't stand product-w the touch-screen is terrible"
-    }, {
-        "uname": "IsbelDull",
-        "message": " like product-z its platform is mind-blowing"
-    }, {
-        "uname": "IsbelDull",
-        "message": " like product-y the plan is amazing"
-    } ]
-
-Similarly, the above query can also be expressed as the `UNNEST`ing of a correlated subquery:
-
-##### Example
-
-    SELECT u.name AS uname, m.message AS message
-    FROM GleambookUsers u
-    UNNEST (
-        SELECT VALUE msg
-        FROM GleambookMessages msg
-        WHERE msg.authorId = u.id
-    ) AS m;
-
-## <a id="From_clauses">FROM clauses</a>
-A `FROM` clause is used for enumerating (i.e., conceptually iterating over) the contents of collections, as in SQL.
-
-### <a id="Binding_expressions">Binding expressions</a>
-In addition to stored collections, a `FROM` clause can iterate over any intermediate collection returned by a valid query expression.
-In the tuple stream generated by a `FROM` clause, the ordering of the input tuples are not guaranteed to be preserved.
-
-##### Example
-
-    SELECT VALUE foo
-    FROM [1, 2, 2, 3] AS foo
-    WHERE foo > 2;
-
-Returns:
+Result:
 
     [
-      3
+        {
+            "$1": 1,
+            "order_date": "2020-04-29"
+        },
+        {
+            "$1": 6,
+            "order_date": "2020-09-02"
+        }
     ]
 
-### <a id="Multiple_from_terms">Multiple FROM Terms</a>
-The query language permits correlations among `FROM` terms. Specifically, a `FROM` binding expression can refer to variables defined to its left in the given `FROM` clause. Thus, the first unnesting example above could also be expressed as follows:
+In the result, `$1` is the generated name for `o.orderno % 1000`, while `order_date` is the generated name for `o.order_date`. It is good practice, however, not to rely on the automatically generated names, which can be confusing and irrelevant. Instead, practice good naming conventions by providing a meaningful and concise name which properly describes the selected item.
+
+### <a id="Abbreviated_field_access_expressions">Abbreviated Field Access Expressions</a>
+As in standard SQL, field access expressions can be abbreviated when there is no ambiguity. In the next example, the variable `o` is the only possible variable reference for fields `orderno` and `order_date` and thus could be omitted in the query. This practice is not recommended, however, as queries may have fields (such as `custid`) which can be present in multiple datasets. More information on abbreviated field access can be found in the appendix section on Variable Resolution.
 
 ##### Example
 
-    SELECT u.id AS userId, e.organizationName AS orgName
-    FROM GleambookUsers u, u.employment e
-    WHERE u.id = 1;
+(Q3.7) Same as Q3.6, omitting the variable reference for the order number and date and providing custom names for `SELECT` clause items. 
+
+    FROM orders AS o
+    WHERE o.custid = "C41" 
+    SELECT orderno % 1000 AS last_digit, order_date;
+
+Result:
+
+    [
+        {
+            "last_digit": 1,
+            "order_date": "2020-04-29"
+        },
+        {
+            "last_digit": 6,
+            "order_date": "2020-09-02"
+        }
+    ]
 
 
-### <a id="Expressing_joins_using_from_terms">Expressing Joins Using FROM Terms</a>
-Similarly, the join intentions of the other `UNNEST`-based join examples above could be expressed as:
+## <a id="From_clauses">FROM clause</a>
+
+---
+
+### FromClause
+**![](../images/diagrams/FromClause.png)**
+
+### FromTerm
+**![](../images/diagrams/FromTerm.png)**
+
+### NamedExpr
+**![](../images/diagrams/NamedExpr.png)**
+
+### JoinStep
+**![](../images/diagrams/JoinStep.png)**
+
+
+##### Synonyms for `UNNEST`: `CORRELATE`, `FLATTEN`
+---
+
+The purpose of a `FROM` clause is to iterate over a collection, binding a variable to each item in turn. Here's a query that iterates over the `customers` dataset, choosing certain customers and returning some of their attributes. 
+
+##### Example
+  
+(Q3.8) List the customer ids and names of the customers in zipcode 63101, in order by their customer IDs.
+
+  
+
+    FROM customers
+    WHERE address.zipcode = "63101"
+    SELECT custid AS customer_id, name
+    ORDER BY customer_id;
+
+  
+
+Result:
+
+  
+
+    [
+        {
+            "customer_id": "C13",
+            "name": "T. Cody"
+        },
+        {
+            "customer_id": "C31",
+            "name": "B. Pruitt"
+        },
+        {
+            "customer_id": "C41",
+            "name": "R. Dodge"
+        }
+    ]
+      
+
+Let's take a closer look at what this `FROM` clause is doing. A `FROM` clause always produces a stream of bindings, in which an iteration variable is bound in turn to each item in a collection. In Q3.8, since no explicit iteration variable is provided, the `FROM` clause defines an implicit variable named `customers`, the same name as the dataset that is being iterated over. The implicit iteration variable serves as the object-name for all field-names in the query block that do not have explicit object-names. Thus, `address.zipcode` really means `customers.address.zipcode`, `custid` really means `customers.custid`, and `name` really means `customers.name`.
+
+You may also provide an explicit iteration variable, as in this version of the same query:
+
+##### Example  
+
+(Q3.9) Alternative version of Q3.8 (same result).
+
+  
+
+    FROM customers AS c
+    WHERE c.address.zipcode = "63101"
+    SELECT c.custid AS customer_id, c.name
+    ORDER BY customer_id;
+
+  
+In Q3.9, the variable `c` is bound to each `customer` object in turn as the query iterates over the `customers` dataset. An explicit iteration variable can be used to identify the fields of the referenced object, as in `c.name` in the `SELECT` clause of Q3.9. When referencing a field of an object, the iteration variable can be omitted when there is no ambiguity. For example, `c.name` could be replaced by `name` in the `SELECT` clause of Q3.9. That's why field-names like `name` and `custid` could stand by themselves in the Q3.8 version of this query.
+
+  
+
+In the examples above, the `FROM` clause iterates over the objects in a dataset. But in general, a `FROM` clause can iterate over any collection. For example, the objects in the `orders` dataset each contain a field called `items`, which is an array of nested objects. In some cases, you will write a `FROM` clause that iterates over a nested array like `items`.
+
+  
+The stream of objects (more accurately, variable bindings) that is produced by the `FROM` clause does not have any particular order. The system will choose the most efficient order for the iteration. If you want your query result to have a specific order, you must use an `ORDER BY` clause.
+
+  
+It's good practice to specify an explicit iteration variable for each collection in the `FROM` clause, and to use these variables to qualify the field-names in other clauses. Here are some reasons for this convention:
+
+  
+-   It's nice to have different names for the collection as a whole and an object in the collection. For example, in the clause `FROM customers AS c`, the name `customers` represents the dataset and the name `c` represents one object in the dataset.
+    
+-   In some cases, iteration variables are required. For example, when joining a dataset to itself, distinct iteration variables are required to distinguish the left side of the join from the right side.
+    
+-   In a subquery it's sometimes necessary to refer to an object in an outer query block (this is called a *correlated subquery*). To avoid confusion in correlated subqueries, it's best to use explicit variables.
+    
+
+### <a id="Left_outer_unnests">Joins</a>
+
+A `FROM` clause gets more interesting when there is more than one collection involved. The following query iterates over two collections: `customers` and `orders`. The `FROM` clause produces a stream of binding tuples, each containing two variables, `c` and `o`. In each binding tuple, `c` is bound to an object from `customers`, and `o` is bound to an object from `orders`. Conceptually, at this point, the binding tuple stream contains all possible pairs of a customer and an order (this is called the *Cartesian product* of `customers` and `orders`). Of course, we are interested only in pairs where the `custid` fields match, and that condition is expressed in the `WHERE` clause, along with the restriction that the order number must be 1001.
 
 ##### Example
 
-    SELECT u.name AS uname, m.message AS message
-    FROM GleambookUsers u, GleambookMessages m
-    WHERE m.authorId = u.id;
+(Q3.10) Create a packing list for order number 1001, showing the customer name and address and all the items in the order.
 
-##### Example
+ 
+    FROM customers AS c, orders AS o
+    WHERE c.custid = o.custid
+    AND o.orderno = 1001
+    SELECT o.orderno,
+        c.name AS customer_name,
+        c.address,
+        o.items AS items_ordered;
 
-    SELECT u.name AS uname, m.message AS message
-    FROM GleambookUsers u,
-      (
-        SELECT VALUE msg
-        FROM GleambookMessages msg
-        WHERE msg.authorId = u.id
-      ) AS m;
+  
+Result:
 
-Note that the first alternative is one of the SQL-92 approaches to expressing a join.
-
-### <a id="Implicit_binding_variables">Implicit Binding Variables</a>
-
-Similar to standard SQL, the query language supports implicit `FROM` binding variables (i.e., aliases), for which a binding variable is generated.
-Variable generation falls into three cases:
-
-  * If the binding expression is a variable reference expression, the generated variable's name will be the name of the referenced variable itself.
-  * If the binding expression is a field access expression (or a fully qualified name for a dataset), the generated
-    variable's name will be the last identifier (or the dataset name) in the expression.
-  * For all other cases, a compilation error will be raised.
-
-The next two examples show queries that do not provide binding variables in their `FROM` clauses.
-
-##### Example
-
-    SELECT GleambookUsers.name, GleambookMessages.message
-    FROM GleambookUsers, GleambookMessages
-    WHERE GleambookMessages.authorId = GleambookUsers.id;
-
-Returns:
-
-    [ {
-        "name": "MargaritaStoddard",
-        "message": " like ccast the 3G is awesome:)"
-    }, {
-        "name": "MargaritaStoddard",
-        "message": " can't stand product-w the touch-screen is terrible"
-    }, {
-        "name": "MargaritaStoddard",
-        "message": " can't stand acast its plan is terrible"
-    }, {
-        "name": "MargaritaStoddard",
-        "message": " dislike x-phone its touch-screen is horrible"
-    }, {
-        "name": "MargaritaStoddard",
-        "message": " can't stand acast the network is horrible:("
-    }, {
-        "name": "IsbelDull",
-        "message": " like product-y the plan is amazing"
-    }, {
-        "name": "IsbelDull",
-        "message": " like product-z its platform is mind-blowing"
-    } ]
-
-##### Example
-
-    SELECT GleambookUsers.name, GleambookMessages.message
-    FROM GleambookUsers,
-      (
-        SELECT VALUE GleambookMessages
-        FROM GleambookMessages
-        WHERE GleambookMessages.authorId = GleambookUsers.id
-      );
-
-Returns:
-
-    Error: "Syntax error: Need an alias for the enclosed expression:\n(select element GleambookMessages\n    from GleambookMessages as GleambookMessages\n    where (GleambookMessages.authorId = GleambookUsers.id)\n )",
-        "query_from_user": "use TinySocial;\n\nSELECT GleambookUsers.name, GleambookMessages.message\n    FROM GleambookUsers,\n      (\n        SELECT VALUE GleambookMessages\n        FROM GleambookMessages\n        WHERE GleambookMessages.authorId = GleambookUsers.id\n      );"
-
-More information on implicit binding variables can be found in the appendix section on Variable Resolution.
-
-## <a id="Join_clauses">JOIN Clauses</a>
-The join clause in the query language supports both inner joins and left outer joins from standard SQL.
-
-### <a id="Inner_joins">Inner joins</a>
-Using a `JOIN` clause, the inner join intent from the preceding examples can also be expressed as follows:
-
-##### Example
-
-    SELECT u.name AS uname, m.message AS message
-    FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id;
-
-### <a id="Left_outer_joins">Left Outer Joins</a>
-The query language supports SQL's notion of left outer join. The following query is an example:
-
-    SELECT u.name AS uname, m.message AS message
-    FROM GleambookUsers u LEFT OUTER JOIN GleambookMessages m ON m.authorId = u.id;
-
-Returns:
-
-    [ {
-        "uname": "MargaritaStoddard",
-        "message": " like ccast the 3G is awesome:)"
-    }, {
-        "uname": "MargaritaStoddard",
-        "message": " can't stand product-w the touch-screen is terrible"
-    }, {
-        "uname": "MargaritaStoddard",
-        "message": " can't stand acast its plan is terrible"
-    }, {
-        "uname": "MargaritaStoddard",
-        "message": " dislike x-phone its touch-screen is horrible"
-    }, {
-        "uname": "MargaritaStoddard",
-        "message": " can't stand acast the network is horrible:("
-    }, {
-        "uname": "IsbelDull",
-        "message": " like product-y the plan is amazing"
-    }, {
-        "uname": "IsbelDull",
-        "message": " like product-z its platform is mind-blowing"
-    }, {
-        "uname": "EmoryUnk"
-    } ]
-
-For non-matching left-side tuples, the query language produces `MISSING` values for the right-side binding variables; that is why the last object in the above result doesn't have a `message` field. Note that this is slightly different from standard SQL, which instead would fill in `NULL` values for the right-side fields. The reason for this difference is that, for non-matches in its join results, the query language views fields from the right-side as being "not there" (a.k.a. `MISSING`) instead of as being "there but unknown" (i.e., `NULL`).
-
-The left-outer join query can also be expressed using `LEFT OUTER UNNEST`:
-
-    SELECT u.name AS uname, m.message AS message
-    FROM GleambookUsers u
-    LEFT OUTER UNNEST (
-        SELECT VALUE message
-        FROM GleambookMessages message
-        WHERE message.authorId = u.id
-      ) m;
-
-In general, SQL-style join queries can also be expressed by `UNNEST` clauses and left outer join queries can be expressed by `LEFT OUTER UNNESTs`.
-
-### <a id="Join_variable_scope">Variable scope in JOIN clauses</a>
-
-Variables defined by `JOIN` subclauses are not visible to other subclauses in the same `FROM` clause.
-This also applies to the `FROM` variable that starts the `JOIN` subclause.
-
-##### Example
-
-    SELECT * FROM GleambookUsers u
-    JOIN (SELECT VALUE m
-          FROM GleambookMessages m
-          WHERE m.authorId = u.id) m
-    ON u.id = m.authorId;
-
-The variable `u` defined by the `FROM` clause is not visible inside the `JOIN` subclause,
-so this query returns no results.
-
-## <a id="Group_By_clauses">GROUP BY Clauses</a>
-The `GROUP BY` clause generalizes standard SQL's grouping and aggregation semantics, but it also retains backward compatibility with the standard (relational) SQL `GROUP BY` and aggregation features.
-
-### <a id="Group_variables">Group variables</a>
-In a `GROUP BY` clause, in addition to the binding variable(s) defined for the grouping key(s), the query language allows a user to define a *group variable* by using the clause's `GROUP AS` extension to denote the resulting group.
-After grouping, then, the query's in-scope variables include the grouping key's binding variables as well as this group variable which will be bound to one collection value for each group. This per-group collection (i.e., multiset) value will be a set of nested objects in which each field of the object is the result of a renamed variable defined in parentheses following the group variable's name. The `GROUP AS` syntax is as follows:
-
-    <GROUP> <AS> Variable ("(" VariableReference <AS> Identifier ("," VariableReference <AS> Identifier )* ")")?
-
-##### Example
-
-    SELECT *
-    FROM GleambookMessages message
-    GROUP BY message.authorId AS uid GROUP AS msgs(message AS msg);
-
-This first example query returns:
-
-    [ {
-        "msgs": [
-            {
-                "msg": {
-                    "senderLocation": [
-                        38.97,
-                        77.49
-                    ],
-                    "inResponseTo": 1,
-                    "messageId": 11,
-                    "authorId": 1,
-                    "message": " can't stand acast its plan is terrible"
-                }
+    [
+        {
+            "orderno": 1001,
+            "customer_name": "R. Dodge",
+            "address": {
+                "street": "150 Market St.",
+                "city": "St. Louis, MO",
+                "zipcode": "63101"
             },
-            {
-                "msg": {
-                    "senderLocation": [
-                        41.66,
-                        80.87
-                    ],
-                    "inResponseTo": 4,
-                    "messageId": 2,
-                    "authorId": 1,
-                    "message": " dislike x-phone its touch-screen is horrible"
+            "items_ordered": [
+                {
+                    "itemno": 347,
+                    "qty": 5,
+                    "price": 19.99
+                },
+                {
+                    "itemno": 193,
+                    "qty": 2,
+                    "price": 28.89
                 }
-            },
-            {
-                "msg": {
-                    "senderLocation": [
-                        37.73,
-                        97.04
-                    ],
-                    "inResponseTo": 2,
-                    "messageId": 4,
-                    "authorId": 1,
-                    "message": " can't stand acast the network is horrible:("
-                }
-            },
-            {
-                "msg": {
-                    "senderLocation": [
-                        40.33,
-                        80.87
-                    ],
-                    "inResponseTo": 11,
-                    "messageId": 8,
-                    "authorId": 1,
-                    "message": " like ccast the 3G is awesome:)"
-                }
-            },
-            {
-                "msg": {
-                    "senderLocation": [
-                        42.5,
-                        70.01
-                    ],
-                    "inResponseTo": 12,
-                    "messageId": 10,
-                    "authorId": 1,
-                    "message": " can't stand product-w the touch-screen is terrible"
-                }
+            ]
+        }
+    ]
+
+  
+
+Q3.10 is called a *join query* because it joins the `customers` collection and the `orders` collection, using the join condition `c.custid = o.custid`. In SQL++, as in SQL, you can express this query more explicitly by a `JOIN` clause that includes the join condition, as follows:
+
+  
+##### Example
+
+(Q3.11) Alternative statement of Q3.10 (same result).
+
+
+    FROM customers AS c JOIN orders AS o
+        ON c.custid = o.custid
+    WHERE o.orderno = 1001
+    SELECT o.orderno,
+        c.name AS customer_name,
+        c.address,
+        o.items AS items_ordered;
+
+  
+Whether you express the join condition in a `JOIN` clause or in a `WHERE` clause is a matter of taste; the result is the same. This manual will generally use a comma-separated list of collection-names in the `FROM` clause, leaving the join condition to be expressed elsewhere. As we'll soon see, in some query blocks the join condition can be omitted entirely.
+
+  
+There is, however, one case in which an explicit `JOIN` clause is necessary. That is when you need to join collection A to collection B, and you want to make sure that every item in collection A is present in the query result, even if it doesn't match any item in collection B. This kind of query is called a *left outer join*, and it is illustrated by the following example.
+
+##### Example
+
+(Q3.12) For customers T. Cody and M. Sinclair, list the customer ID and name, together with the order numbers and dates of their orders (if any).
+
+    FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
+    WHERE c.name = "T. Cody"
+       OR c.name = "M. Sinclair"
+    SELECT c.custid, c.name, o.orderno, o.order_date
+    ORDER BY c.custid, o.order_date;
+
+  
+
+Result:
+
+    [
+        {
+            "custid": "C13",
+            "orderno": 1002,
+            "name": "T. Cody",
+            "order_date": "2020-05-01"
+        },
+        {
+            "custid": "C13",
+            "orderno": 1007,
+            "name": "T. Cody",
+            "order_date": "2020-09-13"
+        },
+        {
+            "custid": "C13",
+            "orderno": 1008,
+            "name": "T. Cody",
+            "order_date": "2020-10-13"
+        },
+        {
+            "custid": "C13",
+            "orderno": 1009,
+            "name": "T. Cody",
+            "order_date": "2020-10-13"
+        },
+        {
+            "custid": "C25",
+            "name": "M. Sinclair"
+        }
+    ]
+  
+
+As you can see from the result of this left outer join, our data includes four orders from customer T. Cody, but no orders from customer M. Sinclair. The behavior of left outer join in SQL++ is different from that of SQL. SQL would have provided M. Sinclair with an order in which all the fields were `null`. SQL++, on the other hand, deals with schemaless data, which permits it to simply omit the order fields from the outer join.
+
+Now we're ready to look at a new kind of join that was not provided (or needed) in original SQL. Consider this query:
+
+##### Example  
+
+(Q3.13) For every case in which an item is ordered in a quantity greater than 100, show the order number, date, item number, and quantity.
+
+  
+
+    FROM orders AS o, o.items AS i
+    WHERE i.qty > 100
+    SELECT o.orderno, o.order_date, i.itemno AS item_number,
+        i.qty AS quantity
+    ORDER BY o.orderno, item_number;
+
+Result:
+
+    [
+        {
+            "orderno": 1002,
+            "order_date": "2020-05-01",
+            "item_number": 680,
+            "quantity": 150
+        },
+        {
+            "orderno": 1005,
+            "order_date": "2020-08-30",
+            "item_number": 347,
+            "quantity": 120
+        },
+        {
+            "orderno": 1006,
+            "order_date": "2020-09-02",
+            "item_number": 460,
+            "quantity": 120
+        }
+    ]
+  
+
+Q3.13 illustrates a feature called *left-correlation* in the `FROM` clause. Notice that we are joining `orders`, which is a dataset, to `items`, which is an array nested inside each order. In effect, for each order, we are unnesting the `items` array and joining it to the `order` as though it were a separate collection. For this reason, this kind of query is sometimes called an *unnesting query*. The keyword `UNNEST` may be used whenever left-correlation is used in a `FROM` clause, as shown in this example:
+
+	
+                           
+##### Example 
+
+(Q3.14) Alternative statement of Q3.13 (same result).
+
+    FROM orders AS o UNNEST o.items AS i
+    WHERE i.qty > 100
+    SELECT o.orderno, o.order_date, i.itemno AS item_number,
+            i.qty AS quantity
+    ORDER BY o.orderno, item_number;
+
+  
+The results of Q3.13 and Q3.14 are exactly the same. `UNNEST` serves as a reminder that left-correlation is being used to join an object with its nested items. The join condition in Q3.14 is expressed by the left-correlation: each order `o` is joined to its own items, referenced as `o.items`. The result of the `FROM` clause is a stream of binding tuples, each containing two variables, `o` and `i`. The variable `o` is bound to an order and the variable `i` is bound to one item inside that order.
+
+Like `JOIN`, `UNNEST` has a `LEFT OUTER` option. Q3.14 could have specified:
+
+  
+
+	FROM orders AS o LEFT OUTER UNNEST o.items AS i
+
+  
+
+In this case, orders that have no nested items would also appear in the query result.
+
+## <a id="Let_clauses">LET Clause</a>
+
+---
+
+### LetClause
+**![](../images/diagrams/LetClause.png)**
+
+##### Synonyms for `LET`: `LETTING`
+---
+
+
+ `LET` clauses can be useful when a (complex) expression is used several times within a query, allowing it to be written once to make the query more concise. The word `LETTING` can also be used, although this is not as common. The next query shows an example.
+
+##### Example
+    
+(Q3.15) For each item in an order, the revenue is defined as the quantity times the price of that item. Find individual items for which the revenue is greater than 5000. For each of these, list the order number, item number, and revenue, in descending order by revenue.
+
+  
+
+    FROM orders AS o, o.items AS i
+    LET revenue = i.qty * i.price
+    WHERE revenue > 5000
+    SELECT o.orderno, i.itemno, revenue
+    ORDER by revenue desc;
+
+Result:
+
+    [
+        {
+            "orderno": 1006,
+            "itemno": 460,
+            "revenue": 11997.6
+        },
+        {
+            "orderno": 1002,
+            "itemno": 460,
+            "revenue": 9594.05
+        },
+        {
+            "orderno": 1006,
+            "itemno": 120,
+            "revenue": 5525
+        }
+    ]
+  
+
+The expression for computing revenue is defined once in the `LET` clause and then used three times in the remainder of the query. Avoiding repetition of the revenue expression makes the query shorter and less prone to errors.
+
+## <a id="Where_having_clauses">WHERE Clause</a>
+
+---
+
+### WhereClause
+**![](../images/diagrams/WhereClause.png)**
+
+
+---
+
+The purpose of a `WHERE` clause is to operate on the stream of binding tuples generated by the `FROM` clause, filtering out the tuples that do not satisfy a certain condition. The condition is specified by an expression based on the variable names in the binding tuples. If the expression evaluates to true, the tuple remains in the stream; if it evaluates to anything else, including `null` or `missing`, it is filtered out. The surviving tuples are then passed along to the next clause to be processed (usually either `GROUP BY` or `SELECT`).
+
+Often, the expression in a `WHERE` clause is some kind of comparison like `quantity > 100`. However, any kind of expression is allowed in a `WHERE` clause. The only thing that matters is whether the expression returns `true` or not.
+
+
+## <a id="Group_By_clauses">Grouping</a>
+
+Grouping is especially important when manipulating hierarchies like the ones that are often found in JSON data. Often you will want to generate output data that includes both summary data and line items within the summaries. For this purpose, SQL++ supports several important extensions to the traditional grouping features of SQL. The familiar `GROUP BY` and `HAVING` clauses are still there, and they are joined by a new clause called `GROUP AS`. We'll illustrate these clauses by a series of examples.
+
+### <a id="Left_outer_unnests">GROUP BY Clause</a>
+
+---
+
+### GroupByClause
+**![](../images/diagrams/GroupByClause.png)**
+
+
+---
+
+We'll begin our discussion of grouping with an example from ordinary SQL.
+
+##### Example
+
+ (Q3.16) List the number of orders placed by each customer who has placed an order.
+
+    SELECT o.custid, COUNT(o.orderno) AS `order count`
+    FROM orders AS o
+    GROUP BY o.custid
+    ORDER BY o.custid;
+
+ Result:
+
+    [
+        {
+            "order count": 4,
+            "custid": "C13"
+        },
+        {
+            "order count": 1,
+            "custid": "C31"
+        },
+        {
+            "order count": 1,
+            "custid": "C35"
+        },
+        {
+            "order count": 1,
+            "custid": "C37"
+        },
+        {
+            "order count": 2,
+            "custid": "C41"
+        }
+    ]
+ The input to a `GROUP BY` clause is the stream of binding tuples generated by the `FROM` and `WHERE` clauses. In this query, before grouping, the variable `o` is bound to each object in the `orders` collection in turn.
+
+ SQL++ evaluates the expression in the `GROUP BY` clause, called the grouping expression, once for each of the binding tuples. It then organizes the results into groups in which the grouping expression has a common value (as defined by the `=` operator). In this example, the grouping expression is `o.custid`, and each of the resulting groups is a set of `orders` that have the same `custid`. If necessary, a group is formed for `orders` in which `custid` is `null`, and another group is formed for `orders` that have no `custid`. This query uses the aggregating function `COUNT(o.orderno)`, which counts how many order numbers are in each group. If we are sure that each order object has a distinct `orderno`, we could also simply count the order objects in each group by using `COUNT(*)` in place of `COUNT(o.orderno)`.
+
+In the `GROUP BY` clause, you may optionally define an alias for the grouping expression. For example, in Q3.16, you could have written `GROUP BY o.custid AS cid`. The alias `cid` could then be used in place of the grouping expression in later clauses. In cases where the grouping expression contains an operator, it is especially helpful to define an alias (for example, `GROUP BY salary + bonus AS pay`).
+
+ Q3.16 had a single grouping expression, `o.custid`. If a query has multiple grouping expressions, the combination of grouping expressions is evaluated for every binding tuple, and the stream of binding tuples is partitioned into groups that have values in common for all of the grouping expressions. We'll see an example of such a query in Q3.18.
+
+  
+After grouping, the number of binding tuples is reduced: instead of a binding tuple for each of the input objects, there is a binding tuple for each group. The grouping expressions (identified by their aliases, if any) are bound to the results of their evaluations. However, all the non-grouping fields (that is, fields that were not named in the grouping expressions) are accessible only in a special way: as an argument of one of the special aggregation pseudo-functions such as `SUM`, `AVG`, `MAX`, `MIN`, `STDDEV`, and `COUNT`. The clauses that come after grouping can access only properties of groups, including the grouping expressions and aggregate properties of the groups such as `COUNT(o.orderno)` or `COUNT(*)`. (We'll see an exception when we discuss the new `GROUP AS` clause.)
+
+You may notice that the results of Q3.16 do not include customers who have no `orders`. If we want to include these `customers`, we need to use an outer join between the `customers` and `orders` collections. This is illustrated by the following example, which also includes the name of each customer.
+
+##### Example
+  
+ (Q3.17) List the number of orders placed by each customer including those customers who have placed no orders.
+
+    SELECT c.custid, c.name, COUNT(o.orderno) AS `order count`
+    FROM customers AS c LEFT OUTER JOIN orders AS o ON c.custid = o.custid
+    GROUP BY c.custid, c.name
+    ORDER BY c.custid;
+
+ Result:
+
+    [
+        {
+            "custid": "C13",
+            "order count": 4,
+            "name": "T. Cody"
+        },
+        {
+            "custid": "C25",
+            "order count": 0,
+            "name": "M. Sinclair"
+        },
+        {
+            "custid": "C31",
+            "order count": 1,
+            "name": "B. Pruitt"
+        },
+        {
+            "custid": "C35",
+            "order count": 1,
+            "name": "J. Roberts"
+        },
+        {
+            "custid": "C37",
+            "order count": 1,
+            "name": "T. Henry"
+        },
+        {
+            "custid": "C41",
+            "order count": 2,
+            "name": "R. Dodge"
+        },
+        {
+            "custid": "C47",
+            "order count": 0,
+            "name": "S. Logan"
+        }
+    ]
+
+  
+Notice in Q3.17 what happens when the special aggregation function `COUNT` is applied to a collection that does not exist, such as the orders of M. Sinclair: it returns zero. This behavior is unlike that of the other special aggregation functions `SUM`, `AVG`, `MAX`, and `MIN`, which return `null` if their operand does not exist. This should make you cautious about the `COUNT` function: If it returns zero, that may mean that the collection you are counting has zero members, or that it does not exist, or that you have misspelled the collection's name.
+
+Q3.17 also shows how a query block can have more than one grouping expression. In general, the `GROUP BY` clause produces a binding tuple for each different combination of values for the grouping expressions. In Q3.17, the `c.custid` field uniquely identifies a customer, so adding `c.name` as a grouping expression does not result in any more groups. Nevertheless, `c.name` must be included as a grouping expression if it is to be referenced outside (after) the `GROUP BY` clause. If `c.name` were not included in the `GROUP BY` clause, it would not be a group property and could not be used in the `SELECT` clause.
+
+Of course, a grouping expression need not be a simple field-name. In Q3.18, orders are grouped by month, using a temporal function to extract the month component of the order dates. In cases like this, it is helpful to define an alias for the grouping expression so that it can be referenced elsewhere in the query, e.g., in the `SELECT` clause.
+
+##### Example
+
+(Q3.18) Find the months in 2020 that had the largest numbers of orders; list the months and their numbers of orders. (Return the top three.)
+
+    FROM orders AS o
+    WHERE get_year(date(o.order_date)) = 2020
+    GROUP BY get_month(date(o.order_date)) AS month
+    SELECT month, COUNT(*) AS order_count
+    ORDER BY order_count desc
+    LIMIT 3;
+
+Result:
+
+    [
+        {
+            "month": 10,
+            "order_count": 2
+        },
+        {
+            "month": 9,
+            "order_count": 2
+        },
+        {
+            "month": 8,
+            "order_count": 1
+        }
+    ]
+
+Groups are commonly formed from named collections like `customers` and `orders`. But in some queries you need to form groups from a collection that is nested inside another collection, such as `items` inside `orders`. In SQL++ you can do this by using left-correlation in the `FROM` clause to unnest the inner collection, joining the inner collection with the outer collection, and then performing the grouping on the join, as illustrated in Q3.19.
+
+Q3.19 also shows how a `LET` clause can be used after a `GROUP BY` clause to define an expression that is referenced multiple times in later clauses.
+
+##### Example
+
+(Q3.19) For each order, define the total revenue of the order as the sum of quantity times price for all the items in that order. List the total revenue for all the orders placed by the customer with id "C13", in descending order by total revenue.
+
+
+    FROM orders as o, o.items as i
+    WHERE o.custid = "C13"
+    GROUP BY o.orderno
+    LET total_revenue = sum(i.qty * i.price)
+    SELECT o.orderno, total_revenue
+    ORDER BY total_revenue desc;
+    
+Result:
+    
+    [
+        {
+            "orderno": 1002,
+            "total_revenue": 10906.55
+        },
+        {
+            "orderno": 1008,
+            "total_revenue": 1999.8
+        },
+        {
+            "orderno": 1007,
+            "total_revenue": 130.45
+        }
+    ]
+
+### <a id="Left_outer_unnests">HAVING Clause</a>
+
+---
+
+### HavingClause
+**![](../images/diagrams/HavingClause.png)**
+
+
+---
+
+The `HAVING` clause is very similar to the `WHERE` clause, except that it comes after `GROUP BY` and applies a filter to groups rather than to individual objects. Here's an example of a `HAVING` clause that filters orders by applying a condition to their nested arrays of `items`.
+
+By adding a `HAVING` clause to Q3.19, we can filter the results to include only those orders whose total revenue is greater than 5000, as shown in Q3.20.
+
+
+##### Example
+  
+(Q3.20) Modify Q3.19 to include only orders whose total revenue is greater than 5000.
+
+    FROM orders AS o, o.items as i
+    WHERE o.custid = "C13"
+    GROUP BY o.orderno
+    LET total_revenue = sum(i.qty * i.price)
+    HAVING total_revenue > 5000
+    SELECT o.orderno, total_revenue
+    ORDER BY total_revenue desc;
+
+Result:
+
+    [
+        {
+            "orderno": 1002,
+            "total_revenue": 10906.55
+        }
+    ]
+
+### <a id="Aggregation_PseudoFunctions">Aggregation Pseudo-Functions</a>
+
+SQL provides several special functions for performing aggregations on groups including: `SUM`, `AVG`, `MAX`, `MIN`, and `COUNT` (some implementations provide more). These same functions are supported in SQL++. However, it's worth spending some time on these special functions because they don't behave like ordinary functions. They are called "pseudo-functions" here because they don't evaluate their operands in the same way as ordinary functions. To see the difference, consider these two examples, which are syntactically similar:
+
+##### Example 1:
+  
+    SELECT LENGTH(name) FROM customers
+
+  In Example 1, `LENGTH` is an ordinary function. It simply evaluates its operand (name) and then returns a result computed from the operand.
+
+##### Example 2: 
+    SELECT AVG(rating) FROM customers
+
+The effect of `AVG` in Example 2 is quite different. Rather than performing a computation on an individual rating value, `AVG` has a global effect: it effectively restructures the query. As a pseudo-function, `AVG` requires its operand to be a group; therefore, it automatically collects all the rating values from the query block and forms them into a group.
+
+The aggregation pseudo-functions always require their operand to be a group. In some queries, the group is explicitly generated by a `GROUP BY` clause, as in Q3.21:
+##### Example
+  (Q3.21) List the average credit rating of customers by zipcode.
+
+    FROM customers AS c
+    GROUP BY c.address.zipcode AS zip
+    SELECT zip, AVG(c.rating) AS `avg credit rating`
+    ORDER BY zip;
+
+ Result:
+
+    [
+        {
+            "avg credit rating": 625
+        },
+        {
+            "avg credit rating": 657.5,
+            "zip": "02115"
+        },
+        {
+            "avg credit rating": 690,
+            "zip": "02340"
+        },
+        {
+            "avg credit rating": 695,
+            "zip": "63101"
+        }
+    ]
+
+Note in the result of Q3.21 that one or more customers had no zipcode. These customers were formed into a group for which the value of the grouping key is missing. When the query results were returned in JSON format, the `missing` key simply does not appear. Also note that the group whose key is `missing` appears first because `missing` is considered to be smaller than any other value. If some customers had had `null` as a zipcode, they would have been included in another group, appearing after the `missing` group but before the other groups.
+
+When an aggregation pseudo-function is used without an explicit `GROUP BY` clause, it implicitly forms the entire query block into a single group, as in Q3.22:
+##### Example
+(Q3.22) Find the average credit rating among all customers.
+
+  
+
+    FROM customers AS c
+    SELECT AVG(c.rating) AS `avg credit rating`;
+
+Result:
+
+    [
+        {
+            "avg credit rating": 670
+        }
+    ]
+
+  
+
+The aggregation pseudo-function `COUNT` has a special form in which its operand is `*` instead of an expression. For example, `SELECT COUNT(*) FROM customers` simply returns the total number of customers, whereas `SELECT COUNT(rating) FROM customers` returns the number of customers who have known ratings (that is, their ratings are not `null` or `missing`).
+
+  
+
+ Because the aggregation pseudo-functions sometimes restructure their operands, they can be used only in query blocks where (explicit or implicit) grouping is being done. Therefore the pseudo-functions cannot operate directly on arrays or multisets. For operating directly on JSON collections, SQL++ provides a set of ordinary functions for computing aggregations. Each ordinary aggregation function (except the ones corresponding to `COUNT` and `ARRAY_AGG`) has two versions: one that ignores `null` and `missing` values and one that returns `null` if a `null` or `missing` value is encountered anywhere in the collection. The names of the aggregation functions are as follows:
+ 
+| Aggregation pseudo-function; operates on groups only |  ordinary functions: Ignores NULL or MISSING values | ordinary functions: Returns NULL if NULL or MISSING are encountered| 
+|----------|----------|--------|
+|SUM| ARRAY_SUM| STRICT_SUM |
+| AVG |ARRAY_AVG| STRICT_AVG |
+| MAX | ARRAY_MAX| STRICT_MAX |
+| MIN | ARRAY_MIN| STRICT_MIN |
+| COUNT        |ARRAY_COUNT|STRICT_COUNT (see exception below) |
+|STDDEV_SAMP|ARRAY_STDDEV_SAMP| STRICT_STDDEV_SAMP |
+|STDDEV_POP|ARRAY_STDDEV_POP| STRICT_STDDEV_POP |
+|VAR_SAMP|ARRAY_VAR_SAMP| STRICT_VAR_SAMP |
+|VAR_POP|ARRAY_VAR_POP| STRICT_VAR_POP |
+|SKEWNESS|ARRAY_SKEWNESS| STRICT_SKEWNESS |
+|KURTOSIS|ARRAY_KURTOSIS| STRICT_KURTOSIS |
+| |ARRAY_AGG| |
+
+
+##### Exception: the ordinary aggregation function STRICT_COUNT operates on any collection, and returns a count of its items, including null values in the count. In this respect, STRICT_COUNT is more similar to COUNT(*) than to COUNT(expression).
+
+ Note that the ordinary aggregation functions that ignore `null` have names beginning with "ARRAY_". This naming convention has historical roots. Despite their names, the functions operate on both arrays and multisets.
+
+  
+
+Because of the special properties of the aggregation pseudo-functions, SQL (and therefore SQL++) is not a pure functional language. But every query that uses a pseudo-function can be expressed as an equivalent query that uses an ordinary function. Q3.23 is an example of how queries can be expressed without pseudo-functions. A more detailed explanation of all of the functions is also available [here](builtins.html#AggregateFunctions).
+
+##### Example  
+
+ (Q3.23) Alternative form of Q3.22, using the ordinary function `ARRAY_AVG` rather than the aggregating pseudo-function `AVG`.
+
+  
+
+    SELECT ARRAY_AVG(
+        (SELECT VALUE c.rating
+        FROM customers AS c) ) AS `avg credit rating`;
+
+ Result (same as Q3.22):
+
+  
+    [
+        {
+            "avg credit rating": 670
+        }
+    ]
+
+If the function `STRICT_AVG` had been used in Q3.23 in place of `ARRAY_AVG`, the average credit rating returned by the query would have been `null`, because at least one customer has no credit rating.
+
+
+
+### <a id="Group_As_clauses">GROUP AS Clause</a>
+
+---
+
+### GroupAsClause
+**![](../images/diagrams/GroupAsClause.png)**
+
+
+---
+
+JSON is a hierarchical format, and a fully featured JSON query language needs to be able to produce hierarchies of its own, with computed data at every level of the hierarchy. The key feature of SQL++ that makes this possible is the `GROUP AS` clause.
+
+  
+
+A query may have a `GROUP AS` clause only if it has a `GROUP BY` clause. The `GROUP BY` clause "hides" the original objects in each group, exposing only the grouping expressions and special aggregation functions on the non-grouping fields. The purpose of the `GROUP AS` clause is to make the original objects in the group visible to subsequent clauses. Thus the query can generate output data both for the group as a whole and for the individual objects inside the group.
+
+  
+
+For each group, the `GROUP AS` clause preserves all the objects in the group, just as they were before grouping, and gives a name to this preserved group. The group name can then be used in the `FROM` clause of a subquery to process and return the individual objects in the group.
+
+  
+
+To see how this works, we'll write some queries that investigate the customers in each zipcode and their credit ratings. This would be a good time to review the sample database in Appendix 4. A part of the data is summarized below. 
+
+    Customers in zipcode 02115:
+        C35, J. Roberts, rating 565
+        C37, T. Henry, rating 750
+
+    Customers in zipcode 02340:
+        C25, M. Sinclair, rating 690
+
+    Customers in zipcode 63101:
+        C13, T. Cody, rating 750
+        C31, B. Pruitt, (no rating)
+        C41, R. Dodge, rating 640
+        
+    Customers with no zipcode:
+        C47, S. Logan, rating 625
+
+  
+
+Now let's consider the effect of the following clauses:
+
+    FROM customers AS c
+    GROUP BY c.address.zipcode
+    GROUP AS g
+
+This query fragment iterates over the `customers` objects, using the iteration variable `c`. The `GROUP BY` clause forms the objects into groups, each with a common zipcode (including one group for customers with no zipcode). After the `GROUP BY` clause, we can see the grouping expression, `c.address.zipcode`, but other fields such as `c.custid` and `c.name` are visible only to special aggregation functions.
+  
+The clause `GROUP AS g` now makes the original objects visible again. For each group in turn, the variable `g` is bound to a multiset of objects, each of which has a field named `c`, which in turn contains one of the original objects. Thus after `GROUP AS g`, for the group with zipcode 02115, `g` is bound to the following multiset:
+
+    
+    [ 
+        { "c": 
+            { "custid": "C35",
+              "name": "J. Roberts",
+              "address":
+                { "street": "420 Green St.",
+                  "city": "Boston, MA",
+                  "zipcode": "02115"
+                },
+              "rating": 565
             }
-        ],
-        "uid": 1
-    }, {
-        "msgs": [
-            {
-                "msg": {
-                    "senderLocation": [
-                        31.5,
-                        75.56
-                    ],
-                    "inResponseTo": 1,
-                    "messageId": 6,
-                    "authorId": 2,
-                    "message": " like product-z its platform is mind-blowing"
+        },
+        { "c":
+            { "custid": "C37",
+              "name": "T. Henry",
+              "address":
+                { "street": "120 Harbor Blvd.",
+                  "city": "St. Louis, MO",
+                  "zipcode": "02115"
+                },
+              "rating": 750
+            }
+        }
+    ]
+
+  
+
+Thus, the clauses following `GROUP AS` can see the original objects by writing subqueries that iterate over the multiset `g`.
+
+The extra level named `c` was introduced into this multiset because the groups might have been formed from a join of two or more collections. Suppose that the `FROM` clause looked like `FROM customers AS c, orders AS o`. Then each item in the group would contain both a `customers` object and an `orders` object, and these two objects might both have a field with the same name. To avoid ambiguity, each of the original objects is wrapped in an "outer" object that gives it the name of its iteration variable in the `FROM` clause. Consider this fragment:
+
+    FROM customers AS c, orders AS o
+    WHERE c.custid = o.custid
+    GROUP BY c.address.zipcode
+    GROUP AS g
+
+In this case, following `GROUP AS g`, the variable `g` would be bound to the following collection:
+
+    [ 
+        { "c": { an original customers object },
+          "o": { an original orders object }
+        },
+        { "c": { another customers object },
+          "o": { another orders object }
+        },
+        ...
+    ]
+
+After using `GROUP AS` to make the content of a group accessible, you will probably want to write a subquery to access that content. A subquery for this purpose is written in exactly the same way as any other subquery. The name specified in the `GROUP AS` clause (`g` in the above example) is the name of a collection of objects. You can write a `FROM` clause to iterate over the objects in the collection, and you can specify an iteration variable to represent each object in turn. For `GROUP AS` queries in this manual, we'll use `g` as the name of the reconstituted group, and `gi` as an iteration variable representing one object inside the group. Of course, you can use any names you like for these purposes.
+
+Now we are ready to take a look at how `GROUP AS` might be used in a query. Suppose that we want to group customers by zipcode, and for each group we want to see the average credit rating and a list of the individual customers in the group. Here's a query that does that:
+
+##### Example 
+(Q3.24) For each zipcode, list the average credit rating in that zipcode, followed by the customer numbers and names in numeric order.
+
+    FROM customers AS c
+    GROUP BY c.address.zipcode AS zip
+    GROUP AS g
+    SELECT zip, AVG(c.rating) AS `avg credit rating`,
+        (FROM g AS gi
+         SELECT gi.c.custid, gi.c.name
+         ORDER BY gi.c.custid) AS `local customers`
+    ORDER BY zip;
+
+Result:
+
+    [
+        {
+            "avg credit rating": 625,
+            "local customers": [
+                {
+                    "custid": "C47",
+                    "name": "S. Logan"
                 }
-            },
-            {
-                "msg": {
-                    "senderLocation": [
-                        48.09,
-                        81.01
-                    ],
-                    "inResponseTo": 4,
-                    "messageId": 3,
-                    "authorId": 2,
-                    "message": " like product-y the plan is amazing"
+            ]
+        },
+        {
+            "avg credit rating": 657.5,
+            "local customers": [
+                {
+                    "custid": "C35",
+                    "name": "J. Roberts"
+                },
+                {
+                    "custid": "C37",
+                    "name": "T. Henry"
                 }
-            }
-        ],
-        "uid": 2
-    } ]
+            ],
+            "zip": "02115"
+        },
+        {
+            "avg credit rating": 690,
+            "local customers": [
+                {
+                    "custid": "C25",
+                    "name": "M. Sinclair"
+                }
+            ],
+            "zip": "02340"
+        },
+        {
+            "avg credit rating": 695,
+            "local customers": [
+                {
+                    "custid": "C13",
+                    "name": "T. Cody"
+                },
+                {
+                    "custid": "C31",
+                    "name": "B. Pruitt"
+                },
+                {
+                    "custid": "C41",
+                    "name": "R. Dodge"
+                }
+            ],
+            "zip": "63101"
+        }
+    ]
+
+Note that this query contains two `ORDER BY` clauses: one in the outer query and one in the subquery. These two clauses govern the ordering of the outer-level list of zipcodes and the inner-level lists of customers, respectively. Also note that the group of customers with no zipcode comes first in the output list. For additional reading on SQL++ and more examples using `GROUP AS` as well as other clauses discussed in this manual see the [SQL++ Tutorial](https://asterixdb.apache.org/files/SQL_Book.pdf).
 
-As we can see from the above query result, each group in the example query's output has an associated group
-variable value called `msgs` that appears in the `SELECT *`'s result.
-This variable contains a collection of objects associated with the group; each of the group's `message` values
-appears in the `msg` field of the objects in the `msgs` collection.
+## <a id="Union_all">Selection and UNION ALL</a>
 
-The group variable in the query language makes more complex, composable, nested subqueries over a group possible, which is
-important given the language's more complex data model (relative to SQL).
-As a simple example of this, as we really just want the messages associated with each user, we might wish to avoid
-the "extra wrapping" of each message as the `msg` field of an object.
-(That wrapping is useful in more complex cases, but is essentially just in the way here.)
-We can use a subquery in the `SELECT` clause to tunnel through the extra nesting and produce the desired result.
+---
+### Selection
+**![](../images/diagrams/Selection.png)**
+
+### UnionOption
+**![](../images/diagrams/UnionOption.png)**
+
+---
+
+In a SQL++ query, two or more query blocks can be connected by the operator `UNION ALL`. The result of a `UNION ALL` between two query blocks contains all the items returned by the first query block, and all the items returned by the second query block. Duplicate items are not eliminated from the query result.
+
+As in SQL, there is no ordering guarantee on the contents of the output stream. However, unlike SQL, the query language does not constrain what the data looks like on the input streams; in particular, it allows heterogeneity on the input and output streams. A type error will be raised if one of the inputs is not a collection.
+
+When two or more query blocks are connected by `UNION ALL`, they can be followed by `ORDER BY`, `LIMIT`, and `OFFSET` clauses that apply to the `UNION` query as a whole. For these clauses to be meaningful, the field-names returned by the two query blocks should match. The following example shows a `UNION ALL` of two query blocks, with an ordering specified for the result.
+
+In this example, a customer might be selected because he has ordered more than two different items (first query block) or because he has a high credit rating (second query block). By adding an explanatory string to each query block, the query writer can cause the output objects to be labeled to distinguish these two cases.
+
+  
 
 ##### Example
 
-    SELECT uid, (SELECT VALUE g.msg FROM g) AS msgs
-    FROM GleambookMessages gbm
-    GROUP BY gbm.authorId AS uid
-    GROUP AS g(gbm as msg);
+(Q3.25a) Find customer ids for customers who have placed orders for more than two different items or who have a credit rating greater than 700, with labels to distinguish these cases.
 
-This variant of the example query returns:
+  
 
-       [ {
-           "msgs": [
-               {
-                   "senderLocation": [
-                       38.97,
-                       77.49
-                   ],
-                   "inResponseTo": 1,
-                   "messageId": 11,
-                   "authorId": 1,
-                   "message": " can't stand acast its plan is terrible"
-               },
-               {
-                   "senderLocation": [
-                       41.66,
-                       80.87
-                   ],
-                   "inResponseTo": 4,
-                   "messageId": 2,
-                   "authorId": 1,
-                   "message": " dislike x-phone its touch-screen is horrible"
-               },
-               {
-                   "senderLocation": [
-                       37.73,
-                       97.04
-                   ],
-                   "inResponseTo": 2,
-                   "messageId": 4,
-                   "authorId": 1,
-                   "message": " can't stand acast the network is horrible:("
-               },
-               {
-                   "senderLocation": [
-                       40.33,
-                       80.87
-                   ],
-                   "inResponseTo": 11,
-                   "messageId": 8,
-                   "authorId": 1,
-                   "message": " like ccast the 3G is awesome:)"
-               },
-               {
-                   "senderLocation": [
-                       42.5,
-                       70.01
-                   ],
-                   "inResponseTo": 12,
-                   "messageId": 10,
-                   "authorId": 1,
-                   "message": " can't stand product-w the touch-screen is terrible"
-               }
-           ],
-           "uid": 1
-       }, {
-           "msgs": [
-               {
-                   "senderLocation": [
-                       31.5,
-                       75.56
-                   ],
-                   "inResponseTo": 1,
-                   "messageId": 6,
-                   "authorId": 2,
-                   "message": " like product-z its platform is mind-blowing"
-               },
-               {
-                   "senderLocation": [
-                       48.09,
-                       81.01
-                   ],
-                   "inResponseTo": 4,
-                   "messageId": 3,
-                   "authorId": 2,
-                   "message": " like product-y the plan is amazing"
-               }
-           ],
-           "uid": 2
-       } ]
+	FROM orders AS o, o.items AS i
+	GROUP BY o.orderno, o.custid
+	HAVING COUNT(*) > 2
+	SELECT DISTINCT o.custid AS customer_id, "Big order" AS reason
 
-The next example shows a more interesting case involving the use of a subquery in the `SELECT` list.
-Here the subquery further processes the groups.
-There is no renaming in the declaration of the group variable `g` such that
-`g` only has one field `gbm` which comes from the `FROM` clause.
+	UNION ALL
+
+	FROM customers AS c
+	WHERE rating > 700
+	SELECT c.custid AS customer_id, "High rating" AS reason
+	ORDER BY customer_id;
+
+Result:
+
+	  
+	[
+	    {
+	        "reason": "High rating",
+	        "customer_id": "C13"
+	    },
+	    {
+	        "reason": "Big order",
+	        "customer_id": "C37"
+	    },
+	    {
+	        "reason": "High rating",
+	        "customer_id": "C37"
+	    },
+	    {
+	        "reason": "Big order",
+	        "customer_id": "C41"
+	    }
+	]
+
+  
+
+If, on the other hand, you simply want a list of the customer ids and you don't care to preserve the reasons, you can simplify your output by using `SELECT VALUE`, as follows:
+
+  
+
+(Q3.25b) Simplify Q3.25a to return a simple list of unlabeled customer ids.
+
+  
+
+	FROM orders AS o, o.items AS i
+	GROUP BY o.orderno, o.custid
+	HAVING COUNT(*) > 2
+	SELECT VALUE o.custid
+
+	UNION ALL
+
+	FROM customers AS c
+	WHERE rating > 700
+	SELECT VALUE c.custid;
+
+Result:
+
+	[
+	    "C37",
+	    "C41",
+	    "C13",
+	    "C37"
+	]
+
+## <a id="With_clauses">WITH Clause</a>
+
+---
+
+### WithClause
+**![](../images/diagrams/WithClause.png)**
+
+
+---
+
+As in standard SQL, a `WITH` clause can be used to improve the modularity of a query. A `WITH` clause often contains a subquery that is needed to compute some result that is used later in the main query. In cases like this, you can think of the `WITH` clause as computing a "temporary view" of the input data. The next example uses a `WITH` clause to compute the total revenue of each order in 2020; then the main part of the query finds the minimum, maximum, and average revenue for orders in that year.
 
 ##### Example
 
-    SELECT uid,
-           (SELECT VALUE g.gbm
-            FROM g
-            WHERE g.gbm.message LIKE '% like%'
-            ORDER BY g.gbm.messageId
-            LIMIT 2) AS msgs
-    FROM GleambookMessages gbm
-    GROUP BY gbm.authorId AS uid
-    GROUP AS g;
+(Q3.26) Find the minimum, maximum, and average revenue among all orders in 2020, rounded to the nearest integer. 
 
-This example query returns:
+    WITH order_revenue AS
+        (FROM orders AS o, o.items AS i
+        WHERE get_year(date(o.order_date)) = 2020
+        GROUP BY o.orderno
+        SELECT o.orderno, SUM(i.qty * i.price) AS revenue
+      )
+    FROM order_revenue
+    SELECT AVG(revenue) AS average,
+           MIN(revenue) AS minimum,
+           MAX(revenue) AS maximum;
+         
 
-    [ {
-        "msgs": [
-            {
-                "senderLocation": [
-                    40.33,
-                    80.87
-                ],
-                "inResponseTo": 11,
-                "messageId": 8,
-                "authorId": 1,
-                "message": " like ccast the 3G is awesome:)"
-            }
-        ],
-        "uid": 1
-    }, {
-        "msgs": [
-            {
-                "senderLocation": [
-                    48.09,
-                    81.01
-                ],
-                "inResponseTo": 4,
-                "messageId": 3,
-                "authorId": 2,
-                "message": " like product-y the plan is amazing"
-            },
-            {
-                "senderLocation": [
-                    31.5,
-                    75.56
-                ],
-                "inResponseTo": 1,
-                "messageId": 6,
-                "authorId": 2,
-                "message": " like product-z its platform is mind-blowing"
-            }
-        ],
-        "uid": 2
-    } ]
+Result:
 
-### <a id="Implicit_group_key_variables">Implicit Grouping Key Variables</a>
-In the query language syntax, providing named binding variables for `GROUP BY` key expressions is optional.
-If a grouping key is missing a user-provided binding variable, the underlying compiler will generate one.
-Automatic grouping key variable naming falls into three cases, much like the treatment of unnamed projections:
+    [
+        {
+            "average": 4669.99,
+            "minimum": 130.45,
+            "maximum": 18847.58
+        }
+    ]
 
-  * If the grouping key expression is a variable reference expression, the generated variable gets the same name as the referred variable;
-  * If the grouping key expression is a field access expression, the generated variable gets the same name as the last identifier in the expression;
-  * For all other cases, the compiler generates a unique variable (but the user query is unable to refer to this generated variable).
+`WITH` can be particularly useful when a value needs to be used several times in a query.
 
-The next example illustrates a query that doesn't provide binding variables for its grouping key expressions.
+## <a id="Order_By_clauses">ORDER BY and LIMIT Clauses</a>
 
-##### Example
+---
+### OrderbyClause
+**![](../images/diagrams/OrderbyClause.png)**
 
-    SELECT authorId,
-           (SELECT VALUE g.gbm
-            FROM g
-            WHERE g.gbm.message LIKE '% like%'
-            ORDER BY g.gbm.messageId
-            LIMIT 2) AS msgs
-    FROM GleambookMessages gbm
-    GROUP BY gbm.authorId
-    GROUP AS g;
+### LimitClause
+**![](../images/diagrams/LimitClause.png)**
 
-This query returns:
+---
+   
+The last two (optional) clauses to be processed in a query are `ORDER BY` and `LIMIT`.
 
-        [ {
-        "msgs": [
-            {
-                "senderLocation": [
-                    40.33,
-                    80.87
-                ],
-                "inResponseTo": 11,
-                "messageId": 8,
-                "authorId": 1,
-                "message": " like ccast the 3G is awesome:)"
-            }
-        ],
-        "authorId": 1
-    }, {
-        "msgs": [
-            {
-                "senderLocation": [
-                    48.09,
-                    81.01
-                ],
-                "inResponseTo": 4,
-                "messageId": 3,
-                "authorId": 2,
-                "message": " like product-y the plan is amazing"
-            },
-            {
-                "senderLocation": [
-                    31.5,
-                    75.56
-                ],
-                "inResponseTo": 1,
-                "messageId": 6,
-                "authorId": 2,
-                "message": " like product-z its platform is mind-blowing"
-            }
-        ],
-        "authorId": 2
-    } ]
-
-Based on the three variable generation rules, the generated variable for the grouping key expression `message.authorId`
-is `authorId` (which is how it is referred to in the example's `SELECT` clause).
-
-### <a id="Implicit_group_variables">Implicit Group Variables</a>
-The group variable itself is also optional in the `GROUP BY` syntax.
-If a user's query does not declare the name and structure of the group variable using `GROUP AS`,
-the query compiler will generate a unique group variable whose fields include all of the binding
-variables defined in the `FROM` clause of the current enclosing `SELECT` statement.
-In this case the user's query will not be able to refer to the generated group variable,
-but is able to call SQL-92 aggregation functions as in SQL-92.
-
-
-### <a id="Aggregation_functions">Aggregation Functions</a>
-In the traditional SQL, which doesn't support nested data, grouping always also involves the use of aggregation
-to compute properties of the groups (for example, the average number of messages per user rather than the actual set
-of messages per user).
-Each aggregation function in the query language takes a collection (for example, the group of messages) as its input and produces
-a scalar value as its output.
-These aggregation functions, being truly functional in nature (unlike in SQL), can be used anywhere in a
-query where an expression is allowed.
-The following table catalogs the built-in aggregation functions of the query language and also indicates how each one handles
-`NULL`/`MISSING` values in the input collection or a completely empty input collection:
-
-| Function       | NULL         | MISSING      | Empty Collection |
-|----------------|--------------|--------------|------------------|
-| STRICT_COUNT   | counted      | counted      | 0                |
-| STRICT_SUM     | returns NULL | returns NULL | returns NULL     |
-| STRICT_MAX     | returns NULL | returns NULL | returns NULL     |
-| STRICT_MIN     | returns NULL | returns NULL | returns NULL     |
-| STRICT_AVG     | returns NULL | returns NULL | returns NULL     |
-| STRICT_STDDEV_SAMP | returns NULL | returns NULL | returns NULL |
-| STRICT_STDDEV_POP  | returns NULL | returns NULL | returns NULL |
-| STRICT_VAR_SAMP    | returns NULL | returns NULL | returns NULL |
-| STRICT_VAR_POP     | returns NULL | returns NULL | returns NULL |
-| STRICT_SKEWNESS    | returns NULL | returns NULL | returns NULL |
-| STRICT_KURTOSIS    | returns NULL | returns NULL | returns NULL |
-| ARRAY_COUNT    | not counted  | not counted  | 0                |
-| ARRAY_SUM      | ignores NULL | ignores NULL | returns NULL     |
-| ARRAY_MAX      | ignores NULL | ignores NULL | returns NULL     |
-| ARRAY_MIN      | ignores NULL | ignores NULL | returns NULL     |
-| ARRAY_AVG      | ignores NULL | ignores NULL | returns NULL     |
-| ARRAY_STDDEV_SAMP  | ignores NULL | ignores NULL | returns NULL |
-| ARRAY_STDDEV_POP   | ignores NULL | ignores NULL | returns NULL |
-| ARRAY_VAR_SAMP     | ignores NULL | ignores NULL | returns NULL |
-| ARRAY_VAR_POP      | ignores NULL | ignores NULL | returns NULL |
-| ARRAY_SKEWNESS     | ignores NULL | ignores NULL | returns NULL |
-| ARRAY_KURTOSIS     | ignores NULL | ignores NULL | returns NULL |
-
-Notice that the query language offers two versions for each of the aggregate functions listed above.
-For each function, the STRICT version handles `UNKNOWN` values in a semantically strict fashion,
-where unknown values in the input result in unknown values in the output; and the ARRAY version
-handles them in the ad hoc "just ignore the unknown values" fashion that the SQL standard chose to adopt.
-
-##### Example
-
-    ARRAY_AVG(
-        (
-          SELECT VALUE ARRAY_COUNT(friendIds) FROM GleambookUsers
-        )
-    );
-
-This example returns:
-
-    3.3333333333333335
-
-##### Example
-
-    SELECT uid AS uid, ARRAY_COUNT(grp) AS msgCnt
-    FROM GleambookMessages message
-    GROUP BY message.authorId AS uid
-    GROUP AS grp(message AS msg);
-
-This query returns:
-
-    [ {
-        "uid": 1,
-        "msgCnt": 5
-    }, {
-        "uid": 2,
-        "msgCnt": 2
-    } ]
-
-Notice how the query forms groups where each group involves a message author and their messages.
-(SQL cannot do this because the grouped intermediate result is non-1NF in nature.)
-The query then uses the collection aggregate function ARRAY_COUNT to get the cardinality of each
-group of messages.
-
-Each aggregation function in the query language supports the DISTINCT modifier that removes duplicate values from
-the input collection.
-
-##### Example
-
-    ARRAY_SUM(DISTINCT [1, 1, 2, 2, 3])
-
-This query returns:
-
-    6
-
-### <a id="SQL-92_aggregation_functions">SQL-92 Aggregation Functions</a>
-For compatibility with the traditional SQL aggregation functions, the query language also offers SQL-92's
-aggregation function symbols (`COUNT`, `SUM`, `MAX`, `MIN`, `AVG`, `ARRAY_AGG`, `STDDEV_SAMP`, `STDDEV_POP`, `VAR_SAMP`,
-`VAR_POP`) as supported syntactic sugar.
-The query compiler rewrites queries that utilize these function symbols into queries that only
-use the collection aggregate functions of the query language. The following example uses the SQL-92 syntax approach
-to compute a result that is identical to that of the more explicit example above:
-
-##### Example
-
-    SELECT uid, COUNT(*) AS msgCnt
-    FROM GleambookMessages msg
-    GROUP BY msg.authorId AS uid;
-
-It is important to realize that `COUNT` is actually **not** a built-in aggregation function.
-Rather, the `COUNT` query above is using a special "sugared" function symbol that the query compiler
-will rewrite as follows:
-
-    SELECT uid AS uid, ARRAY_COUNT( (SELECT VALUE 1 FROM `$1` AS g) ) AS msgCnt
-    FROM GleambookMessages msg
-    GROUP BY msg.authorId AS uid
-    GROUP AS `$1`(msg AS msg);
-
-
-The same sort of rewritings apply to the function symbols `SUM`, `MAX`, `MIN`, `AVG`, `ARRAY_AGG`, `STDDEV_SAMP`,
-`STDDEV_POP`, `VAR_SAMP`, and `VAR_POP`.
-In contrast to the collection aggregate functions of the query language, these special SQL-92 function symbols
-can only be used in the same way they are in standard SQL (i.e., with the same restrictions).
-
-The DISTINCT modifier is also supported for these aggregate functions.
-
-The following table shows the SQL-92 functions supported by the query language, their aliases where available,
-and their corresponding built-in functions.
-
-| SQL-92 Function | Aliases                 | Corresponding Built-in Function |
-|-----------------|-------------------------|---------------------------------|
-| COUNT           |                         | ARRAY_COUNT                     |
-| SUM             |                         | ARRAY_SUM                       |
-| MAX             |                         | ARRAY_MAX                       |
-| MIN             |                         | ARRAY_MIN                       |
-| AVG             |                         | ARRAY_AVG                       |
-| ARRAY_AGG       |                         | (none)                          |
-| STDDEV_SAMP     | STDDEV                  | ARRAY_STDDEV_SAMP               |
-| STDDEV_POP      |                         | ARRAY_STDDEV_POP                |
-| VAR_SAMP        | VARIANCE, VARIANCE_SAMP | ARRAY_VAR_SAMP                  |
-| VAR_POP         | VARIANCE_POP            | ARRAY_VAR_POP                   |
-
-Note that the `ARRAY_AGG` function symbol is rewritten simply to return the result of the generated subquery,
-without applying any built-in function.
-
-SQL aggregate function calls optionally support a FILTER subclause.
-
-##### Example
-
-    SELECT uid, COUNT(*) FILTER (WHERE msg.message LIKE "%awesome%") AS msgCnt
-    FROM GleambookMessages msg
-    GROUP BY msg.authorId AS uid;
-
-The query compiler rewrites this query to use the built-in aggregate as follows:
-
-    SELECT uid AS uid, ARRAY_COUNT( (SELECT VALUE 1 FROM `$1` AS g WHERE g.msg.message LIKE "%awesome%") ) AS msgCnt
-    FROM GleambookMessages msg
-    GROUP BY msg.authorId AS uid
-    GROUP AS `$1`(msg AS msg);
-
-Note that the FILTER subclause is not supported for built-in aggregate function calls.
-
-### <a id="SQL-92_compliant_gby">SQL-92 Compliant GROUP BY Aggregations</a>
-The query language provides full support for SQL-92 `GROUP BY` aggregation queries.
-The following query is such an example:
-
-##### Example
-
-    SELECT msg.authorId, COUNT(*)
-    FROM GleambookMessages msg
-    GROUP BY msg.authorId;
-
-This query outputs:
-
-    [ {
-        "authorId": 1,
-        "$1": 5
-    }, {
-        "authorId": 2,
-        "$1": 2
-    } ]
-
-In principle, a `msg` reference in the query's `SELECT` clause would be "sugarized" as a collection
-(as described in [Implicit Group Variables](#Implicit_group_variables)).
-However, since the SELECT expression `msg.authorId` is syntactically identical to a GROUP BY key expression,
-it will be internally replaced by the generated group key variable.
-The following is the equivalent rewritten query that will be generated by the compiler for the query above:
-
-    SELECT authorId AS authorId, ARRAY_COUNT( (SELECT g.msg FROM `$1` AS g) )
-    FROM GleambookMessages msg
-    GROUP BY msg.authorId AS authorId
-    GROUP AS `$1`(msg AS msg);
-
-### <a id="Column_aliases">Column Aliases</a>
-The query language also allows column aliases to be used as `ORDER BY` keys.
-
-##### Example
-
-    SELECT msg.authorId AS aid, COUNT(*)
-    FROM GleambookMessages msg
-    GROUP BY msg.authorId
-    ORDER BY aid;
-
-This query returns:
-
-    [ {
-        "$1": 5,
-        "aid": 1
-    }, {
-        "$1": 2,
-        "aid": 2
-    } ]
-
-## <a id="Where_having_clauses">WHERE Clauses and HAVING Clauses</a>
-Both `WHERE` clauses and `HAVING` clauses are used to filter input data based on a condition expression.
-Only tuples for which the condition expression evaluates to `TRUE` are propagated.
-Note that if the condition expression evaluates to `NULL` or `MISSING` the input tuple will be discarded.
-
-## <a id="Order_By_clauses">ORDER BY Clauses</a>
 The `ORDER BY` clause is used to globally sort data in either ascending order (i.e., `ASC`) or descending order (i.e., `DESC`).
 During ordering, `MISSING` and `NULL` are treated as being smaller than any other value if they are encountered
 in the ordering key(s). `MISSING` is treated as smaller than `NULL` if both occur in the data being sorted.
-The ordering of values of a given type is consistent with its type's <= ordering; the ordering of values across types is implementation-defined but stable.
-The following example returns all `GleambookUsers` in descending order by their number of friends.
+The ordering of values of a given type is consistent with its type's `<=` ordering; the ordering of values across types is implementation-defined but stable. 
+
+The `LIMIT` clause is used to limit the result set to a specified maximum size. The optional `OFFSET` clause is used to specify a number of items in the output stream to be discarded before the query result begins. 
+
+The following example illustrates use of the `ORDER BY` and `LIMIT` clauses.
 
 ##### Example
+(Q3.27) Return the top three customers by rating.
 
-      SELECT VALUE user
-      FROM GleambookUsers AS user
-      ORDER BY ARRAY_COUNT(user.friendIds) DESC;
+    FROM customers AS c
+    SELECT c.custid, c.name, c.rating
+    ORDER BY c.rating DESC
+    LIMIT 3;
 
-This query returns:
-
-      [ {
-          "userSince": "2012-08-20T10:10:00.000Z",
-          "friendIds": [
-              2,
-              3,
-              6,
-              10
-          ],
-          "gender": "F",
-          "name": "MargaritaStoddard",
-          "nickname": "Mags",
-          "alias": "Margarita",
-          "id": 1,
-          "employment": [
-              {
-                  "organizationName": "Codetechno",
-                  "start-date": "2006-08-06"
-              },
-              {
-                  "end-date": "2010-01-26",
-                  "organizationName": "geomedia",
-                  "start-date": "2010-06-17"
-              }
-          ]
-      }, {
-          "userSince": "2012-07-10T10:10:00.000Z",
-          "friendIds": [
-              1,
-              5,
-              8,
-              9
-          ],
-          "name": "EmoryUnk",
-          "alias": "Emory",
-          "id": 3,
-          "employment": [
-              {
-                  "organizationName": "geomedia",
-                  "endDate": "2010-01-26",
-                  "startDate": "2010-06-17"
-              }
-          ]
-      }, {
-          "userSince": "2011-01-22T10:10:00.000Z",
-          "friendIds": [
-              1,
-              4
-          ],
-          "name": "IsbelDull",
-          "nickname": "Izzy",
-          "alias": "Isbel",
-          "id": 2,
-          "employment": [
-              {
-                  "organizationName": "Hexviafind",
-                  "startDate": "2010-04-27"
-              }
-          ]
-      } ]
-
-## <a id="Limit_clauses">LIMIT Clauses</a>
-The `LIMIT` clause is used to limit the result set to a specified constant size.
-The use of the `LIMIT` clause is illustrated in the next example.
-
-##### Example
-
-      SELECT VALUE user
-      FROM GleambookUsers AS user
-      ORDER BY len(user.friendIds) DESC
-      LIMIT 1;
-
-This query returns:
-
-      [ {
-          "userSince": "2012-08-20T10:10:00.000Z",
-          "friendIds": [
-              2,
-              3,
-              6,
-              10
-          ],
-          "gender": "F",
-          "name": "MargaritaStoddard",
-          "nickname": "Mags",
-          "alias": "Margarita",
-          "id": 1,
-          "employment": [
-              {
-                  "organizationName": "Codetechno",
-                  "start-date": "2006-08-06"
-              },
-              {
-                  "end-date": "2010-01-26",
-                  "organizationName": "geomedia",
-                  "start-date": "2010-06-17"
-              }
-          ]
-      } ]
-
-## <a id="With_clauses">WITH Clauses</a>
-As in standard SQL, `WITH` clauses are available to improve the modularity of a query.
-The next query shows an example.
-
-##### Example
-
-    WITH avgFriendCount AS (
-      SELECT VALUE AVG(ARRAY_COUNT(user.friendIds))
-      FROM GleambookUsers AS user
-    )[0]
-    SELECT VALUE user
-    FROM GleambookUsers user
-    WHERE ARRAY_COUNT(user.friendIds) > avgFriendCount;
-
-This query returns:
-
-    [ {
-        "userSince": "2012-08-20T10:10:00.000Z",
-        "friendIds": [
-            2,
-            3,
-            6,
-            10
-        ],
-        "gender": "F",
-        "name": "MargaritaStoddard",
-        "nickname": "Mags",
-        "alias": "Margarita",
-        "id": 1,
-        "employment": [
-            {
-                "organizationName": "Codetechno",
-                "start-date": "2006-08-06"
-            },
-            {
-                "end-date": "2010-01-26",
-                "organizationName": "geomedia",
-                "start-date": "2010-06-17"
-            }
-        ]
-    }, {
-        "userSince": "2012-07-10T10:10:00.000Z",
-        "friendIds": [
-            1,
-            5,
-            8,
-            9
-        ],
-        "name": "EmoryUnk",
-        "alias": "Emory",
-        "id": 3,
-        "employment": [
-            {
-                "organizationName": "geomedia",
-                "endDate": "2010-01-26",
-                "startDate": "2010-06-17"
-            }
-        ]
-    } ]
-
-The query is equivalent to the following, more complex, inlined form of the query:
-
-    SELECT *
-    FROM GleambookUsers user
-    WHERE ARRAY_COUNT(user.friendIds) >
-        ( SELECT VALUE AVG(ARRAY_COUNT(user.friendIds))
-          FROM GleambookUsers AS user
-        ) [0];
-
-WITH can be particularly useful when a value needs to be used several times in a query.
-
-Before proceeding further, notice that both the WITH query and its equivalent inlined variant
-include the syntax "[0]" -- this is due to a noteworthy difference between the query language and SQL-92.
-In SQL-92, whenever a scalar value is expected and it is being produced by a query expression,
-the SQL-92 query processor will evaluate the expression, check that there is only one row and column
-in the result at runtime, and then coerce the one-row/one-column tabular result into a scalar value.
-A JSON query language, being designed to deal with nested data and schema-less data, should not do this.
-Collection-valued data is perfectly legal in most contexts, and its data is schema-less,
-so the query processor rarely knows exactly what to expect where and such automatic conversion would often
-not be desirable. Thus, in the queries above, the use of "[0]" extracts the first (i.e., 0th) element of
-an array-valued query expression's result; this is needed above, even though the result is an array of one
-element, to extract the only element in the singleton array and obtain the desired scalar for the comparison.
-
-## <a id="Let_clauses">LET Clauses</a>
-Similar to `WITH` clauses, `LET` clauses can be useful when a (complex) expression is used several times within a query, allowing it to be written once to make the query more concise. The next query shows an example.
-
-##### Example
-
-    SELECT u.name AS uname, messages AS messages
-    FROM GleambookUsers u
-    LET messages = (SELECT VALUE m
-                    FROM GleambookMessages m
-                    WHERE m.authorId = u.id)
-    WHERE EXISTS messages;
-
-This query lists `GleambookUsers` that have posted `GleambookMessages` and shows all authored messages for each listed user. It returns:
-
-    [ {
-        "uname": "MargaritaStoddard",
-        "messages": [
-            {
-                "senderLocation": [
-                    38.97,
-                    77.49
-                ],
-                "inResponseTo": 1,
-                "messageId": 11,
-                "authorId": 1,
-                "message": " can't stand acast its plan is terrible"
-            },
-            {
-                "senderLocation": [
-                    41.66,
-                    80.87
-                ],
-                "inResponseTo": 4,
-                "messageId": 2,
-                "authorId": 1,
-                "message": " dislike x-phone its touch-screen is horrible"
-            },
-            {
-                "senderLocation": [
-                    37.73,
-                    97.04
-                ],
-                "inResponseTo": 2,
-                "messageId": 4,
-                "authorId": 1,
-                "message": " can't stand acast the network is horrible:("
-            },
-            {
-                "senderLocation": [
-                    40.33,
-                    80.87
-                ],
-                "inResponseTo": 11,
-                "messageId": 8,
-                "authorId": 1,
-                "message": " like ccast the 3G is awesome:)"
-            },
-            {
-                "senderLocation": [
-                    42.5,
-                    70.01
-                ],
-                "inResponseTo": 12,
-                "messageId": 10,
-                "authorId": 1,
-                "message": " can't stand product-w the touch-screen is terrible"
-            }
-        ]
-    }, {
-        "uname": "IsbelDull",
-        "messages": [
-            {
-                "senderLocation": [
-                    31.5,
-                    75.56
-                ],
-                "inResponseTo": 1,
-                "messageId": 6,
-                "authorId": 2,
-                "message": " like product-z its platform is mind-blowing"
-            },
-            {
-                "senderLocation": [
-                    48.09,
-                    81.01
-                ],
-                "inResponseTo": 4,
-                "messageId": 3,
-                "authorId": 2,
-                "message": " like product-y the plan is amazing"
-            }
-        ]
-    } ]
-
-This query is equivalent to the following query that does not use the `LET` clause:
-
-    SELECT u.name AS uname, ( SELECT VALUE m
-                              FROM GleambookMessages m
-                              WHERE m.authorId = u.id
-                            ) AS messages
-    FROM GleambookUsers u
-    WHERE EXISTS ( SELECT VALUE m
-                   FROM GleambookMessages m
-                   WHERE m.authorId = u.id
-                 );
-
-## <a id="Union_all">UNION ALL</a>
-UNION ALL can be used to combine two input arrays or multisets into one. As in SQL, there is no ordering guarantee
-on the contents of the output stream.
-However, unlike SQL, the query language does not constrain what the data looks like on the input streams; in particular,
-it allows heterogeneity on the input and output streams.
-A type error will be raised if one of the inputs is not a collection.
-The following odd but legal query is an example:
-
-##### Example
-
-    SELECT u.name AS uname
-    FROM GleambookUsers u
-    WHERE u.id = 2
-      UNION ALL
-    SELECT VALUE m.message
-    FROM GleambookMessages m
-    WHERE authorId=2;
-
-This query returns:
+Result:
 
     [
-      " like product-z its platform is mind-blowing"
-      , {
-        "uname": "IsbelDull"
-    }, " like product-y the plan is amazing"
-     ]
+        {
+            "custid": "C13",
+            "name": "T. Cody",
+            "rating": 750
+        },
+        {
+            "custid": "C37",
+            "name": "T. Henry",
+            "rating": 750
+        },
+        {
+            "custid": "C25",
+            "name": "M. Sinclair",
+            "rating": 690
+        }
+    ]
 
-## <a id="Over_clauses">OVER Clauses</a> ##
-
-All window functions must have an OVER clause to define the window partitions,
-the order of tuples within those partitions, and the extent of the window frame.
-Some window functions take additional window options, which are specified by
-modifiers before the OVER clause.
-
-The query language has a dedicated set of window functions.
-Aggregate functions can also be used as window functions, when they are used
-with an OVER clause.
-
-### <a id="Window_function_call">Window Function Call</a> ###
-
-    WindowFunctionCall ::= WindowFunctionType "(" WindowFunctionArguments ")"
-    (WindowFunctionOptions)? <OVER> (Variable <AS>)? "(" WindowDefinition ")"
-
-#### <a id="Window_function_type">Window Function Type</a> ####
-
-    WindowFunctionType ::= AggregateFunction | WindowFunction
-
-Refer to the [Aggregate Functions](builtins.html#AggregateFunctions) section
-for a list of aggregate functions.
-
-Refer to the [Window Functions](builtins.html#WindowFunctions) section for a
-list of window functions.
-
-#### <a id="Window_function_arguments">Window Function Arguments</a> ####
-
-    WindowFunctionArguments ::= ( (<DISTINCT>)? Expression |
-    (Expression ("," Expression ("," Expression)? )? )? )
-
-Refer to the [Aggregate Functions](builtins.html#AggregateFunctions) section or
-the [Window Functions](builtins.html#WindowFunctions) section for details of
-the arguments for individual functions.
-
-### <a id="Window_function_options">Window Function Options</a> ###
-
-    WindowFunctionOptions ::= (NthValFrom)? (NullsTreatment)?
-
-Window function options cannot be used with [aggregate
-functions](builtins.html#AggregateFunctions).
-
-Window function options can only be used with some [window
-functions](builtins.html#WindowFunctions), as described below.
-
-#### <a id="Nth_val_from">Nth Val From</a> ####
-
-    NthValFrom ::= <FROM> ( <FIRST> | <LAST> )
-
-The **nth val from** modifier determines whether the computation begins at the
-first or last tuple in the window.
-
-This modifier can only be used with the `nth_value()` function.
-
-This modifier is optional.
-If omitted, the default setting is `FROM FIRST`.
-
-#### <a id="Nulls_treatment">Nulls Treatment</a> ####
-
-    NullsTreatment ::= ( <RESPECT> | <IGNORE> ) <NULLS>
-
-The **nulls treatment** modifier determines whether NULL values are included in
-the computation, or ignored.
-MISSING values are treated the same way as NULL values.
-
-This modifier can only be used with the `first_value()`, `last_value()`,
-`nth_value()`, `lag()`, and `lead()` functions.
-
-This modifier is optional.
-If omitted, the default setting is `RESPECT NULLS`.
-
-### <a id="Window_frame_variable">Window Frame Variable</a> ###
-
-The AS keyword enables you to specify an alias for the window frame contents.
-It introduces a variable which will be bound to the contents of the frame.
-When using a built-in [aggregate function](builtins.html#AggregateFunctions) as
-a window function, the function’s argument must be a subquery which refers to
-this alias, for example:
-
-    SELECT ARRAY_COUNT(DISTINCT (FROM alias SELECT VALUE alias.src.field))
-    OVER alias AS (PARTITION BY … ORDER BY …)
-    FROM source AS src
-
-The alias is not necessary when using a [window function](builtins.html#WindowFunctions),
-or when using a standard SQL aggregate function with the OVER clause.
-
-#### <a id="SQL-92_over_clause">Standard SQL Aggregate Functions with the OVER Clause</a> ####
-
-A standard SQL aggregate function with an OVER clause is rewritten by the
-query compiler using a built-in aggregate function over a frame variable.
-For example, the following query with the `sum()` function:
-
-    SELECT SUM(field) OVER (PARTITION BY … ORDER BY …)
-    FROM source AS src
-
-Is rewritten as the following query using the `array_sum()` function:
-
-    SELECT ARRAY_SUM( (SELECT VALUE alias.src.field FROM alias) )
-      OVER alias AS (PARTITION BY … ORDER BY …)
-    FROM source AS src
-
-This is similar to the way that standard SQL aggregate functions are rewritten
-as built-in aggregate functions in the presence of the GROUP BY clause.
-
-### <a id="Window_definition">Window Definition</a> ###
-
-    WindowDefinition ::= (WindowPartitionClause)? (WindowOrderClause
-    (WindowFrameClause (WindowFrameExclusion)? )? )?
-
-The **window definition** specifies the partitioning, ordering, and framing for
-window functions.
-
-#### <a id="Window_partition_clause">Window Partition Clause</a> ####
-
-    WindowPartitionClause ::= <PARTITION> <BY> Expression ("," Expression)*
-
-The **window partition clause** divides the tuples into logical partitions
-using one or more expressions.
-
-This clause may be used with any [window function](builtins.html#WindowFunctions),
-or any [aggregate function](builtins.html#AggregateFunctions) used as a window
-function.
-
-This clause is optional.
-If omitted, all tuples are united in a single partition.
-
-#### <a id="Window_order_clause">Window Order Clause</a> ####
-
-    WindowOrderClause ::= <ORDER> <BY> OrderingTerm ("," OrderingTerm)*
-
-The **window order clause** determines how tuples are ordered within each
-partition.
-The window function works on tuples in the order specified by this clause.
-
-This clause may be used with any [window function](builtins.html#WindowFunctions),
-or any [aggregate function](builtins.html#AggregateFunctions) used as a window
-function.
-
-This clause is optional.
-If omitted, all tuples are considered peers, i.e. their order is tied.
-When tuples in the window partition are tied, each window function behaves
-differently.
-
-* The `row_number()` function returns a distinct number for each tuple.
-  If tuples are tied, the results may be unpredictable.
-
-* The `rank()`, `dense_rank()`, `percent_rank()`, and `cume_dist()` functions
-  return the same result for each tuple.
-
-* For other functions, if the [window frame](#Window_frame_clause) is
-  defined by `ROWS`, the results may be unpredictable.
-  If the window frame is defined by `RANGE` or `GROUPS`, the results are the same
-  for each tuple.
-
-This clause may have multiple [ordering terms](#Ordering_term).
-To reduce the number of ties, add additional [ordering terms](#Ordering_term).
-
-##### Note #####
-
-This clause does not guarantee the overall order of the query results.
-To guarantee the order of the final results, use the query ORDER BY clause.
-
-#### <a id="Ordering_term">Ordering Term</a> ####
-
-    OrderingTerm ::= Expression ( <ASC> | <DESC> )?
-
-The **ordering term** specifies an ordering expression and collation.
-
-This clause has the same syntax and semantics as the ordering term for queries.
-Refer to the [ORDER BY Clauses](#Order_By_clauses) section for details.
-
-#### <a id="Window_frame_clause">Window Frame Clause</a> ####
-
-    WindowFrameClause ::= ( <ROWS> | <RANGE> | <GROUPS> ) WindowFrameExtent
-
-The **window frame clause** defines the window frame.
-
-This clause can be used with all
-[aggregate functions](builtins.html#AggregateFunctions)
-and some [window functions](builtins.html#WindowFunctions) —
-refer to the descriptions of individual functions for more details.
-
-This clause is allowed only when the [window order
-clause](#Window_order_clause) is present.
-
-This clause is optional.
-
-* If this clause is omitted and there is no [window order
-  clause](#Window_order_clause), the window frame is the entire partition.
-
-* If this clause is omitted but there is a [window order
-  clause](#Window_order_clause), the window frame becomes all tuples
-  in the partition preceding the current tuple and its peers — the
-  same as `RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`.
-
-The window frame can be defined in the following ways:
-
-* `ROWS`: Counts the exact number of tuples within the frame.
-  If window ordering doesn’t result in unique ordering, the function may
-  produce unpredictable results.
-  You can add a unique expression or more window ordering expressions to
-  produce unique ordering.
-
-* `RANGE`: Looks for a value offset within the frame.
-  The function produces deterministic results.
-
-* `GROUPS`: Counts all groups of tied rows within the frame.
-  The function produces deterministic results.
-
-##### Note #####
-
-If this clause uses `RANGE` with either `Expression PRECEDING` or
-`Expression FOLLOWING`, the [window order clause](#Window_order_clause) must
-have only a single ordering term.
-
-The ordering term expression must evaluate to a number.
-<!--
-The ordering term expression must evaluate to a number, a date, a time, or a
-datetime.
-If the ordering term expression evaluates to a date, a time, or a datetime, the
-expression in `Expression PRECEDING` or `Expression FOLLOWING` must evaluate to
-a duration.
--->
-
-If these conditions are not met, the window frame will be empty,
-which means the window function will return its default
-value: in most cases this is NULL, except for `strict_count()` or
-`array_count()`, whose default value is 0.
-
-This restriction does not apply when the window frame uses `ROWS` or
-`GROUPS`.
-
-##### Tip #####
-
-The `RANGE` window frame is commonly used to define window frames based
-on date or time.
-
-If you want to use `RANGE` with either `Expression PRECEDING` or `Expression
-FOLLOWING`, and you want to use an ordering expression based on date or time,
-the expression in `Expression PRECEDING` or `Expression FOLLOWING` must use a
-data type that can be added to the ordering expression.
-
-#### <a id="Window_frame_extent">Window Frame Extent</a> ####
-
-    WindowFrameExtent ::= ( ( <UNBOUNDED> | Expression ) <PRECEDING> | <CURRENT> <ROW> ) |
-    <BETWEEN>
-      ( <UNBOUNDED> <PRECEDING> | <CURRENT> <ROW> | Expression ( <PRECEDING> | <FOLLOWING> ) )
-    <AND>
-      ( <UNBOUNDED> <FOLLOWING> | <CURRENT> <ROW> | Expression ( <PRECEDING> | <FOLLOWING> ) )
-
-The **window frame extent clause** specifies the start point and end point of
-the window frame.
-The expression before `AND` is the start point and the expression after `AND`
-is the end point.
-If `BETWEEN` is omitted, you can only specify the start point; the end point
-becomes `CURRENT ROW`.
-
-The window frame end point can’t be before the start point.
-If this clause violates this restriction explicitly, an error will result.
-If it violates this restriction implicitly, the window frame will be empty,
-which means the window function will return its default value:
-in most cases this is NULL, except for `strict_count()` or
-`array_count()`, whose default value is 0.
-
-Window frame extents that result in an explicit violation are:
-
-* `BETWEEN CURRENT ROW AND Expression PRECEDING`
-
-* `BETWEEN Expression FOLLOWING AND Expression PRECEDING`
-
-* `BETWEEN Expression FOLLOWING AND CURRENT ROW`
-
-Window frame extents that result in an implicit violation are:
-
-* `BETWEEN UNBOUNDED PRECEDING AND Expression PRECEDING` — if `Expression` is
-  too high, some tuples may generate an empty window frame.
-
-* `BETWEEN Expression PRECEDING AND Expression PRECEDING` — if the second
-  `Expression` is greater than or equal to the first `Expression`,
-  all result sets will generate an empty window frame.
-
-* `BETWEEN Expression FOLLOWING AND Expression FOLLOWING` — if the first
-  `Expression` is greater than or equal to the second `Expression`, all result
-  sets will generate an empty window frame.
-
-* `BETWEEN Expression FOLLOWING AND UNBOUNDED FOLLOWING` — if `Expression` is
-  too high, some tuples may generate an empty window frame.
-
-* If the [window frame exclusion clause](#Window_frame_exclusion) is present,
-  any window frame specification may result in an empty window frame.
-
-The `Expression` must be a positive constant or an expression that evaluates as
-a positive number.
-For `ROWS` or `GROUPS`, the `Expression` must be an integer.
-
-#### <a id="Window_frame_exclusion">Window Frame Exclusion</a> ####
-
-    WindowFrameExclusion ::= <EXCLUDE> ( <CURRENT> <ROW> | <GROUP> | <TIES> |
-    <NO> <OTHERS> )
-
-The **window frame exclusion clause** enables you to exclude specified
-tuples from the window frame.
-
-This clause can be used with all
-[aggregate functions](builtins.html#AggregateFunctions)
-and some [window functions](builtins.html#WindowFunctions) —
-refer to the descriptions of individual functions for more details.
-
-This clause is allowed only when the [window frame
-clause](#Window_frame_clause) is present.
-
-This clause is optional.
-If this clause is omitted, the default is no exclusion —
-the same as `EXCLUDE NO OTHERS`.
-
-* `EXCLUDE CURRENT ROW`: If the current tuple is still part of the window
-  frame, it is removed from the window frame.
-
-* `EXCLUDE GROUP`: The current tuple and any peers of the current tuple are
-  removed from the window frame.
-
-* `EXCLUDE TIES`: Any peers of the current tuple, but not the current tuple
-  itself, are removed from the window frame.
-
-* `EXCLUDE NO OTHERS`: No additional tuples are removed from the window frame.
-
-If the current tuple is already removed from the window frame, then it remains
-removed from the window frame.
-
-## <a id="Subqueries">Subqueries</a>
-In the query language, an arbitrary subquery can appear anywhere that an expression can appear.
-Unlike SQL-92, as was just alluded to, the subqueries in a SELECT list or a boolean predicate need
-not return singleton, single-column relations.
-Instead, they may return arbitrary collections.
-For example, the following query is a variant of the prior group-by query examples;
-it retrieves an array of up to two "dislike" messages per user.
+The following example illustrates the use of `OFFSET`:
 
 ##### Example
 
-    SELECT uid,
-           (SELECT VALUE m.msg
-            FROM msgs m
-            WHERE m.msg.message LIKE '%dislike%'
-            ORDER BY m.msg.messageId
-            LIMIT 2) AS msgs
-    FROM GleambookMessages message
-    GROUP BY message.authorId AS uid GROUP AS msgs(message AS msg);
+(Q3.38) Find the customer with the third-highest credit rating.
 
-For our sample data set, this query returns:
+    FROM customers AS c
+    SELECT c.custid, c.name, c.rating
+    ORDER BY c.rating DESC
+    LIMIT 1 OFFSET 2;
+
+Result:
 
-    [ {
-        "msgs": [
-            {
-                "senderLocation": [
-                    41.66,
-                    80.87
-                ],
-                "inResponseTo": 4,
-                "messageId": 2,
-                "authorId": 1,
-                "message": " dislike x-phone its touch-screen is horrible"
-            }
-        ],
-        "uid": 1
-    }, {
-        "msgs": [
-
-        ],
-        "uid": 2
-    } ]
-
-Note that a subquery, like a top-level `SELECT` statement, always returns a collection -- regardless of where
-within a query the subquery occurs -- and again, its result is never automatically cast into a scalar.
-
-## <a id="Vs_SQL-92">Differences from SQL-92</a>
-The query language offers the following additional features beyond SQL-92:
-
-  * Fully composable and functional: A subquery can iterate over any intermediate collection and can appear anywhere in a query.
-  * Schema-free: The query language does not assume the existence of a static schema for any data that it processes.
-  * Correlated FROM terms: A right-side FROM term expression can refer to variables defined by FROM terms on its left.
-  * Powerful GROUP BY: In addition to a set of aggregate functions as in standard SQL, the groups created by the `GROUP BY` clause are directly usable in nested queries and/or to obtain nested results.
-  * Generalized SELECT clause: A SELECT clause can return any type of collection, while in SQL-92, a `SELECT` clause has to return a (homogeneous) collection of objects.
+    [
+        {
+            "custid": "C25",
+            "name": "M. Sinclair",
+            "rating": 690
+        }
+    ]
 
 
-The following matrix is a quick "SQL-92 compatibility cheat sheet" for the query language.
 
-| Feature |  The query language | SQL-92 |  Why different?  |
-|----------|--------|-------|------------------|
-| SELECT * | Returns nested objects | Returns flattened concatenated objects | Nested collections are 1st class citizens |
-| SELECT list | order not preserved | order preserved | Fields in a JSON object are not ordered |
-| Subquery | Returns a collection  | The returned collection is cast into a scalar value if the subquery appears in a SELECT list or on one side of a comparison or as input to a function | Nested collections are 1st class citizens |
-| LEFT OUTER JOIN |  Fills in `MISSING`(s) for non-matches  |   Fills in `NULL`(s) for non-matches    | "Absence" is more appropriate than "unknown" here  |
-| UNION ALL       | Allows heterogeneous inputs and output | Input streams must be UNION-compatible and output field names are drawn from the first input stream | Heterogenity and nested collections are common |
-| IN constant_expr | The constant expression has to be an array or multiset, i.e., [..,..,...] | The constant collection can be represented as comma-separated items in a paren pair | Nested collections are 1st class citizens |
-| String literal | Double quotes or single quotes | Single quotes only | Double quoted strings are pervasive |
-| Delimited identifiers | Backticks | Double quotes | Double quoted strings are pervasive |
+## <a id="Subqueries">Subqueries</a>
 
-The following SQL-92 features are not implemented yet. However, the query language does not conflict with these features:
+---
 
-  * CROSS JOIN, NATURAL JOIN, UNION JOIN
-  * RIGHT and FULL OUTER JOIN
-  * INTERSECT, EXCEPT, UNION with set semantics
-  * CAST expression
-  * COALESCE expression
-  * ALL and SOME predicates for linking to subqueries
-  * UNIQUE predicate (tests a collection for duplicates)
-  * MATCH predicate (tests for referential integrity)
-  * Row and Table constructors
-  * Preserved order for expressions in a SELECT list
+### Subquery
+**![](../images/diagrams/Subquery.png)**
 
 
+---
+
+A subquery is simply a query surrounded by parentheses. In SQL++, a subquery can appear anywhere that an expression can appear. Like any query, a subquery always returns a collection, even if the collection contains only a single value or is empty. If the subquery has a SELECT clause, it returns a collection of objects. If the subquery has a SELECT VALUE clause, it returns a collection of scalar values. If a single scalar value is expected, the indexing operator [0] can be used to extract the single scalar value from the collection.
+
+##### Example
+
+(Q3.29) (Subquery in SELECT clause)
+For every order that includes item no. 120, find the order number, customer id, and customer name. 
+
+Here, the subquery is used to find a customer name, given a customer id. Since the outer query expects a scalar result, the subquery uses SELECT VALUE and is followed by the indexing operator [0].
+
+    FROM orders AS o, o.items AS i
+    WHERE i.itemno = 120
+    SELECT o.orderno, o.custid,
+        (FROM customers AS c
+         WHERE c.custid = o.custid
+         SELECT VALUE c.name)[0] AS name;
+
+Result:
+
+    [
+        {
+            "orderno": 1003,
+            "custid": "C31",
+            "name": "B. Pruitt"
+        },
+        {
+            "orderno": 1006,
+            "custid": "C41",
+            "name": "R. Dodge"
+        }
+    ]
+
+##### Example
+
+(Q3.30) (Subquery in WHERE clause)
+Find the customer number, name, and rating of all customers whose rating is greater than the average rating.
+
+Here, the subquery is used to find the average rating among all customers. Once again, SELECT VALUE and indexing [0] have been used to get a single scalar value.
+
+    
+    FROM customers AS c1
+    WHERE c1.rating >
+       (FROM customers AS c2
+        SELECT VALUE AVG(c2.rating))[0]
+    SELECT c1.custid, c1.name, c1.rating;
+Result:
+
+    [
+        {
+            "custid": "C13",
+            "name": "T. Cody",
+            "rating": 750
+        },
+        {
+            "custid": "C25",
+            "name": "M. Sinclair",
+            "rating": 690
+        },
+        {
+            "custid": "C37",
+            "name": "T. Henry",
+            "rating": 750
+        }
+    ]
+
+
+##### Example
+
+(Q3.31) (Subquery in FROM clause)
+Compute the total revenue (sum over items of quantity times price) for each order, then find the average, maximum, and minimum total revenue over all orders.
+
+Here, the FROM clause expects to iterate over a collection of objects, so the subquery uses an ordinary SELECT and does not need to be indexed. You might think of a FROM clause as a "natural home" for a subquery.
+
+    FROM
+       (FROM orders AS o, o.items AS i
+        GROUP BY o.orderno
+        SELECT o.orderno, SUM(i.qty * i.price) AS revenue
+       ) AS r
+    SELECT AVG(r.revenue) AS average,
+	       MIN(r.revenue) AS minimum,
+	       MAX(r.revenue) AS maximum;
+
+Result:
+
+    [
+        {
+            "average": 4669.99,
+	        "minimum": 130.45,
+            "maximum": 18847.58
+        }
+    ]
+
+Note the similarity between Q3.26 and Q3.31. This illustrates how a subquery can often be moved into a `WITH` clause to improve the modularity and readability of a query.

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/3_query_title.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/3_query_title.md
index bc36260..5a73096 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/3_query_title.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/3_query_title.md

@@ -19,7 +19,22 @@
 
 # <a id="Queries">3. Queries</a>
 
-A query can be any legal expression or `SELECT` statement. A query always ends with a semicolon.
+A *query* can be an expression, or it can be constructed from blocks of code called *query blocks*. A query block may contain several clauses, including `SELECT`, `FROM`, `LET`, `WHERE`, `GROUP BY`, and `HAVING`. 
 
-    Query ::= (Expression | SelectStatement) ";"
+---
+### Query
+**![](../images/diagrams/Query.png)**
 
+### Selection
+**![](../images/diagrams/Selection.png)**
+
+### QueryBlock
+**![](../images/diagrams/QueryBlock.png)**
+
+### StreamGenerator
+**![](../images/diagrams/StreamGenerator.png)**
+
+
+---
+
+Note that, unlike SQL, SQL++ allows the `SELECT` clause to appear either at the beginning or at the end of a query block. For some queries, placing the `SELECT` clause at the end may make a query block easier to understand, because the `SELECT` clause refers to variables defined in the other clauses.

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_windowfunctions.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_windowfunctions.md
new file mode 100644
index 0000000..7a74900
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_windowfunctions.md

@@ -0,0 +1,267 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+Window functions are special functions that compute aggregate values over a "window" of input data. Like an ordinary function, a window function returns a value for every item in the input dataset. But in the case of a window function, the value returned by the function can depend not only on the argument of the function, but also on other items in the same collection. For example, a window function applied to a set of employees might return the rank of each employee in the set, as measured by salary. As another example, a window function applied to a set of items, ordered by purchase date, might return the running total of the cost of the items.
+
+A window function call is identified by an `OVER` clause, which can specify three things: partitioning, ordering, and framing. The partitioning specification is like a `GROUP BY`: it splits the input data into partitions. For example, a set of employees might be partitioned by department. The window function, when applied to a given object, is influenced only by other objects in the same partition. The ordering specification is like an `ORDER BY`: it determines the ordering of the objects in each partition. The framing specification defines a "frame" that moves through the partition, defining how the result for each object depends on nearby objects. For example, the frame for a current object might consist of the two objects before and after the current one; or it might consist of all the objects before the current one in the same partition. A window function call may also specify some options that control (for example) how nulls are handled by the function.
+
+  
+Here is an example of a window function call:
+
+  
+
+	SELECT deptno, purchase_date, item, cost,
+		SUM(cost) OVER (
+		    PARTITION BY deptno
+		    ORDER BY purchase_date
+		    ROWS UNBOUNDED PRECEDING) AS running_total_cost
+	FROM purchases
+	ORDER BY deptno, purchase_date
+
+  
+
+This example partitions the `purchases` dataset by department number. Within each department, it orders the `purchases` by date and computes a running total cost for each item, using the frame specification `ROWS UNBOUNDED PRECEDING`. Note that the `ORDER BY` clause in the window function is separate and independent from the `ORDER BY` clause of the query as a whole.
+
+  
+
+The general syntax of a window function call is specified in this section. SQL++ has a set of builtin window functions, which are listed and explained in the [Window Functions](builtins.html#WindowFunctions) section of the builtin functions page. In addition, standard SQL aggregate functions such as `SUM` and `AVG` can be used as window functions if they are used with an `OVER` clause.
+The query language has a dedicated set of window functions.
+Aggregate functions can also be used as window functions, when they are used with an `OVER` clause.
+
+## <a id="Window_function_call">Window Function Call</a> ##
+
+---
+### WindowFunctionCall
+**![](../images/diagrams/WindowFunctionCall.png)**
+
+### WindowFunctionType
+**![](../images/diagrams/WindowFunctionType.png)**
+
+----
+
+Refer to the [Aggregate Functions](builtins.html#AggregateFunctions) section
+for a list of aggregate functions.
+
+Refer to the [Window Functions](builtins.html#WindowFunctions) section for a
+list of window functions.
+
+### <a id="Window_function_arguments">Window Function Arguments</a> ###
+
+---
+
+### WindowFunctionArguments
+**![](../images/diagrams/WindowFunctionArguments.png)**
+
+
+---
+
+Refer to the [Aggregate Functions](builtins.html#AggregateFunctions) section or the [Window Functions](builtins.html#WindowFunctions) section for details of the arguments for individual functions.
+
+### <a id="Window_function_options">Window Function Options</a> ###
+
+---
+
+### WindowFunctionOptions
+**![](../images/diagrams/WindowFunctionOptions.png)**
+
+
+---
+
+Window function options cannot be used with [aggregate functions](builtins.html#AggregateFunctions).
+
+Window function options can only be used with some [window functions](builtins.html#WindowFunctions), as described below.
+
+The *FROM modifier* determines whether the computation begins at the first or last tuple in the window. It is optional and can only be used with the `nth_value()` function. If it is omitted, the default setting is `FROM FIRST`.
+
+The *NULLS modifier*  determines whether NULL values are included in the computation, or ignored. MISSING values are treated the same way as NULL values. It is also optional and can only be used with the `first_value()`, `last_value()`, `nth_value()`, `lag()`, and `lead()` functions. If omitted, the default setting is `RESPECT NULLS`.
+
+### <a id="Window_frame_variable">Window Frame Variable</a> ###
+
+The `AS` keyword enables you to specify an alias for the window frame contents. It introduces a variable which will be bound to the contents of the frame. When using a built-in [aggregate function](builtins.html#AggregateFunctions) as a window function, the function’s argument must be a subquery which refers to this alias, for example:
+
+    SELECT ARRAY_COUNT(DISTINCT (FROM alias SELECT VALUE alias.src.field))
+    OVER alias AS (PARTITION BY … ORDER BY …)
+    FROM source AS src
+
+The alias is not necessary when using a [window function](builtins.html#WindowFunctions), or when using a standard SQL aggregate function with the `OVER` clause.
+
+
+### <a id="Window_definition">Window Definition</a> ###
+---
+
+### WindowDefinition
+**![](../images/diagrams/WindowDefinition.png)**
+
+
+---
+
+The *window definition* specifies the partitioning, ordering, and framing for window functions.
+
+#### <a id="Window_partition_clause">Window Partition Clause</a> ####
+
+---
+
+### WindowPartitionClause
+**![](../images/diagrams/WindowPartitionClause.png)**
+
+
+---
+
+The *window partition clause* divides the tuples into logical partitions
+using one or more expressions.
+
+This clause may be used with any [window function](builtins.html#WindowFunctions),
+or any [aggregate function](builtins.html#AggregateFunctions) used as a window
+function.
+
+This clause is optional.
+If omitted, all tuples are united in a single partition.
+
+#### <a id="Window_order_clause">Window Order Clause</a> ####
+---
+
+### WindowOrderClause
+**![](../images/diagrams/WindowOrderClause.png)**
+
+
+---
+
+The *window order clause* determines how tuples are ordered within each partition. The window function works on tuples in the order specified by this clause.
+
+This clause may be used with any [window function](builtins.html#WindowFunctions), or any [aggregate function](builtins.html#AggregateFunctions) used as a window function.
+
+This clause is optional. If omitted, all tuples are considered peers, i.e. their order is tied. When tuples in the window partition are tied, each window function behaves differently.
+
+* The `row_number()` function returns a distinct number for each tuple.
+  If tuples are tied, the results may be unpredictable.
+
+* The `rank()`, `dense_rank()`, `percent_rank()`, and `cume_dist()` functions
+  return the same result for each tuple.
+
+* For other functions, if the [window frame](#Window_frame_clause) is
+  defined by `ROWS`, the results may be unpredictable.
+  If the window frame is defined by `RANGE` or `GROUPS`, the results are the same
+  for each tuple.
+
+##### Note #####
+
+This clause does not guarantee the overall order of the query results. To guarantee the order of the final results, use the query `ORDER BY` clause.
+
+
+#### <a id="Window_frame_clause">Window Frame Clause</a> ####
+
+### WindowFrameClause
+**![](../images/diagrams/WindowFrameClause.png)**
+
+
+The *window frame clause* defines the window frame. It can be used with all [aggregate functions](builtins.html#AggregateFunctions) and some [window functions](builtins.html#WindowFunctions) - refer to the descriptions of individual functions for more details.  It is optional and allowed only when the [window order clause](#Window_order_clause) is present.
+
+* If this clause is omitted and there is no [window order clause](#Window_order_clause), the window frame is the entire partition.
+
+* If this clause is omitted but there is a [window order clause](#Window_order_clause), the window frame becomes all tuples
+  in the partition that precede the current tuple, together with the current tuple and its peers - the same as `RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW`.
+
+The window frame can be defined in the following ways:
+
+* `ROWS`: Counts the exact number of tuples within the frame. If window ordering doesn’t result in unique ordering, the function may produce unpredictable results. You can add a unique expression or more window ordering expressions to produce unique ordering.
+
+* `RANGE`: Looks for a value offset within the frame. The function produces deterministic results.
+
+* `GROUPS`: Counts all groups of tied rows within the frame. The function produces deterministic results.
+
+##### Note #####
+
+If this clause uses `RANGE` with either *Expr* `PRECEDING` or *Expr* `FOLLOWING`, the [window order clause](#Window_order_clause) must have only a single ordering term.
+
+The ordering term expression must evaluate to a number.
+
+If these conditions are not met, the window frame will be empty, which means the window function will return its default value: in most cases this is `null`, except for `strict_count()` or `array_count()`, whose default value is 0. This restriction does not apply when the window frame uses `ROWS` or `GROUPS`.
+
+##### Tip #####
+
+The `RANGE` window frame is commonly used to define window frames based
+on date or time.
+
+If you want to use `RANGE` with either *Expr* `PRECEDING` or *Expr* `FOLLOWING`, and you want to use an ordering expression based on date or time, the expression in *Expr* `PRECEDING` or *Expr* `FOLLOWING` must use a data type that can be added to the ordering expression.
+
+#### <a id="Window_frame_extent">Window Frame Extent</a> ####
+---
+
+### WindowFrameExtent
+**![](../images/diagrams/WindowFrameExtent.png)**
+
+
+---
+
+The *window frame extent clause* specifies the start point and end point of the window frame.
+The expression before `AND` is the start point and the expression after `AND` is the end point.
+If `BETWEEN` is omitted, you can only specify the start point; the end point becomes `CURRENT ROW`.
+
+The window frame end point can’t be before the start point. If this clause violates this restriction explicitly, an error will result. If it violates this restriction implicitly, the window frame will be empty, which means the window function will return its default value: in most cases this is `null`, except for `strict_count()` or
+`array_count()`, whose default value is 0.
+
+Window frame extents that result in an explicit violation are:
+
+* `BETWEEN CURRENT ROW AND` *Expr* `PRECEDING`
+
+* `BETWEEN` *Expr* `FOLLOWING AND` *Expr* `PRECEDING`
+
+* `BETWEEN` *Expr* `FOLLOWING AND CURRENT ROW`
+
+Window frame extents that result in an implicit violation are:
+
+* `BETWEEN UNBOUNDED PRECEDING AND` *Expr* `PRECEDING` - if *Expr* is too high, some tuples may generate an empty window frame.
+
+* `BETWEEN` *Expr* `PRECEDING AND` *Expr* `PRECEDING` - if the second  *Expr* is greater than or equal to the first *Expr*, all result sets will generate an empty window frame.
+
+* `BETWEEN` *Expr* `FOLLOWING AND` *Expr* `FOLLOWING` - if the first *Expr* is greater than or equal to the second *Expr*, all result sets will generate an empty window frame.
+
+* `BETWEEN` *Expr* `FOLLOWING AND UNBOUNDED FOLLOWING` - if *Expr* is too high, some tuples may generate an empty window frame.
+
+* If the [window frame exclusion clause](#Window_frame_exclusion) is present, any window frame specification may result in an empty window frame.
+
+The *Expr* must be a positive constant or an expression that evaluates as a positive number. For `ROWS` or `GROUPS`, the *Expr* must be an integer.
+
+#### <a id="Window_frame_exclusion">Window Frame Exclusion</a> ####
+
+---
+
+### WindowFrameExclusion
+**![](../images/diagrams/WindowFrameExclusion.png)**
+
+
+---
+
+The *window frame exclusion clause* enables you to exclude specified tuples from the window frame.
+
+This clause can be used with all [aggregate functions](builtins.html#AggregateFunctions) and some [window functions](builtins.html#WindowFunctions) - refer to the descriptions of individual functions for more details.
+
+This clause is allowed only when the [window frame clause](#Window_frame_clause) is present.
+
+This clause is optional. If this clause is omitted, the default is no exclusion - the same as `EXCLUDE NO OTHERS`.
+
+* `EXCLUDE CURRENT ROW`: If the current tuple is still part of the window frame, it is removed from the window frame.
+
+* `EXCLUDE GROUP`: The current tuple and any peers of the current tuple are removed from the window frame.
+
+* `EXCLUDE TIES`: Any peers of the current tuple, but not the current tuple itself, are removed from the window frame.
+
+* `EXCLUDE NO OTHERS`: No additional tuples are removed from the window frame.
+
+If the current tuple is already removed from the window frame, then it remains removed from the window frame.

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_windowfunctions_title.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_windowfunctions_title.md
new file mode 100644
index 0000000..7f572c4
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/4_windowfunctions_title.md

@@ -0,0 +1,20 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+# <a id="Over_clauses">4. Window Functions</a>

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_error.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_error.md
new file mode 100644
index 0000000..ea4c6a15
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_error.md

@@ -0,0 +1,130 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+A query can potentially result in one of the following errors:
+
+ * syntax error,
+ * identifier resolution error,
+ * type error,
+ * resource error.
+
+If the query processor runs into any error, it will
+terminate the ongoing processing of the query and
+immediately return an error message to the client.
+
+## <a id="Syntax_errors">Syntax Errors</a>
+A valid query must satisfy the grammar rules of the query language.
+Otherwise, a syntax error will be raised.
+
+##### Example
+
+(Q4.1)
+
+    customers AS c
+	SELECT *
+
+Since the query has no `FROM` keyword before the dataset `customers`,
+we will get a syntax error as follows:
+
+    ERROR: Code: 1 "ASX1001: Syntax error: In line 2 >>customers AS c<< Encountered \"AS\" at column 11. "
+
+##### Example
+
+(Q4.2)
+
+     FROM customers AS c
+	 WHERE type="advertiser"
+	 SELECT *;
+
+Since "type" is a reserved keyword in the query parser,
+we will get a syntax error as follows:
+
+    ERROR: Code: 1 "ASX1001: Syntax error: In line 3 >> WHERE type=\"advertiser\"<< Encountered \"type\" at column 8. ";
+
+
+## <a id="Identifier_resolution_errors">Identifier Resolution Errors</a>
+Referring to an undefined identifier can cause an error if the identifier
+cannot be successfully resolved as a valid field access.
+
+##### Example
+(Q4.3)
+
+     FROM customer AS c
+	 SELECT *
+
+If we have a typo as above in "customers" that misses the dataset name's ending "s",
+we will get an identifier resolution error as follows:
+
+    ERROR: Code: 1 "ASX1077: Cannot find dataset customer in dataverse Commerce nor an alias with name customer! (in line 2, at column 7)"
+
+##### Example
+(Q4.4)
+
+     FROM customers AS c JOIN orders AS o ON c.custid = o.custid
+	 SELECT name, orderno;
+
+If the compiler cannot figure out how to resolve an unqualified field name, which will occur if there is more than one variable in scope (e.g., `customers AS c` and `orders AS o` as above),
+we will get an identifier resolution error as follows:
+
+    ERROR: Code: 1 "ASX1074: Cannot resolve ambiguous alias reference for identifier name (in line 3, at column 9)"
+
+The same can happen when failing to properly identify the `GROUP BY` expression. 
+
+(Q4.5)
+
+	SELECT o.custid, COUNT(o.orderno) AS `order count`
+	FROM orders AS o
+	GROUP BY custid;
+
+Result:
+
+	ERROR: Code: 1 "ASX1073: Cannot resolve alias reference for undefined identifier o (in line 2, at column 8)"
+
+## <a id="Type_errors">Type Errors</a>
+
+The query compiler does type checks based on its available type information.
+In addition, the query runtime also reports type errors if a data model instance
+it processes does not satisfy the type requirement.
+
+##### Example
+(Q4.6)
+
+    get_day(10/11/2020);
+
+Since function `get_day` can only process duration, daytimeduration, date, or datetime input values,
+we will get a type error as follows: 
+
+    ERROR: Code: 1 "ASX0002: Type mismatch: function get-day expects its 1st input parameter to be of type duration, daytimeduration, date or datetime, but the actual input type is double (in line 2, at column 1)"
+
+
+## <a id="Resource_errors">Resource Errors</a>
+A query can potentially exhaust system resources, such
+as the number of open files and disk space.
+For instance, the following two resource errors could potentially
+be seen when running the system:
+
+    Error: no space left on device
+    Error: too many open files
+
+The "no space left on device" issue usually can be fixed by
+cleaning up disk space and reserving more disk space for the system.
+The "too many open files" issue usually can be fixed by a system
+administrator, following the instructions
+[here](https://easyengine.io/tutorials/linux/increase-open-files-limit/).
+

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_error_title.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_error_title.md
new file mode 100644
index 0000000..5e2163e
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/5_error_title.md

@@ -0,0 +1,20 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+# <a id="Errors">5. Errors</a>

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/6_sql_diff.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/6_sql_diff.md
new file mode 100644
index 0000000..7956f07
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/6_sql_diff.md

@@ -0,0 +1,55 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+ 
+SQL++ offers the following additional features beyond SQL-92:
+
+  * Fully composable and functional: A subquery can iterate over any intermediate collection and can appear anywhere in a query.
+  * Schema-free: The query language does not assume the existence of a static schema for any data that it processes.
+  * Correlated `FROM` terms: A right-side `FROM` term expression can refer to variables defined by `FROM` terms on its left.
+  * Powerful `GROUP BY`: In addition to a set of aggregate functions as in standard SQL, the groups created by the `GROUP BY` clause are directly usable in nested queries and/or to obtain nested results.
+  * Generalized `SELECT` clause: A `SELECT` clause can return any type of collection, while in SQL-92, a `SELECT` clause has to return a (homogeneous) collection of objects.
+
+
+The following matrix is a quick "SQL-92 compatibility cheat sheet" for SQL++.
+
+| Feature |  SQL++ | SQL-92 |  Why different?  |
+|----------|--------|-------|------------------|
+| SELECT * | Returns nested objects | Returns flattened concatenated objects | Nested collections are 1st class citizens |
+| SELECT list | order not preserved | order preserved | Fields in a JSON object are not ordered |
+| Subquery | Returns a collection  | The returned collection is cast into a scalar value if the subquery appears in a SELECT list or on one side of a comparison or as input to a function | Nested collections are 1st class citizens |
+| LEFT OUTER JOIN |  Fills in `MISSING`(s) for non-matches  |   Fills in `NULL`(s) for non-matches    | "Absence" is more appropriate than "unknown" here  |
+| UNION ALL       | Allows heterogeneous inputs and output | Input streams must be UNION-compatible and output field names are drawn from the first input stream | Heterogeneity and nested collections are common |
+| IN constant_expr | The constant expression has to be an array or multiset, i.e., [..,..,...] | The constant collection can be represented as comma-separated items in a paren pair | Nested collections are 1st class citizens |
+| String literal | Double quotes or single quotes | Single quotes only | Double quoted strings are pervasive in JSON |
+| Delimited identifiers | Backticks | Double quotes | Double quoted strings are pervasive in JSON |
+
+The following SQL-92 features are not implemented yet. However, SQL++ does not conflict with these features:
+
+  * CROSS JOIN, NATURAL JOIN, UNION JOIN
+  * RIGHT and FULL OUTER JOIN
+  * INTERSECT, EXCEPT, UNION with set semantics
+  * CAST expression
+  * COALESCE expression
+  * ALL and SOME predicates for linking to subqueries
+  * UNIQUE predicate (tests a collection for duplicates)
+  * MATCH predicate (tests for referential integrity)
+  * Row and Table constructors
+  * Preserved order for expressions in a SELECT list
+
+

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/6_sql_diff_title.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/6_sql_diff_title.md
new file mode 100644
index 0000000..ad89d97
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/6_sql_diff_title.md

@@ -0,0 +1,20 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+# <a id="Vs_SQL-92">6. Differences from SQL-92</a>

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/7_ddl_dml.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/7_ddl_dml.md
new file mode 100644
index 0000000..0afa49c
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/7_ddl_dml.md

@@ -0,0 +1,654 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+## <a id="Lifecycle_management_statements">Lifecycle Management Statements</a>
+
+### <a id="Use">Use Statement</a>
+---
+
+### UseStmnt
+**![](../images/diagrams/UseStmnt.png)**
+
+
+---
+
+At the uppermost level, the world of data is organized into data namespaces called **dataverses**.
+To set the default dataverse for statements, the `USE` statement is provided.
+
+As an example, the following statement sets the default dataverse to be `Commerce`.
+
+	USE Commerce;
+
+### <a id="Sets"> Set Statement</a>
+The `SET` statement can be used to override certain configuration parameters. More information about `SET` can be found in [Appendix 2](#Performance_tuning).
+
+### <a id="Functions"> Function Declaration</a>
+
+When writing a complex query, it can sometimes be helpful to define one or more auxiliary functions that each address a sub-piece of the overall query. 
+
+The `DECLARE FUNCTION` statement supports the creation of such helper functions.
+In general, the function body (expression) can be any legal query expression.
+
+The function named in the `DECLARE FUNCTION` statement is accessible only in the current query. To create a persistent function for use in multiple queries, use the `CREATE FUNCTION` statement.
+
+---
+### FunctionDeclaration
+**![](../images/diagrams/FunctionDeclaration.png)**
+
+### ParameterList
+**![](../images/diagrams/ParameterList.png)**
+
+---
+The following is a simple example of a temporary function definition and its use.
+
+##### Example
+
+    DECLARE FUNCTION nameSearch(customerId){
+		(SELECT c.custid, c.name
+		FROM customers AS c
+		WHERE c.custid = customerId)[0]
+     };
+
+
+	SELECT VALUE nameSearch("C25");
+
+For our sample data set, this returns:
+
+    [
+      { "custid": "C25", "name": "M. Sinclair" }
+    ]
+
+### <a id="Create"> Create Statement</a>
+---
+### CreateStmnt
+**![](../images/diagrams/CreateStmnt.png)**
+
+### QualifiedName
+**![](../images/diagrams/QualifiedName.png)**
+
+### DoubleQualifiedName
+**![](../images/diagrams/DoubleQualifiedName.png)**
+
+---
+
+The `CREATE` statement is used for creating dataverses as well as other persistent artifacts in a dataverse.
+It can be used to create new dataverses, datatypes, datasets, indexes, and user-defined query functions.
+
+#### <a id="Dataverses"> Create Dataverse</a>
+
+---
+### CreateDataverse
+**![](../images/diagrams/CreateDataverse.png)**
+
+---
+The `CREATE DATAVERSE` statement is used to create new dataverses.
+To ease the authoring of reusable query scripts, an optional `IF NOT EXISTS` clause is included to allow
+creation to be requested either unconditionally or only if the dataverse does not already exist.
+If this clause is absent, an error is returned if a dataverse with the indicated name already exists.
+
+The following example creates a new dataverse named `Commerce` if one does not already exist.
+
+##### Example
+
+    CREATE DATAVERSE Commerce IF NOT EXISTS;
+
+#### <a id="Types"> Create Type </a>
+---
+### CreateType
+**![](../images/diagrams/CreateType.png)**
+
+### ObjectTypeDef
+**![](../images/diagrams/ObjectTypeDef.png)**
+
+### ObjectField
+**![](../images/diagrams/ObjectField.png)**
+
+### TypeExpr
+**![](../images/diagrams/TypeExpr.png)**
+
+### ArrayTypeDef
+**![](../images/diagrams/ArrayTypeDef.png)**
+
+### MultisetTypeDef
+**![](../images/diagrams/MultisetTypeDef.png)**
+
+### TypeRef
+**![](../images/diagrams/TypeRef.png)**
+
+---
+
+The `CREATE TYPE` statement is used to create a new named datatype.
+This type can then be used to create stored collections or utilized when defining one or more other datatypes.
+Much more information about the data model is available in the [data model reference guide](../datamodel.html).
+A new type can be an object type, a renaming of another type, an array type, or a multiset type.
+An object type can be defined as being either open or closed.
+Instances of a closed object type are not permitted to contain fields other than those specified in the create type statement.
+Instances of an open object type may carry additional fields, and open is the default for new types if neither option is specified.
+
+The following example creates three new object types called `addressType`, `customerType`, and `itemType`.
+Their fields are essentially traditional typed name/value pairs (much like SQL fields).
+Since they are defined as (defaulting to) being open types, instances will be permitted to contain more than what is specified in the type definition. Indeed, many of the customer objects contain a rating as well; however, this is not necessary for the customer object to be created. As can be seen in the sample data, customers can exist without ratings or with part (or all) of the address missing.
+
+##### Example
+
+	CREATE TYPE addressType AS {
+	    street:			string,
+	    city:			string,
+	    zipcode:			string?
+	};
+
+    CREATE TYPE customerType AS {
+        custid:			string,
+        name:			string,
+        address:			addressType?
+    };
+
+	CREATE TYPE itemType AS {
+	    itemno:			int,
+	    qty:			int,
+	    price:			int
+	};
+
+Optionally, you may wish to create a type that has an automatically generated primary key field. The example below shows an alternate form of `itemType` which achieves this by setting its primary key, `itemno`, to UUID. (Refer to the Datasets section later for more details on such fields.)
+
+##### Example
+	CREATE TYPE itemType AS {
+	    itemno:			uuid,
+	    qty:			int,
+	    price:			int
+	};
+
+Note that the type of the `itemno` in this example is UUID. This field type can be used if you want to have an autogenerated-PK field. (Refer to the Datasets section later for more details on such fields.)
+
+The next example creates a new object type, closed this time, called `orderType`.
+Instances of this closed type will not be permitted to have extra fields,
+although the `ship_date` field is marked as optional and may thus be `NULL` or `MISSING` in legal instances of the type. The items field is an array of instances of another object type, `itemType`. 
+
+##### Example
+
+	CREATE TYPE orderType AS CLOSED {
+	    orderno:			int,
+	    custid:			string,
+	    order_date:			string,
+	    ship_date:			string?,
+	    items:			[ itemType ]
+	};
+
+#### <a id="Datasets"> Create Dataset</a>
+
+---
+### CreateDataset
+**![](../images/diagrams/CreateDataset.png)**
+
+### CreateInternalDataset
+**![](../images/diagrams/CreateInternalDataset.png)**
+
+### CreateExternalDataset
+**![](../images/diagrams/CreateExternalDataset.png)**
+
+### AdapterName
+**![](../images/diagrams/AdapterName.png)**
+
+### Configuration
+**![](../images/diagrams/Configuration.png)**
+
+### KeyValuePair
+**![](../images/diagrams/KeyValuePair.png)**
+
+### Properties
+**![](../images/diagrams/Properties.png)**
+
+### PrimaryKey
+**![](../images/diagrams/PrimaryKey.png)**
+
+### NestedField
+**![](../images/diagrams/NestedField.png)**
+
+### CompactionPolicy
+**![](../images/diagrams/CompactionPolicy.png)**
+
+---
+
+The `CREATE DATASET` statement is used to create a new dataset.
+Datasets are named multisets of object type instances;
+they are where data lives persistently and are the usual targets for queries.
+Datasets are typed, and the system ensures that their contents conform to their type definitions.
+An Internal dataset (the default kind) is a dataset whose content lives within and is managed by the system.
+It is required to have a specified unique primary key field which uniquely identifies the contained objects.
+(The primary key is also used in secondary indexes to identify the indexed primary data objects.)
+
+Internal datasets contain several advanced options that can be specified when appropriate.
+One such option is that random primary key (UUID) values can be auto-generated by declaring the field to be UUID and putting `AUTOGENERATED` after the `PRIMARY KEY` identifier.
+In this case, unlike other non-optional fields, a value for the auto-generated PK field should not be provided at insertion time by the user since each object's primary key field value will be auto-generated by the system.
+
+Another advanced option, when creating an Internal dataset, is to specify the merge policy to control which of the
+underlying LSM storage components are to be merged.
+(The system supports Log-Structured Merge tree based physical storage for Internal datasets.)
+Currently the system supports four different component merging policies that can be chosen per dataset:
+no-merge, constant, prefix, and correlated-prefix.
+The no-merge policy simply never merges disk components.
+The constant policy merges disk components when the number of components reaches a constant number k that can be configured by the user.
+The prefix policy relies on both component sizes and the number of components to decide which components to merge.
+It works by first trying to identify the smallest ordered (oldest to newest) sequence of components such that the sequence does not contain a single component that exceeds some threshold size M and that either the sum of the components' sizes exceeds M or the number of components in the sequence exceeds another threshold C.
+If such a sequence exists, the components in the sequence are merged together to form a single component.
+Finally, the correlated-prefix policy is similar to the prefix policy, but it delegates the decision of merging the disk components of all the indexes in a dataset to the primary index.
+When the correlated-prefix policy decides that the primary index needs to be merged (using the same decision criteria as for the prefix policy), then it will issue successive merge requests on behalf of all other indexes associated with the same dataset.
+The system's default policy is the prefix policy except when there is a filter on a dataset, where the preferred policy for filters is the correlated-prefix.
+
+Another advanced option shown in the syntax above, related to performance and mentioned above, is that a **filter** can optionally be created on a field to further optimize range queries with predicates on the filter's field.
+Filters allow some range queries to avoid searching all LSM components when the query conditions match the filter.
+(Refer to [Filter-Based LSM Index Acceleration](../sqlpp/filters.html) for more information about filters.)
+
+An External dataset, in contrast to an Internal dataset, has data stored outside of the system's control.
+Files living in HDFS or in the local filesystem(s) of a cluster's nodes are currently supported.
+External dataset support allows queries to treat foreign data as though it were stored in the system,
+making it possible to query "legacy" file data (for example, Hive data) without having to physically import it.
+When defining an External dataset, an appropriate adapter type must be selected for the desired external data.
+(See the [Guide to External Data](../aql/externaldata.html) for more information on the available adapters.)
+
+The following example creates an Internal dataset for storing `customerType` objects.
+It specifies that their `custid` field is their primary key.
+
+#### Example
+
+    CREATE INTERNAL DATASET customers(customerType) PRIMARY KEY custid;
+
+The next example creates an Internal dataset (the default kind when no dataset kind is specified) for storing `itemType` objects. It specifies that the `itemno` field should be used as the primary key for the dataset.
+It also specifies that the `itemno` field is an auto-generated field, meaning that a randomly generated UUID value should be assigned to each incoming object by the system. (A user should therefore not attempt to provide a value for this field.)
+
+Note that the `itemno` field's declared type must be UUID in this case.
+
+#### Example
+
+    CREATE DATASET MyItems(itemType) PRIMARY KEY itemno AUTOGENERATED;
+
+The next example creates an External dataset for querying LineItemType objects.
+The choice of the `hdfs` adapter means that this dataset's data actually resides in HDFS.
+The example `CREATE` statement also provides parameters used by the hdfs adapter:
+the URL and path needed to locate the data in HDFS and a description of the data format.
+
+#### Example
+
+    CREATE EXTERNAL DATASET LineItem(LineItemType) USING hdfs (
+      ("hdfs"="hdfs://HOST:PORT"),
+      ("path"="HDFS_PATH"),
+      ("input-format"="text-input-format"),
+      ("format"="delimited-text"),
+      ("delimiter"="|"));
+
+#### <a id="Indices">Create Index</a>
+
+---
+### CreateIndex
+**![](../images/diagrams/CreateIndex.png)**
+
+### CreateSecondaryIndex
+**![](../images/diagrams/CreateSecondaryIndex.png)**
+
+### CreatePrimaryKeyIndex
+**![](../images/diagrams/CreatePrimaryKeyIndex.png)**
+
+### IndexField
+**![](../images/diagrams/IndexField.png)**
+
+### NestedField
+**![](../images/diagrams/NestedField.png)**
+
+### IndexType
+**![](../images/diagrams/IndexType.png)**
+
+
+---
+
+The `CREATE INDEX` statement creates a secondary index on one or more fields of a specified dataset.
+Supported index types include `BTREE` for totally ordered datatypes, `RTREE` for spatial data,
+and `KEYWORD` and `NGRAM` for textual (string) data.
+An index can be created on a nested field (or fields) by providing a valid path expression as an index field identifier.
+
+An indexed field is not required to be part of the datatype associated with a dataset if the dataset's datatype
+is declared as open **and** if the field's type is provided along with its name and if the `ENFORCED` keyword is
+specified at the end of the index definition.
+`ENFORCING` an open field introduces a check that makes sure that the actual type of the indexed field
+(if the optional field exists in the object) always matches this specified (open) field type.
+
+The following example creates a btree index called `cCustIdx` on the `custid` field of the orders dataset.
+This index can be useful for accelerating exact-match queries, range search queries, and joins involving the `custid` field.
+
+#### Example
+
+    CREATE INDEX cCustIdx ON orders(custid) TYPE BTREE;
+
+The following example creates an open btree index called `oCreatedTimeIdx` on the (non-declared) `createdTime` field of the `orders` dataset having `datetime` type.
+This index can be useful for accelerating exact-match queries, range search queries, and joins involving the `createdTime` field.
+The index is enforced so that records that do not have the `createdTime` field or have a mismatched type on the field
+cannot be inserted into the dataset.
+
+#### Example
+
+    CREATE INDEX oCreatedTimeIdx ON orders(createdTime: datetime?) TYPE BTREE ENFORCED;
+
+The following example creates an open btree index called `cAddedTimeIdx` on the (non-declared) `addedTime`
+field of the `customers` dataset having datetime type.
+This index can be useful for accelerating exact-match queries, range search queries,
+and joins involving the `addedTime` field.
+The index is not enforced so that records that do not have the `addedTime` field or have a mismatched type on the field
+can still be inserted into the dataset.
+
+#### Example
+
+    CREATE INDEX cAddedTimeIdx ON customers(addedTime: datetime?);
+
+The following example creates a btree index called `oOrderUserNameIdx` on `orderUserName`,
+a nested field residing within an object-valued user field in the `orders` dataset.
+This index can be useful for accelerating exact-match queries, range search queries,
+and joins involving the nested `orderUserName` field.
+Such nested fields must be singular, i.e., one cannot index through (or on) an array-valued field.
+
+#### Example
+
+    CREATE INDEX oOrderUserNameIdx ON orders(order.orderUserName) TYPE BTREE;
+
+The following example creates an open rtree index called `oOrderLocIdx` on the `order-location` field of the `orders` dataset. This index can be useful for accelerating queries that use the [`spatial-intersect` function](builtins.html#spatial_intersect) in a predicate involving the `order-location` field.
+
+#### Example
+
+    CREATE INDEX oOrderLocIdx ON orders(`order-location` : point?) TYPE RTREE ENFORCED;
+
+The following example creates a 3-gram index called `cUserIdx` on the name field of the `customers` dataset. This index can be used to accelerate some similarity or substring matching queries on the name field. For details refer to the document on [similarity queries](similarity.html#NGram_Index).
+
+#### Example
+
+    CREATE INDEX cUserIdx ON customers(name) TYPE NGRAM(3);
+
+The following example creates a keyword index called `oCityIdx` on the `city` within the `address` field of the `customers` dataset. This keyword index can be used to optimize queries with token-based similarity predicates on the `address` field. For details refer to the document on [similarity queries](similarity.html#Keyword_Index).
+
+#### Example
+
+    CREATE INDEX oCityIdx ON customers(address.city) TYPE KEYWORD;
+
+The following example creates a special secondary index which holds only the primary keys.
+This index is useful for speeding up aggregation queries which involve only primary keys.
+The name of the index is optional. If the name is not specified, the system will generate
+one. When the user would like to drop this index, the metadata can be queried to find the system-generated name.
+
+#### Example
+
+    CREATE PRIMARY INDEX cus_pk_idx ON customers;
+
+An example query that can be accelerated using the primary-key index:
+
+    SELECT COUNT(*) FROM customers;
+
+To look up the above primary-key index, issue the following query:
+
+    SELECT VALUE i
+    FROM Metadata.`Index` i
+    WHERE i.DataverseName = "Commerce" AND i.DatasetName = "customers";
+
+The query returns:
+
+	[
+	    {
+	        "DataverseName": "Commerce",
+	        "DatasetName": "customers",
+	        "IndexName": "cus_pk_idx",
+	        "IndexStructure": "BTREE",
+	        "SearchKey": [],
+	        "IsPrimary": false,
+	        "Timestamp": "Fri Sep 18 14:15:51 PDT 2020",
+	        "PendingOp": 0
+	    },
+	    {
+	        "DataverseName": "Commerce",
+	        "DatasetName": "customers",
+	        "IndexName": "customers",
+	        "IndexStructure": "BTREE",
+	        "SearchKey": [
+	            [
+	                "custid"
+	            ]
+	        ],
+	        "IsPrimary": true,
+	        "Timestamp": "Thu Jul 16 13:07:37 PDT 2020",
+	        "PendingOp": 0
+	    }
+	]
+
+Remember that `CREATE PRIMARY INDEX` creates a secondary index.
+That is the reason the `IsPrimary` field is false.
+The primary-key index can be identified by the fact that the `SearchKey` field is empty since it only contains primary key fields.
+
+#### <a id="Synonyms"> Create Synonym</a>
+
+---
+### CreateSynonym
+**![](../images/diagrams/CreateSynonym.png)**
+
+
+---
+
+The `CREATE SYNONYM` statement creates a synonym for a given dataset.
+This synonym may be used instead of the dataset name in `SELECT`, `INSERT`, `UPSERT`, `DELETE`, and `LOAD` statements.
+The target dataset does not need to exist when the synonym is created.
+
+##### Example
+
+    CREATE DATASET customers(customersType) PRIMARY KEY custid;
+
+    CREATE SYNONYM customersSynonym FOR customers;
+
+    SELECT * FROM customersSynonym;
+
+More information on how synonyms are resolved can be found in the appendix section on Variable Resolution.
+
+#### <a id="Create_function">Create Function</a>
+
+The `CREATE FUNCTION` statement creates a **named** function that can then be used and reused in queries.
+The body of a function can be any query expression involving the function's parameters.
+
+---
+### CreateFunction
+**![](../images/diagrams/CreateFunction.png)**
+
+### FunctionParameters
+**![](../images/diagrams/FunctionParameters.png)**
+
+
+---
+The following is an example of a `CREATE FUNCTION` statement which is similar to our earlier `DECLARE FUNCTION` example.
+
+It differs from that example in that it results in a function that is persistently registered by name in the specified dataverse (the current dataverse being used, if not otherwise specified).
+
+##### Example
+
+    CREATE FUNCTION nameSearch(customerId) {
+        (SELECT c.custid, c.name
+         FROM customers AS c
+         WHERE c.custid = customerId)[0]
+     };
+
+The following is an example of a `CREATE FUNCTION` statement that replaces an existing function.
+
+##### Example
+
+    CREATE OR REPLACE FUNCTION friendInfo(userId) {
+        (SELECT u.id, u.name
+         FROM GleambookUsers u
+         WHERE u.id = userId)[0]
+     };
+
+External functions can also be loaded into Libraries via the [UDF API](../udf.html). Given
+an already loaded library `pylib`, a function `sentiment` mapping to a Python method `sent_model.sentiment` in `sentiment_mod`
+would be created as follows:
+
+##### Example
+
+    CREATE FUNCTION sentiment(a)
+      AS "sentiment_mod", "sent_model.sentiment" AT pylib;
+
+### <a id="Removal">Drop Statement</a>
+
+---
+### DropStmnt
+**![](../images/diagrams/DropStmnt.png)**
+
+### FunctionSignature
+**![](../images/diagrams/FunctionSignature.png)**
+
+---
+
+The `DROP` statement is the inverse of the `CREATE` statement. It can be used to drop dataverses, datatypes, datasets, indexes, functions, and synonyms.
+
+The following examples illustrate some uses of the `DROP` statement.
+
+##### Example
+
+    DROP DATASET customers IF EXISTS;
+
+    DROP INDEX orders.orderidIndex;
+
+    DROP TYPE customers2.customersType;
+
+    DROP FUNCTION nameSearch@1;
+
+    DROP SYNONYM customersSynonym;
+
+    DROP DATAVERSE CommerceData;
+
+When an artifact is dropped, it will be dropped from the current dataverse if none is specified
+(see the `DROP DATASET` example above) or from the specified dataverse (see the `DROP TYPE` example above)
+if one is specified by fully qualifying the artifact name in the `DROP` statement.
+When specifying an index to drop, the index name must be qualified by the dataset that it indexes.
+When specifying a function to drop, since the query language allows functions to be overloaded by their number of arguments,
+the identifying name of the function to be dropped must explicitly include that information.
+(`nameSearch@1` above denotes the 1-argument function named nameSearch in the current dataverse.)
+
+### <a id="Load_statement">Load Statement</a>
+
+---
+### LoadStmnt
+**![](../images/diagrams/LoadStmnt.png)**
+
+### Configuration
+**![](../images/diagrams/Configuration.png)**
+
+### KeyValuePair
+**![](../images/diagrams/KeyValuePair.png)**
+
+---
+
+The `LOAD` statement is used to initially populate a dataset via bulk loading of data from an external file.
+An appropriate adapter must be selected to handle the nature of the desired external data.
+The `LOAD` statement accepts the same adapters and the same parameters as discussed earlier for External datasets.
+(See the [guide to external data](../aql/externaldata.html) for more information on the available adapters.)
+If a dataset has an auto-generated primary key field, the file to be imported should not include that field in it.
+
+The target dataset name may be a synonym introduced by a `CREATE SYNONYM` statement.
+
+The following example shows how to bulk load the `customers` dataset from an external file containing data that has been prepared in ADM (Asterix Data Model) format.
+
+##### Example
+
+     LOAD DATASET customers USING localfs
+        (("path"="127.0.0.1:///Users/bignosqlfan/commercenew/gbu.adm"),("format"="adm"));
+
+## <a id="Modification_statements">Modification statements</a>
+
+### <a id="Inserts">Insert Statement</a>
+
+---
+### InsertStmnt
+**![](../images/diagrams/InsertStmnt.png)**
+
+
+---
+
+The `INSERT` statement is used to insert new data into a dataset.
+The data to be inserted comes from a query expression.
+This expression can be as simple as a constant expression, or in general it can be any legal query.
+In case the dataset has an auto-generated primary key, when performing an `INSERT` operation, the system allows the user to manually add the
+auto-generated key field in the `INSERT` statement, or skip that field and the system will automatically generate it and add it. However,
+it is important to note that if a record already exists in the dataset with the auto-generated key provided by the user, then
+that operation is going to fail. As a general rule, insertion will fail if the dataset already has data with the primary key value(s)
+being inserted.
+
+Inserts are processed transactionally by the system.
+The transactional scope of each insert transaction is the insertion of a single object plus its affiliated secondary index entries (if any).
+If the query part of an insert returns a single object, then the `INSERT` statement will be a single, atomic transaction.
+If the query part returns multiple objects, each object being inserted will be treated as a separate transaction.
+
+The target dataset name may be a synonym introduced by a `CREATE SYNONYM` statement.
+
+The following example illustrates a query-based insertion.
+
+##### Example
+
+    INSERT INTO custCopy (SELECT VALUE c FROM customers c)
+
+### <a id="Upserts">Upsert Statement</a>
+
+---
+### UpsertStmnt
+**![](../images/diagrams/UpsertStmnt.png)**
+
+---
+
+The `UPSERT` statement syntactically mirrors the `INSERT` statement discussed above.
+The difference lies in its semantics, which for `UPSERT` are "add or replace" instead of the `INSERT` "add if not present, else error" semantics.
+Whereas an `INSERT` can fail if another object already exists with the specified key, the analogous `UPSERT` will replace the previous object's value
+with that of the new object in such cases. Like the `INSERT` statement, the system allows the user to manually provide the auto-generated key
+for datasets with an auto-generated key as its primary key. This operation will insert the record if no record with that key already exists, but
+if a record with the key already exists, then the operation will be converted to a replace/update operation.
+
+The target dataset name may be a synonym introduced by a `CREATE SYNONYM` statement.
+
+The following example illustrates a query-based upsert operation.
+
+##### Example
+
+    UPSERT INTO custCopy (SELECT VALUE c FROM customers c)
+
+### <a id="Deletes">Delete Statement</a>
+---
+### DeleteStmnt
+**![](../images/diagrams/DeleteStmnt.png)**
+
+
+---
+The `DELETE` statement is used to delete data from a target dataset.
+The data to be deleted is identified by a boolean expression involving the variable bound to the target dataset in the `DELETE` statement.
+
+Deletes are processed transactionally by the system.
+The transactional scope of each delete transaction is the deletion of a single object plus its affiliated secondary index entries (if any).
+If the boolean expression for a delete identifies a single object, then the `DELETE` statement itself will be a single, atomic transaction.
+If the expression identifies multiple objects, then each object deleted will be handled as a separate transaction.
+
+The target dataset name may be a synonym introduced by a `CREATE SYNONYM` statement.
+
+The following examples illustrate single-object deletions.
+
+##### Example
+
+    DELETE FROM customers c WHERE c.custid = "C41";
+
+##### Example
+
+    DELETE FROM customers WHERE custid = "C47";
+
+

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/7_ddl_head.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/7_ddl_head.md
new file mode 100644
index 0000000..39a5439
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/7_ddl_head.md

@@ -0,0 +1,34 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+# <a id="DDL_and_DML_statements">7. DDL and DML statements</a>
+
+---
+
+### Stmnt
+**![](../images/diagrams/Stmnt.png)**
+
+### SingleStmnt
+**![](../images/diagrams/SingleStmnt.png)**
+
+---
+
+In addition to queries, an implementation of SQL++ needs to support statements for data definition
+and manipulation purposes as well as controlling the context to be used in evaluating query expressions.
+This section details the DDL and DML statements supported in SQL++ as realized today in Apache AsterixDB.

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_index_only.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_index_only.md
index 93082f7..ad0ec22 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_index_only.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_index_only.md

@@ -19,9 +19,9 @@
 
 ## <a id="Index_Only">Controlling Index-Only-Plan Parameter</a>
 By default, the system tries to build an index-only plan whenever utilizing a secondary index is possible.
-For example, if a SELECT or JOIN query can utilize an enforced B+Tree or R-Tree index on a field, the optimizer
+For example, if a `SELECT` or `JOIN` query can utilize an enforced B+Tree or R-Tree index on a field, the optimizer
 checks whether a secondary-index search alone can generate the result that the query asks for. It
-mainly checks two conditions: (1) predicates used in WHERE only uses the primary key field and/or secondary key field
+mainly checks two conditions: (1) predicates used in `WHERE` only use the primary key field and/or secondary key field
 and (2) the result does not return any other fields. If these two conditions hold, it builds an index-only plan.
 Since an index-only plan only searches a secondary-index to answer a query, it is faster than
 a non-index-only plan that needs to search the primary index.
@@ -33,5 +33,5 @@
 
     set `compiler.indexonly` "false";
 
-    SELECT m.message AS message
-    FROM GleambookMessages m where m.message = " love product-b its shortcut-menu is awesome:)";
+    SELECT o.order_date AS orderdate
+    FROM orders o WHERE o.order_date = "2020-05-01";

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parallel_sort.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parallel_sort.md
index 350117b..b347017 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parallel_sort.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parallel_sort.md

@@ -35,7 +35,6 @@
 
     SET `compiler.sort.parallel` "true";
 
-    SELECT VALUE user
-    FROM GleambookUsers AS user
-    ORDER BY ARRAY_LENGTH(user.friendIds) DESC;
-
+    SELECT VALUE o
+    FROM orders AS o
+    ORDER BY ARRAY_LENGTH(o.items) DESC;

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parameters.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parameters.md
index f87771b..86510cf 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parameters.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_parameters.md

@@ -17,9 +17,10 @@
  ! under the License.
  !-->
 
-The SET statement can be used to override some cluster-wide configuration parameters for a specific request:
+The `SET` statement can be used to override some cluster-wide configuration parameters for a specific request:
 
-    SET <IDENTIFIER> <STRING_LITERAL>
+### SetStmnt
+**![](../images/diagrams/SetStmnt.png)**
 
 As parameter identifiers are qualified names (containing a '.') they have to be escaped using backticks (\`\`).
 Note that changing query parameters will not affect query correctness but only impact performance
@@ -46,8 +47,8 @@
 
     SET `compiler.parallelism` "16";
 
-    SELECT u.name AS uname, m.message AS message
-    FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id;
+    SELECT c.name AS cname, o.orderno AS orderno
+    FROM customers c JOIN orders o ON c.custid = o.custid;
 
 ## <a id="Memory_parameters">Memory Parameters</a>
 In the system, each blocking runtime operator such as join, group-by and order-by
@@ -76,22 +77,22 @@
 
     SET `compiler.groupmemory` "64MB";
 
-    SELECT msg.authorId, COUNT(*)
-    FROM GleambookMessages msg
-    GROUP BY msg.authorId;
+    SELECT c.custid, COUNT(*)
+    FROM customers c
+    GROUP BY c.custid;
 
 ##### Example
 
     SET `compiler.sortmemory` "67108864";
 
-    SELECT VALUE user
-    FROM GleambookUsers AS user
-    ORDER BY ARRAY_LENGTH(user.friendIds) DESC;
+    SELECT VALUE o
+    FROM orders AS o
+    ORDER BY ARRAY_LENGTH(o.items) DESC;
 
 ##### Example
 
     SET `compiler.joinmemory` "132000KB";
 
-    SELECT u.name AS uname, m.message AS message
-    FROM GleambookUsers u JOIN GleambookMessages m ON m.authorId = u.id;
+    SELECT c.name AS cname, o.orderno AS orderno
+    FROM customers c JOIN orders o ON c.custid = o.custid;
 

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_3_resolution.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_3_resolution.md
index 988d89f..c3f0450 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_3_resolution.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_3_resolution.md

@@ -37,13 +37,13 @@
 As a result, in general the system cannot know whether any object in a particular dataset will have a field named `c`.
 These assumptions affect how errors are handled.
 If you try to access dataset `a.b` and no dataset by that name exists, you will get an error and your query will not run.
-However, if you try to access a field `c` in a collection of objects, your query will run and return `missing` for each object that doesn't have a field named `c` – this is because it’s possible that some object (someday) could have such a field.
+However, if you try to access a field `c` in a collection of objects, your query will run and return `missing` for each object that doesn't have a field named `c` - this is because it’s possible that some object (someday) could have such a field.
 
 ## <a id="Binding_variables">Binding Variables</a>
 
 Variables can be bound in the following ways:
 
-1.  WITH and LET clauses bind a variable to the result of an expression in a straightforward way
+1.  `WITH` and `LET` clauses bind a variable to the result of an expression in a straightforward way
 
     Examples:
 
@@ -53,7 +53,7 @@
     `LET pay = salary + bonus`
     binds the variable `pay` to the result of evaluating the expression `salary + bonus`.
 
-2.  FROM, GROUP BY, and SELECT clauses have optional AS subclauses that contain an expression and a name (called an *iteration variable* in a FROM clause, or an alias in GROUP BY or SELECT.)
+2.  `FROM`, `GROUP BY`, and `SELECT` clauses have optional `AS` subclauses that contain an expression and a name (called an *iteration variable* in a `FROM` clause, or an alias in `GROUP BY` or `SELECT`).
 
     Examples:
 
@@ -63,15 +63,15 @@
 
     `SELECT MAX(price) AS highest_price`
 
-    An AS subclause always binds the name (as a variable) to the result of the expression (or, in the case of a FROM clause, to the *individual members* of the collection identified by the expression.)
+    An `AS` subclause always binds the name (as a variable) to the result of the expression (or, in the case of a `FROM` clause, to the *individual members* of the collection identified by the expression).
 
-    It's always a good practice to use the keyword AS when defining an alias or iteration variable.
-    However, as in SQL, the syntax allows the keyword AS to be omitted.
-    For example, the FROM clause above could have been written like this:
+    It's always a good practice to use the keyword `AS` when defining an alias or iteration variable.
+    However, as in SQL, the syntax allows the keyword `AS` to be omitted.
+    For example, the `FROM` clause above could have been written like this:
 
     `FROM customer c, order o`
 
-    Omitting the keyword AS does not affect the binding of variables.
+    Omitting the keyword `AS` does not affect the binding of variables.
     The FROM clause in this example binds variables c and o whether the keyword AS is used or not.
 
     In certain cases, a variable is automatically bound even if no alias or variable-name is specified.
@@ -84,18 +84,18 @@
 
     `SELECT item[0].price` binds a variable named `price`
 
-    Note that a FROM clause iterates over a collection (usually a dataset), binding a variable to each member of the collection in turn.
+    Note that a `FROM` clause iterates over a collection (usually a dataset), binding a variable to each member of the collection in turn.
     The name of the collection remains in scope, but it is not a variable.
-    For example, consider this FROM clause used in a self-join:
+    For example, consider this `FROM` clause used in a self-join:
 
     `FROM customer AS c1, customer AS c2`
 
-    This FROM clause joins the customer dataset to itself, binding the iteration variables c1 and c2 to objects in the left-hand-side and right-hand-side of the join, respectively.
-    After the FROM clause, c1 and c2 are in scope as variables, and customer remains accessible as a dataset name but not as a variable.
+    This `FROM` clause joins the `customer` dataset to itself, binding the iteration variables `c1` and `c2` to objects in the left-hand-side and right-hand-side of the join, respectively.
+    After the `FROM` clause, `c1` and `c2` are in scope as variables, and `customer` remains accessible as a dataset name but not as a variable.
 
-3.  Special rules for GROUP BY:
+3.  Special rules for `GROUP BY`:
 
-    1.  If a GROUP BY clause specifies an expression that has no explicit alias, it binds a pseudo-variable that is lexicographically identical to the expression itself.
+    - (3A): If a `GROUP BY` clause specifies an expression that has no explicit alias, it binds a pseudo-variable that is lexicographically identical to the expression itself.
         For example:
 
         `GROUP BY salary + bonus` binds a pseudo-variable named `salary + bonus`.
@@ -109,15 +109,15 @@
             SELECT salary + bonus, COUNT(*) AS how_many
 
         While it might have been more elegant to explicitly require an alias in cases like this, the pseudo-variable rule is retained for SQL compatibility.
-        Note that the expression `salary + bonus` is not *actually* evaluated in the HAVING and SELECT clauses (and could not be since `salary` and `bonus` are no longer individually in scope).
-        Instead, the expression `salary + bonus` is treated as a reference to the pseudo-variable defined in the GROUP BY clause.
+        Note that the expression `salary + bonus` is not *actually* evaluated in the `HAVING` and `SELECT` clauses (and could not be since `salary` and `bonus` are no longer individually in scope).
+        Instead, the expression `salary + bonus` is treated as a reference to the pseudo-variable defined in the `GROUP BY` clause.
 
-    2.  A GROUP BY clause may be followed by a GROUP AS clause that binds a variable to the group.
+    - (3B): The `GROUP BY` clause may be followed by a `GROUP AS` clause that binds a variable to the group.
         The purpose of this variable is to make the individual objects inside the group visible to subqueries that may need to iterate over them.
 
-        The GROUP AS variable is bound to a multiset of objects.
+        The `GROUP AS` variable is bound to a multiset of objects.
         Each object represents one of the members of the group.
-        Since the group may have been formed from a join, each of the member-objects contains a nested object for each variable bound by the nearest FROM clause (and its LET subclause, if any).
+        Since the group may have been formed from a join, each of the member-objects contains a nested object for each variable bound by the nearest `FROM` clause (and its `LET` subclause, if any).
         These nested objects, in turn, contain the actual fields of the group-member.
         To understand this process, consider the following query fragment:
 
@@ -128,7 +128,7 @@
         Suppose that the objects in `parts` have fields `partno`, `color`, and `suppno`.
         Suppose that the objects in suppliers have fields `suppno` and `location`.
 
-        Then, for each group formed by the GROUP BY, the variable g will be bound to a multiset with the following structure:
+        Then, for each group formed by the `GROUP BY`, the variable `g` will be bound to a multiset with the following structure:
 
             [ { "p": { "partno": "p1", "color": "red", "suppno": "s1" },
                 "s": { "suppno": "s1", "location": "Denver" } },
@@ -139,49 +139,49 @@
 
 ## <a id="Scoping">Scoping</a>
 
-In general, the variables that are in scope at a particular position are those variables that were bound earlier in the current query block, in outer (enclosing) query blocks, or in a WITH clause at the beginning of the query.
+In general, the variables that are in scope at a particular position are those variables that were bound earlier in the current query block, in outer (enclosing) query blocks, or in a `WITH` clause at the beginning of the query.
 More specific rules follow.
 
 The clauses in a query block are conceptually processed in the following order:
 
-* FROM (followed by LET subclause, if any)
-* WHERE
-* GROUP BY (followed by LET subclause, if any)
-* HAVING
-* SELECT or SELECT VALUE
-* ORDER BY
-* OFFSET
-* LIMIT
+* `FROM` (followed by `LET` subclause, if any)
+* `WHERE`
+* `GROUP BY` (followed by `LET` subclause, if any)
+* `HAVING`
+* `SELECT` or `SELECT VALUE`
+* `ORDER BY`
+* `OFFSET`
+* `LIMIT`
 
 During processing of each clause, the variables that are in scope are those variables that are bound in the following places:
 
 1.  In earlier clauses of the same query block (as defined by the ordering given above).
 
     Example: `FROM orders AS o SELECT o.date`
-    The variable `o` in the SELECT clause is bound, in turn, to each object in the dataset `orders`.
+    The variable `o` in the `SELECT` clause is bound, in turn, to each object in the dataset `orders`.
 
 2.  In outer query blocks in which the current query block is nested.
     In case of duplication, the innermost binding wins.
 
-3.  In the WITH clause (if any) at the beginning of the query.
+3.  In the `WITH` clause (if any) at the beginning of the query.
 
-However, in a query block where a GROUP BY clause is present:
+However, in a query block where a `GROUP BY` clause is present:
 
-1.  In clauses processed before GROUP BY, scoping rules are the same as though no GROUP BY were present.
+1.  In clauses processed before `GROUP BY`, scoping rules are the same as though no `GROUP BY` were present.
 
-2.  In clauses processed after GROUP BY, the variables bound in the nearest FROM-clause (and its LET subclause, if any) are removed from scope and replaced by the variables bound in the GROUP BY clause (and its LET subclause, if any).
-    However, this replacement does not apply inside the arguments of the five SQL special aggregating functions (MIN, MAX, AVG, SUM, and COUNT).
+2.  In clauses processed after `GROUP BY`, the variables bound in the nearest `FROM`-clause (and its `LET` subclause, if any) are removed from scope and replaced by the variables bound in the `GROUP BY` clause (and its `LET` subclause, if any).
+    However, this replacement does not apply inside the arguments of the five SQL special aggregating functions (`MIN`, `MAX`, `AVG`, `SUM`, and `COUNT`).
     These functions still need to see the individual data items over which they are computing an aggregation.
     For example, after `FROM employee AS e GROUP BY deptno`, it would not be valid to reference `e.salary`, but `AVG(e.salary)` would be valid.
 
-Special case: In an expression inside a FROM clause, a variable is in scope if it was bound in an earlier expression in the same FROM clause.
+Special case: In an expression inside a `FROM` clause, a variable is in scope if it was bound in an earlier expression in the same `FROM` clause.
 Example:
 
     FROM orders AS o, o.items AS i
 
 The reason for this special case is to support iteration over nested collections.
 
-Note that, since the SELECT clause comes *after* the WHERE and GROUP BY clauses in conceptual processing order, any variables defined in SELECT are not visible in WHERE or GROUP BY.
+Note that, since the `SELECT` clause comes *after* the `WHERE` and `GROUP BY` clauses in conceptual processing order, any variables defined in `SELECT` are not visible in `WHERE` or `GROUP BY`.
 Therefore the following query will not return what might be the expected result (since in the WHERE clause, `pay` will be interpreted as a field in the `emp` object rather than as the computed value `salary + bonus`):
 
     SELECT name, salary + bonus AS pay
@@ -197,27 +197,37 @@
     SELECT e.name, pay
     ORDER BY pay
 
-Note that variables defined by `JOIN` subclauses are not visible to other subclauses in the same `FROM` clause.
-This also applies to the `FROM` variable that starts the `JOIN` subclause.
+Note:
+In the phrase *expr1* `JOIN` *expr2* `ON` *expr3*, variables defined in *expr1* are visible in *expr3* but not in *expr2*. Here's an example that will not work:
+
+	FROM orders AS o JOIN o.items AS i ON 1 = 1
+
+The variable `o`, defined in the phrase before `JOIN`, cannot be used in the phrase immediately following `JOIN`. The probable intent of this example could be accomplished in either of the following ways:
+
+	FROM orders AS o UNNEST o.items AS i
+
+	FROM orders AS o, o.items AS i
+
+To summarize this rule: You may not use left-correlation in an explicit `JOIN` clause.
 
 ## <a id="Resolving_names">Resolving Names</a>
 
 The process of name resolution begins with the leftmost identifier in the name.
 The rules for resolving the leftmost identifier are:
 
-1.  _In a FROM clause_: Names in a FROM clause identify the collections over which the query block will iterate.
+1.  _In a `FROM` clause_: Names in a `FROM` clause identify the collections over which the query block will iterate.
     These collections may be stored datasets or may be the results of nested query blocks.
     A stored dataset may be in a named dataverse or in the default dataverse.
-    Thus, if the two-part name `a.b` is in a FROM clause, a might represent a dataverse and `b` might represent a dataset in that dataverse.
-    Another example of a two-part name in a FROM clause is `FROM orders AS o, o.items AS i`.
-    In `o.items`, `o` represents an order object bound earlier in the FROM clause, and items represents the items object inside that order.
+    Thus, if the two-part name `a.b` is in a `FROM` clause, `a` might represent a dataverse and `b` might represent a dataset in that dataverse.
+    Another example of a two-part name in a `FROM` clause is `FROM orders AS o, o.items AS i`.
+    In `o.items`, `o` represents an order object bound earlier in the `FROM` clause, and `items` represents the items object inside that order.
 
-    The rules for resolving the leftmost identifier in a FROM clause (including a JOIN subclause), or in the expression following IN in a quantified predicate, are as follows:
+    The rules for resolving the leftmost identifier in a `FROM` clause (including a `JOIN` subclause), or in the expression following `IN` in a quantified predicate, are as follows:
 
-    1.  If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable.
-        (Note that in the case of a subquery, an in-scope variable might have been bound in an outer query block; this is called a correlated subquery.)
+    - (1A):  If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable.
+        (Note that in the case of a subquery, an in-scope variable might have been bound in an outer query block; this is called a correlated subquery).
 
-    2.  Otherwise, if the identifier is the first part of a two-part name like `a.b`, the name is treated as `dataverse.dataset`.
+    - (1B):  Otherwise, if the identifier is the first part of a two-part name like `a.b`, the name is treated as `dataverse.dataset`.
         If the identifier stands alone as a one-part name, it is treated as the name of a dataset in the default dataverse.
         If the designated dataset exists then the identifier is resolved to that dataset,
         otherwise if a synonym with given name exists then the identifier is resolved to the target dataset of that
@@ -227,29 +237,29 @@
         Datasets take precedence over synonyms, so if both a dataset and a synonym have the same name then the
         resolution is to the dataset.
 
-2.  _Elsewhere in a query block_: In clauses other than FROM, a name typically identifies a field of some object.
-    For example, if the expression `a.b` is in a SELECT or WHERE clause, it's likely that `a` represents an object and `b` represents a field in that object.
+2.  _Elsewhere in a query block_: In clauses other than `FROM`, a name typically identifies a field of some object.
+    For example, if the expression `a.b` is in a `SELECT` or `WHERE` clause, it's likely that `a` represents an object and `b` represents a field in that object.
 
     The rules for resolving the leftmost identifier in clauses other than the ones listed in Rule 1 are:
 
-    1.  If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable.
-        (In the case of a correlated subquery, the in-scope variable might have been bound in an outer query block.)
+    - (2A):  If the identifier matches a variable-name that is in scope, it resolves to the binding of that variable.
+        (In the case of a correlated subquery, the in-scope variable might have been bound in an outer query block).
 
-    2.  (The "Single Variable Rule"): Otherwise, if the FROM clause in the current query block binds exactly one variable, the identifier is treated as a field access on the object bound to that variable.
-        For example, in the query `FROM customer SELECT address`, the identifier address is treated as a field in the object bound to the variable customer.
-        At runtime, if the object bound to customer has no `address` field, the `address` expression will return `missing`.
-        If the FROM clause in the current query block binds multiple variables, name resolution fails with an "ambiguous name" error.
-        If there's no FROM clause in the current query block, name resolution fails with an "undefined identifier" error.
+    - (2B):  (The "Single Variable Rule"): Otherwise, if the `FROM` clause in the current query block binds exactly one variable, the identifier is treated as a field access on the object bound to that variable.
+        For example, in the query `FROM customer SELECT address`, the identifier `address` is treated as a field in the object bound to the variable `customer`.
+        At runtime, if the object bound to `customer` has no `address` field, the `address` expression will return `missing`.
+        If the `FROM` clause in the current query block binds multiple variables, name resolution fails with an "ambiguous name" error.
+        If there's no `FROM` clause in the current query block, name resolution fails with an "undefined identifier" error.
         Note that the Single Variable Rule searches for bound variables only in the current query block, not in outer (containing) blocks.
         The purpose of this rule is to permit the compiler to resolve field-references unambiguously without relying on any schema information.
-        Also note that variables defined by LET clauses do not participate in the resolution process performed by this rule.
+        Also note that variables defined by `LET` clauses do not participate in the resolution process performed by this rule.
 
-        Exception: In a query that has a GROUP BY clause, the Single Variable Rule does not apply in any clauses that occur after the GROUP BY because, in these clauses, the variables bound by the FROM clause are no longer in scope.
-        In clauses after GROUP BY, only Rule 2.1 applies.
+        Exception: In a query that has a `GROUP BY` clause, the Single Variable Rule does not apply in any clauses that occur after the `GROUP BY` because, in these clauses, the variables bound by the `FROM` clause are no longer in scope.
+        In clauses after `GROUP BY`, only Rule (2A) applies.
 
-3.  In an ORDER BY clause following a UNION ALL expression:
+3.  In an `ORDER BY` clause following a `UNION ALL` expression:
 
-    The leftmost identifier is treated as a field-access on the objects that are generated by the UNION ALL.
+    The leftmost identifier is treated as a field-access on the objects that are generated by the `UNION ALL`.
     For example:
 
         query-block-1
@@ -264,9 +274,9 @@
     For example, if the whole query is `ARRAY_COUNT(a.b)` then `a.b` will be treated as dataset `b` contained in
     dataverse `a`.
     Note that this rule only applies to identifiers which are located directly inside a standalone expression.
-    Identifiers inside SELECT statements in a standalone expression are still resolved according to Rules 1-3.
+    Identifiers inside `SELECT` statements in a standalone expression are still resolved according to Rules 1-3.
     For example, if the whole query is `ARRAY_SUM( (FROM employee AS e SELECT VALUE salary) )` then `salary` is resolved
-    as `e.salary` following the "Single Variable Rule" (Rule 2.2).
+    as `e.salary` following the "Single Variable Rule" (Rule (2B)).
 
 5.  Once the leftmost identifier has been resolved, the following dots and identifiers in the name (if any) are treated as a path expression that navigates to a field nested inside that object.
     The name resolves to the field at the end of the path.

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_4_manual_data.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_4_manual_data.md
new file mode 100644
index 0000000..7516e3f
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_4_manual_data.md

@@ -0,0 +1,294 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+This appendix lists the data definitions and the datasets used for the examples provided throughout this manual. 
+
+### <a id="definition_statements">Data Definitions</a>
+
+	CREATE DATAVERSE Commerce IF NOT EXISTS;
+	
+	USE Commerce;
+	
+	CREATE TYPE addressType AS {
+	    street:			string,
+	    city:			string,
+	    zipcode:			string?
+	};
+	
+	CREATE TYPE customerType AS {
+        custid:			string,
+        name:			string,
+	    address:			addressType?
+    };
+	
+	CREATE DATASET customers(customerType)
+	    PRIMARY KEY custid;
+    
+	CREATE TYPE itemType AS {
+	    itemno:			int,
+	    qty:			int,
+	    price:			int
+	};
+
+	CREATE TYPE orderType AS {
+	    orderno:			int,
+	    custid:			string,
+	    order_date:			string,
+	    ship_date:			string?,
+	    items:			[ itemType ]
+	};
+
+	CREATE DATASET orders(orderType)
+	    PRIMARY KEY orderno;
+### <a id="customers_data">Customers Data</a>
+
+	[
+	    {
+	        "custid": "C13",
+	        "name": "T. Cody",
+	        "address": {
+	            "street": "201 Main St.",
+	            "city": "St. Louis, MO",
+	            "zipcode": "63101"
+	        },
+	        "rating": 750
+	    },
+	    {
+	        "custid": "C25",
+	        "name": "M. Sinclair",
+	        "address": {
+	            "street": "690 River St.",
+	            "city": "Hanover, MA",
+	            "zipcode": "02340"
+	        },
+	        "rating": 690
+	    },
+	    {
+	        "custid": "C31",
+	        "name": "B. Pruitt",
+	        "address": {
+	            "street": "360 Mountain Ave.",
+	            "city": "St. Louis, MO",
+	            "zipcode": "63101"
+	        }
+	    },
+	    {
+	        "custid": "C35",
+	        "name": "J. Roberts",
+	        "address": {
+	            "street": "420 Green St.",
+	            "city": "Boston, MA",
+	            "zipcode": "02115"
+	        },
+	        "rating": 565
+	    },
+	    {
+	        "custid": "C37",
+	        "name": "T. Henry",
+	        "address": {
+	            "street": "120 Harbor Blvd.",
+	            "city": "Boston, MA",
+	            "zipcode": "02115"
+	        },
+	        "rating": 750
+	    },
+	    {
+	        "custid": "C41",
+	        "name": "R. Dodge",
+	        "address": {
+	            "street": "150 Market St.",
+	            "city": "St. Louis, MO",
+	            "zipcode": "63101"
+	        },
+	        "rating": 640
+	    },
+	    {
+	        "custid": "C47",
+	        "name": "S. Logan",
+	        "address": {
+	            "street": "Via del Corso",
+	            "city": "Rome, Italy"
+	        },
+	        "rating": 625
+	    }
+	]
+
+
+### <a id="orders_data">Orders Data</a>
+
+	[
+	    {
+	        "orderno": 1001,
+	        "custid": "C41",
+	        "order_date": "2020-04-29",
+	        "ship_date": "2020-05-03",
+	        "items": [
+	            {
+	                "itemno": 347,
+	                "qty": 5,
+	                "price": 19.99
+	            },
+	            {
+	                "itemno": 193,
+	                "qty": 2,
+	                "price": 28.89
+	            }
+	        ]
+	    },
+	    {
+	        "orderno": 1002,
+	        "custid": "C13",
+	        "order_date": "2020-05-01",
+	        "ship_date": "2020-05-03",
+	        "items": [
+	            {
+	                "itemno": 460,
+	                "qty": 95,
+	                "price": 100.99
+	            },
+	            {
+	                "itemno": 680,
+	                "qty": 150,
+	                "price": 8.75
+	            }
+	        ]
+	    },
+	    {
+	        "orderno": 1003,
+	        "custid": "C31",
+	        "order_date": "2020-06-15",
+	        "ship_date": "2020-06-16",
+	        "items": [
+	            {
+	                "itemno": 120,
+	                "qty": 2,
+	                "price": 88.99
+	            },
+	            {
+	                "itemno": 460,
+	                "qty": 3,
+	                "price": 99.99
+	            }
+	        ]
+	    },
+	    {
+	        "orderno": 1004,
+	        "custid": "C35",
+	        "order_date": "2020-07-10",
+	        "ship_date": "2020-07-15",
+	        "items": [
+	            {
+	                "itemno": 680,
+	                "qty": 6,
+	                "price": 9.99
+	            },
+	            {
+	                "itemno": 195,
+	                "qty": 4,
+	                "price": 35
+	            }
+	        ]
+	    },
+	    {
+	        "orderno": 1005,
+	        "custid": "C37",
+	        "order_date": "2020-08-30",
+	        "items": [
+	            {
+	                "itemno": 460,
+	                "qty": 2,
+	                "price": 99.98
+	            },
+	            {
+	                "itemno": 347,
+	                "qty": 120,
+	                "price": 22
+	            },
+	            {
+	                "itemno": 780,
+	                "qty": 1,
+	                "price": 1500
+	            },
+	            {
+	                "itemno": 375,
+	                "qty": 2,
+	                "price": 149.98
+	            }
+	        ]
+	    },
+	    {
+	        "orderno": 1006,
+	        "custid": "C41",
+	        "order_date": "2020-09-02",
+	        "ship_date": "2020-09-04",
+	        "items": [
+	            {
+	                "itemno": 680,
+	                "qty": 51,
+	                "price": 25.98
+	            },
+	            {
+	                "itemno": 120,
+	                "qty": 65,
+	                "price": 85
+	            },
+	            {
+	                "itemno": 460,
+	                "qty": 120,
+	                "price": 99.98
+	            }
+	        ]
+	    },
+	    {
+	        "orderno": 1007,
+	        "custid": "C13",
+	        "order_date": "2020-09-13",
+	        "ship_date": "2020-09-20",
+	        "items": [
+	            {
+	                "itemno": 185,
+	                "qty": 5,
+	                "price": 21.99
+	            },
+	            {
+	                "itemno": 680,
+	                "qty": 1,
+	                "price": 20.5
+	            }
+	        ]
+	    },
+	    {
+	        "orderno": 1008,
+	        "custid": "C13",
+	        "order_date": "2020-10-13",
+	        "items": [
+	            {
+	                "itemno": 460,
+	                "qty": 20,
+	                "price": 99.99
+	            }
+	        ]
+	    },
+	    {
+	        "orderno": 1009,
+	        "custid": "C13",
+	        "order_date": "2020-10-13",
+	        "items": []
+	    }
+	]

diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_4_title.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_4_title.md
new file mode 100644
index 0000000..144cd2b
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_4_title.md

@@ -0,0 +1,20 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+## <a id="Manual_data">Appendix 4. Example Data</a>

diff --git a/asterixdb/asterix-doc/src/main/user-defined_function/udf.md b/asterixdb/asterix-doc/src/main/user-defined_function/udf.md
index fcb1e14..7ca23bb 100644
--- a/asterixdb/asterix-doc/src/main/user-defined_function/udf.md
+++ b/asterixdb/asterix-doc/src/main/user-defined_function/udf.md

@@ -38,7 +38,7 @@
 
 Then, in your `cc.conf`, in the `[nc]` section, add the correct `credential.file` path
 
-    [cc]
+    [nc]
     address = 127.0.0.1
     ...
     ...
@@ -77,7 +77,7 @@
     USE udfs;
 
     CREATE FUNCTION mysum(a: int32, b: int32)
-      RETURNS int32
+    RETURNS int32
       AS "org.apache.asterix.external.library.MySumFactory" AT testlib;
 
 ## <a id="PythonUDF">Creating a Python UDF</a>
@@ -126,7 +126,8 @@
 
     USE udfs;
 
-    CREATE FUNCTION sentiment(a) 
+    CREATE FUNCTION sentiment(a)
+    RETURNS TweetType
       AS "sentiment_mod", "sent_model.sentiment" AT pylib;
 
 By default, AsterixDB will treat all external functions as deterministic. It means the function must return the same
@@ -200,7 +201,6 @@
     USE udfs;
 
     CREATE FUNCTION addMentionedUsers(t: TweetType)
-      RETURNS TweetType
       AS "org.apache.asterix.external.library.AddMentionedUsersFactory" AT testlib
       WITH { "resources": { "textFieldName": "text" } };
 
@@ -215,6 +215,23 @@
 You can check the annotated Tweets by querying the `ProcessedTweets` dataset:
 
     SELECT * FROM ProcessedTweets LIMIT 10;
+
+## <a name="adapter">Installing a user-defined Feed Adapter</a>
+
+First, upload a zip file packaged the same way as a Java UDF, but also containing the adapter you would like to use.
+Next, issue a `CREATE ADAPTER` statement referencing the class name. For example:
+
+    CREATE ADAPTER TweetAdapter
+      AS "org.apache.asterix.external.library.adapter.TestTypedAdapterFactory" AT testlib;
+
+
+Then, the adapter can be used like any other adapter in a feed.
+
+    CREATE FEED TweetFeed WITH {
+      "adapter-name": "TweetAdapter",
+      "type-name" : "TweetType",
+      "num_output_records": 4
+    };
 
 ## <a name="uninstall">Uninstalling an UDF Library</a>
 

diff --git a/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md b/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md
index 659c13b..322ead9 100644
--- a/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md
+++ b/asterixdb/asterix-doc/src/main/user-defined_function/udf_title.md

@@ -24,4 +24,5 @@
 * [Installing an UDF Library](#installingUDF)
 * [Attaching an UDF on Data Feeds](#UDFOnFeeds)
 * [A quick look of the UDF configuration](#udfConfiguration)
+* [Installing a user-defined Feed Adapter](#adapter)
 * [Uninstalling an UDF Library](#uninstall)
\ No newline at end of file

diff --git a/asterixdb/asterix-doc/src/site/site.xml b/asterixdb/asterix-doc/src/site/site.xml
index f1cd93b..ee71f9a 100644
--- a/asterixdb/asterix-doc/src/site/site.xml
+++ b/asterixdb/asterix-doc/src/site/site.xml

@@ -96,6 +96,7 @@
       <item name="Filter-Based LSM Index Acceleration" href="sqlpp/filters.html"/>
       <item name="Support of Full-text Queries" href="sqlpp/fulltext.html"/>
       <item name="Support of Similarity Queries" href="sqlpp/similarity.html"/>
+      <item name="Support of Interval Joins" href="interval_join.html"/>
     </menu>
 
     <menu name="Deprecated">
commit	32f4269ccd91b892eb5fad250b8a4ab217525123	[log] [tgz]
author	Ian Maxon <ian@maxons.email>	Thu Nov 05 11:05:09 2020 -0800
committer	Ian Maxon <imaxon@uci.edu>	Fri Nov 06 06:48:25 2020 +0000
tree	e67b5ebcbc4501beef36064038f9040d418c8da3
parent	638dbd8bdb2bfa8c98d9f08f1e6f6bded63aa80d [diff]