Merge remote-tracking branch 'gerrit/mad-hatter'

Change-Id: Ia1402644a9ca2878c493293e9dcb92176d589736
diff --git a/asterixdb/asterix-doc/pom.xml b/asterixdb/asterix-doc/pom.xml
index 98e86b3..88cc8cf 100644
--- a/asterixdb/asterix-doc/pom.xml
+++ b/asterixdb/asterix-doc/pom.xml
@@ -52,7 +52,7 @@
             <configuration>
               <target>
                 <concat destfile="${project.build.directory}/generated-site/markdown/sqlpp/manual.md">
-                  <filelist dir="${project.basedir}/src/main/markdown/sqlpp" files="0_toc.md,1_intro.md,2_expr_title.md,2_expr.md,3_query_title.md,3_declare_dataverse.md,3_declare_function.md,3_query.md,4_error_title.md,4_error.md,5_ddl_head.md,5_ddl_dataset_index.md,5_ddl_function_removal.md,5_ddl_dml.md,appendix_1_title.md,appendix_1_keywords.md,appendix_2_title.md,appendix_2_parameters.md,appendix_2_parallel_sort.md,appendix_2_index_only.md,appendix_2_interval_joins.md,appendix_3_title.md,appendix_3_resolution.md" />
+                  <filelist dir="${project.basedir}/src/main/markdown/sqlpp" files="0_toc.md,1_intro.md,2_expr_title.md,2_expr.md,3_query_title.md,3_declare_dataverse.md,3_declare_function.md,3_query.md,4_error_title.md,4_error.md,5_ddl_head.md,5_ddl_dataset_index.md,5_ddl_function_removal.md,5_ddl_dml.md,appendix_1_title.md,appendix_1_keywords.md,appendix_2_title.md,appendix_2_parameters.md,appendix_2_parallel_sort.md,appendix_2_index_only.md,appendix_2_hints.md,appendix_2_interval_joins.md,appendix_3_title.md,appendix_3_resolution.md" />
                 </concat>
                 <concat destfile="${project.build.directory}/generated-site/markdown/sqlpp/builtins.md">
                   <filelist dir="${project.basedir}/src/main/markdown/builtins" files="0_toc.md,0_toc_sqlpp.md,0_toc_common.md,1_numeric_common.md,1_numeric_delta.md,2_string_common.md,2_string_delta.md,3_binary.md,4_spatial.md,5_similarity.md,6_tokenizing.md,7_temporal.md,7_allens.md,8_record.md,9_aggregate_sql.md,10_comparison.md,11_type.md,13_conditional.md,12_misc.md,15_bitwise.md,14_window.md" />
diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md
index f4e2cef..ca1ca19 100644
--- a/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/0_toc.md
@@ -101,4 +101,5 @@
       * [Parallelism Parameter](#Parallelism_parameter)
       * [Interval Joins](#Interval_joins)
       * [Memory Parameters](#Memory_parameters)
+      * [Query Hints](#Query_hints)
 * [Appendix 3. Variable Bindings and Name Resolution](#Variable_bindings_and_name_resolution)
diff --git a/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_hints.md b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_hints.md
new file mode 100644
index 0000000..0e4f470
--- /dev/null
+++ b/asterixdb/asterix-doc/src/main/markdown/sqlpp/appendix_2_hints.md
@@ -0,0 +1,52 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements.  See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership.  The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License.  You may obtain a copy of the License at
+ !
+ !   http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied.  See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+## <a id="Query_hints">Query Hints</a>
+
+#### <a id="hash_groupby">"hash" GROUP BY hint</a>
+
+The system supports two algorithms for GROUP BY clause evaluation: pre-sorted and hash-based.
+By default, it uses the pre-sorted approach: the input data is first sorted on the grouping fields,
+and then aggregation is performed on that sorted data. The alternative is a hash-based strategy,
+which can be enabled via a `/*+ hash */` GROUP BY hint: the data is aggregated using an in-memory hash table
+(which can spill to disk if necessary). This approach is recommended for low-cardinality grouping fields.
+
+##### Example:
+
+    SELECT c.address.state, count(*)
+    FROM Customers AS c
+    /*+ hash */ GROUP BY c.address.state
+
+#### <a id="hash_bcast_join">"hash-bcast" JOIN hint</a>
+
+By default, the system uses a partitioned-parallel hash join strategy to parallelize the execution of an
+equi-join. In this approach, both sides of the join are repartitioned (if necessary) on a hash of the join key;
+potentially matching data items thus arrive at the same partition to be joined locally.
+This strategy is robust, but not always the fastest when one side of the join has low cardinality and
+the other has high cardinality (since it scans and potentially moves the data from both sides).
+This special case can be better handled by broadcasting (replicating) the smaller side to all data partitions
+of the larger side, without moving the data of the larger side. The system provides a join hint to enable
+this strategy: `/*+ hash-bcast */`. This hint forces the right side of the join to be replicated while the left side
+retains its original partitioning.
+
+##### Example:
+
+    SELECT *
+    FROM Orders AS o JOIN Customers AS c
+    ON o.customer_id /*+ hash-bcast */ = c.customer_id
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/pom.xml b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/pom.xml
index 5f7036a..37e9959 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/pom.xml
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/pom.xml
@@ -107,5 +107,18 @@
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-lang3</artifactId>
     </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-databind</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-api</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.hyracks</groupId>
+      <artifactId>hyracks-util</artifactId>
+      <version>${project.version}</version>
+    </dependency>
   </dependencies>
 </project>
\ No newline at end of file
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java
index 1f0e447..78faaff 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-btree/src/main/java/org/apache/hyracks/storage/am/btree/impls/BTree.java
@@ -69,11 +69,16 @@
 import org.apache.hyracks.storage.common.buffercache.ICachedPage;
 import org.apache.hyracks.storage.common.buffercache.IPageWriteCallback;
 import org.apache.hyracks.storage.common.file.BufferedFileHandle;
+import org.apache.hyracks.util.JSONUtil;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
 
 public class BTree extends AbstractTreeIndex {
 
     public static final float DEFAULT_FILL_FACTOR = 0.7f;
-
+    private static final Logger LOGGER = LogManager.getLogger();
     private static final long RESTART_OP = Long.MIN_VALUE;
     private static final long FULL_RESTART_OP = Long.MIN_VALUE + 1;
     private static final int MAX_RESTARTS = 10;
@@ -1086,6 +1091,7 @@
                 }
                 ((IBTreeLeafFrame) leafFrame).insertSorted(tuple);
             } catch (HyracksDataException | RuntimeException e) {
+                logState(tuple, e);
                 handleException();
                 throw e;
             }
@@ -1194,6 +1200,24 @@
         public void abort() throws HyracksDataException {
             super.handleException();
         }
+
+        private void logState(ITupleReference tuple, Exception e) { // logs frame/size state for a failed add
+            try {
+                ObjectNode state = JSONUtil.createObject();
+                state.set("leafFrame", leafFrame.getState());
+                state.set("interiorFrame", interiorFrame.getState());
+                int tupleSize = Math.max(leafFrame.getBytesRequiredToWriteTuple(tuple), // larger of the two
+                        interiorFrame.getBytesRequiredToWriteTuple(tuple));
+                state.put("tupleSize", tupleSize);
+                state.put("spaceNeeded", tupleWriter.bytesRequired(tuple) + slotSize);
+                state.put("spaceUsed", leafFrame.getBuffer().capacity() - leafFrame.getTotalFreeSpace());
+                state.put("leafMaxBytes", leafMaxBytes);
+                state.put("maxTupleSize", maxTupleSize);
+                LOGGER.error("failed to add tuple {}", state, e);
+            } catch (Throwable t) { // anything thrown while collecting or logging the state
+                e.addSuppressed(t); // is attached to the original exception rather than propagated
+            }
+        }
     }
 
     @SuppressWarnings("rawtypes")
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/pom.xml b/hyracks-fullstack/hyracks/hyracks-storage-am-common/pom.xml
index 6c9276b..af7ec3d 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/pom.xml
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/pom.xml
@@ -109,5 +109,9 @@
       <groupId>com.fasterxml.jackson.core</groupId>
       <artifactId>jackson-databind</artifactId>
     </dependency>
+    <dependency>
+      <groupId>com.fasterxml.jackson.core</groupId>
+      <artifactId>jackson-core</artifactId>
+    </dependency>
   </dependencies>
 </project>
\ No newline at end of file
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java
index dc59612..18d5653 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/api/ITreeIndexFrame.java
@@ -28,6 +28,9 @@
 import org.apache.hyracks.storage.common.buffercache.IBufferCache;
 import org.apache.hyracks.storage.common.buffercache.ICachedPage;
 import org.apache.hyracks.storage.common.buffercache.IExtraPageBlockHelper;
+import org.apache.hyracks.util.JSONUtil;
+
+import com.fasterxml.jackson.databind.node.ObjectNode;
 
 public interface ITreeIndexFrame {
 
@@ -122,4 +125,14 @@
     public ITupleReference getLeftmostTuple() throws HyracksDataException;
 
     public ITupleReference getRightmostTuple() throws HyracksDataException;
+
+    default ObjectNode getState() { // builds a JSON snapshot of this frame's basic counters
+        ObjectNode json = JSONUtil.createObject();
+        json.put("tupleCount", getTupleCount());
+        json.put("freeSpaceOff", getFreeSpaceOff());
+        json.put("level", getLevel());
+        json.put("pageLsn", getPageLsn());
+        json.put("totalFreeSpace", getTotalFreeSpace());
+        return json; // a fresh node per call; implementations may add their own fields to it
+    }
 }
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
index 6106358..08d4564 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-common/src/main/java/org/apache/hyracks/storage/am/common/frames/TreeIndexNSMFrame.java
@@ -31,6 +31,10 @@
 import org.apache.hyracks.storage.am.common.api.ITreeIndexTupleWriter;
 import org.apache.hyracks.storage.am.common.ophelpers.SlotOffTupleOff;
 import org.apache.hyracks.storage.common.buffercache.ICachedPage;
+import org.apache.hyracks.util.JSONUtil;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 
 public abstract class TreeIndexNSMFrame implements ITreeIndexFrame {
 
@@ -354,4 +358,20 @@
             return frameTuple;
         }
     }
+
+    @Override
+    public ObjectNode getState() { // the interface's default snapshot plus this frame's large flag
+        ObjectNode state = ITreeIndexFrame.super.getState();
+        state.put("largeFlag", getLargeFlag());
+        return state;
+    }
+
+    @Override
+    public String toString() { // string form of getState(), produced by JSONUtil.convertNode
+        try {
+            return JSONUtil.convertNode(getState());
+        } catch (JsonProcessingException e) {
+            return "failed to convert json"; // fixed fallback text; the exception itself is dropped
+        }
+    }
 }