[ASTERIXDB-2133] Fix unncessary binary search in GroupFrameAccessor

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- GroupFrameAccessor holds a list of frames from a run during the merge
step of merge sort. However, everytime we access a tuple, it performs
binary search to get the physical tuple index. This patch fixes this
by remembering the last accessed frame. It is expected that tuples
are accessed sequentially (since it's the merge step), which greatly
reduces binary searches

Change-Id: I4a1b19ad47f6b1dda4bd5c417932e4c9ba36a714
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2079
Sonar-Qube: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Contrib: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ian Maxon <imaxon@apache.org>
diff --git a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
index cac50e7..bf61435 100644
--- a/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
+++ b/hyracks-fullstack/hyracks/hyracks-dataflow-std/src/main/java/org/apache/hyracks/dataflow/std/sort/util/GroupFrameAccessor.java
@@ -57,10 +57,13 @@
     private final RecordDescriptor recordDescriptor;
     private final int minFrameSize;
     private final FrameTupleAccessor frameTupleAccessor;
-    private int lastTupleIndex;
     private int lastFrameId;
+    // the start tuple index of the last accessed frame (inclusive)
+    private int lastFrameStart;
+    // the end tuple index of the last accessed frame (exclusive)
+    private int lastFrameEnd;
     private ByteBuffer buffer;
-    private List<InnerFrameInfo> innerFrameInfos;
+    private final List<InnerFrameInfo> innerFrameInfos;
 
     public GroupFrameAccessor(int minFrameSize, RecordDescriptor recordDescriptor) {
         this.minFrameSize = minFrameSize;
@@ -127,8 +130,9 @@
     @Override
     public void reset(ByteBuffer buffer) {
         this.buffer = buffer;
-        this.lastTupleIndex = -1;
         this.lastFrameId = -1;
+        this.lastFrameStart = -1;
+        this.lastFrameEnd = -1;
         parseGroupedBuffer(0, buffer.limit());
     }
 
@@ -153,12 +157,13 @@
 
     private int resetSubTupleAccessor(int tupleIndex) {
         assert tupleIndex < getTupleCount();
-        if (innerFrameInfos.size() == 1) {
-            return tupleIndex;
+        if (tupleIndex >= lastFrameStart && tupleIndex < lastFrameEnd) {
+            // a special optimization path
+            // since GroupFrameAccessor is used by merge, it is expected that tuples are accessed sequentially
+            // thus, if tuple still fit into the last frame, we do not need to perform binary search
+            return tupleIndex - lastFrameStart;
         }
-        if (tupleIndex == lastTupleIndex) {
-            return lastFrameId > 0 ? lastTupleIndex - innerFrameInfos.get(lastFrameId - 1).tupleCount : lastTupleIndex;
-        }
+        // we perform binary search to get the frame Id
         int subFrameId = Collections.binarySearch(innerFrameInfos, tupleIndex);
         if (subFrameId >= 0) {
             subFrameId++;
@@ -166,9 +171,11 @@
             subFrameId = -subFrameId - 1;
         }
         frameTupleAccessor.reset(buffer, innerFrameInfos.get(subFrameId).start, innerFrameInfos.get(subFrameId).length);
-        lastTupleIndex = tupleIndex;
         lastFrameId = subFrameId;
-        return lastFrameId > 0 ? lastTupleIndex - innerFrameInfos.get(lastFrameId - 1).tupleCount : lastTupleIndex;
+        lastFrameStart = lastFrameId > 0 ? innerFrameInfos.get(lastFrameId - 1).tupleCount : 0;
+        lastFrameEnd = innerFrameInfos.get(lastFrameId).tupleCount;
+
+        return tupleIndex - lastFrameStart;
     }
 
-}
+}
\ No newline at end of file