Merge branch 'gerrit/mad-hatter' into 'gerrit/cheshire-cat'

Change-Id: I3e700b07781bec8fc5b9eabf15a1249ce2be0272
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.query.sqlpp
new file mode 100644
index 0000000..22105a4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.query.sqlpp
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*
+ * Description    : Test the issue described in ASTERIXDB-2949
+ * Success        : Yes
+ */
+
+
+SELECT SUBSTR("•\tABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\n•\tabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\tABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", 0, 1000) AS s;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.adm
new file mode 100644
index 0000000..a36b551
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/substr-ASTERIXDB-2949/substr-ASTERIXDB-2949.0.adm
@@ -0,0 +1 @@
+{ "s": "•\tABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789\n•\tabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\tABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" }
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index f0b20bf..1e142a2 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -9926,6 +9926,11 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="string">
+      <compilation-unit name="substr-ASTERIXDB-2949">
+        <output-dir compare="Text">substr-ASTERIXDB-2949</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="string">
       <compilation-unit name="regexp_position_with_flag/offset0/regex_position0_with_flag">
         <output-dir compare="Text">regexp_position_with_flag/offset0/regex_position0_with_flag</output-dir>
       </compilation-unit>
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
index 828de18..49f6221 100644
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringPointable.java
@@ -463,7 +463,8 @@
             return false;
         }
 
-        builder.reset(out, Math.min(utfLen - byteIdx, (int) (codePointLength * 1.0 * byteIdx / codePointIdx)));
+        int estimateOutBytes = byteIdx == 0 ? codePointLength : (int) (codePointLength * 1.0 * byteIdx / codePointIdx);
+        builder.reset(out, Math.min(utfLen - byteIdx, estimateOutBytes));
         codePointIdx = 0;
         while (byteIdx < utfLen && codePointIdx < codePointLength) {
             builder.appendCodePoint(src.codePointAt(src.getMetaDataLength() + byteIdx));
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java
index 452710e..d188f16 100644
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/AbstractVarLenObjectBuilder.java
@@ -85,8 +85,9 @@
                 for (int i = 0; i < diff; i++) {
                     out.writeByte(0);
                 }
-                for (int i = ary.getLength() - 1; i >= actualDataStart + diff; i--) {
-                    ary.getByteArray()[i] = ary.getByteArray()[i - diff];
+                int firstCharIdx = startOffset + estimateMetaLen;
+                for (int dest = ary.getLength() - 1, src = dest - diff; src >= firstCharIdx; dest--, src--) {
+                    ary.getByteArray()[dest] = ary.getByteArray()[src];
                 }
             }
         }
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
index dcf9a10..f088c7e 100644
--- a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/test/java/org/apache/hyracks/data/std/primitive/UTF8StringPointableTest.java
@@ -362,4 +362,37 @@
         assertEquals(0, expected.compareTo(result));
     }
 
+    @Test
+    public void testStringBuilder() throws Exception {
+        UTF8StringBuilder builder = new UTF8StringBuilder();
+        GrowableArray array = new GrowableArray();
+        UTF8StringPointable stringPointable = new UTF8StringPointable();
+        String writtenString;
+        int startIdx;
+
+        array.append(STRING_UTF8_MIX.getByteArray(), STRING_UTF8_MIX.getStartOffset(), STRING_UTF8_MIX.getLength());
+        String chunk = "ABC";
+        String originalString = chunk.repeat(699051);
+
+        // test grow path
+        startIdx = array.getLength();
+        builder.reset(array, 2);
+        builder.appendString(originalString);
+        builder.finish();
+        stringPointable.set(array.getByteArray(), startIdx, array.getLength());
+        writtenString = stringPointable.toString();
+        assertEquals(originalString, writtenString);
+
+        // test shrink path
+        array.reset();
+        array.append(STRING_UTF8_MIX.getByteArray(), STRING_UTF8_MIX.getStartOffset(), STRING_UTF8_MIX.getLength());
+        startIdx = array.getLength();
+        builder.reset(array, 699051);
+        builder.appendString(chunk);
+        builder.finish();
+        stringPointable.set(array.getByteArray(), startIdx, array.getLength());
+        writtenString = stringPointable.toString();
+        assertEquals(chunk, writtenString);
+    }
+
 }