[ASTERIXDB-2683][OTH] Add tests for filters using large (>50K) in-lists.

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Added a test macro mechanism for generating a sequence
  of numbers that can be inserted into queries for
  convenience.
- Added test cases for filters using a large number
  of items in in-lists.
- Added test cases for using a large number of AND
  clauses in a single query.

Change-Id: I39f88d6faead10b106ebdc147d0f9137090d061c
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/4363
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Hussain Towaileb <hussainht@gmail.com>
Reviewed-by: Till Westmann <tillw@apache.org>
diff --git a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java
index 7c1b1f3..22719f6 100644
--- a/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java
+++ b/asterixdb/asterix-app/src/test/java/org/apache/asterix/test/common/TestExecutor.java
@@ -64,6 +64,8 @@
 import java.util.function.Predicate;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 import java.util.stream.Stream;
 
 import org.apache.asterix.api.http.server.QueryServiceRequestParameters;
@@ -153,6 +155,8 @@
     private static final Pattern HANDLE_VARIABLE_PATTERN = Pattern.compile("handlevariable=(\\w+)");
     private static final Pattern RESULT_VARIABLE_PATTERN = Pattern.compile("resultvariable=(\\w+)");
     private static final Pattern COMPARE_UNORDERED_ARRAY_PATTERN = Pattern.compile("compareunorderedarray=(\\w+)");
+    private static final Pattern MACRO_PARAM_PATTERN =
+            Pattern.compile("macro (?<name>[\\w-$]+)=(?<value>.*)", Pattern.MULTILINE);
 
     private static final Pattern VARIABLE_REF_PATTERN = Pattern.compile("\\$(\\w+)");
     private static final Pattern HTTP_PARAM_PATTERN =
@@ -191,6 +195,11 @@
     private List<Charset> allCharsets;
     private final Queue<Charset> charsetsRemaining = new ArrayDeque<>();
 
+    // Macro parameter names
+    private static final String MACRO_START_FIELD = "start";
+    private static final String MACRO_END_FIELD = "end";
+    private static final String MACRO_SEPARATOR_FIELD = "separator";
+
     /*
      * Instance members
      */
@@ -672,6 +681,11 @@
             boolean jsonEncoded, Charset responseCharset, Predicate<Integer> responseCodeValidator, boolean cancellable)
             throws Exception {
 
+        final List<Parameter> macroParameters = extractMacro(str);
+        if (!macroParameters.isEmpty()) {
+            str = applySubstitution(str, macroParameters);
+        }
+
         final List<Parameter> additionalParams = extractParameters(str);
         for (Parameter param : additionalParams) {
             params = upsertParam(params, param.getName(), param.getType(), param.getValue());
@@ -1687,6 +1701,20 @@
         return Optional.empty();
     }
 
+    /**
+     * Collects each {@code macro <name>=<value>} declaration in the statement as a Parameter, in order of appearance.
+     */
+    private static List<Parameter> extractMacro(String statement) {
+        final List<Parameter> macros = new ArrayList<>();
+        for (Matcher matcher = MACRO_PARAM_PATTERN.matcher(statement); matcher.find();) {
+            final Parameter macro = new Parameter();
+            macro.setName(matcher.group("name"));
+            macro.setValue(matcher.group("value"));
+            macros.add(macro);
+        }
+        return macros;
+    }
+
     public static List<Parameter> extractParameters(String statement) {
         List<Parameter> params = new ArrayList<>();
         final Matcher m = HTTP_PARAM_PATTERN.matcher(statement);
@@ -1883,6 +1911,114 @@
         }
     }
 
+    /**
+     * Expands every macro invocation (text from the "start" token through the next "end" token). Invocations are
+     * located in the comment-stripped statement; each expansion is then mirrored into the original statement.
+     *
+     * @param statement statement that may contain macro invocations
+     * @param parameters macro declarations; "start", "end" and "separator" must all be present
+     * @return the original statement with the located macro invocations replaced by their expansions
+     * @throws Exception if a declaration is missing, a start token has no end token after it, or expansion fails
+     */
+    private String applySubstitution(String statement, List<Parameter> parameters) throws Exception {
+        // Ensure all macro parameters are available (names are matched case-insensitively)
+        Parameter startParameter = parameters.stream()
+                .filter(parameter -> parameter.getName().equalsIgnoreCase(MACRO_START_FIELD)).findFirst().orElse(null);
+        Parameter endParameter = parameters.stream()
+                .filter(parameter -> parameter.getName().equalsIgnoreCase(MACRO_END_FIELD)).findFirst().orElse(null);
+        Parameter separatorParameter =
+                parameters.stream().filter(parameter -> parameter.getName().equalsIgnoreCase(MACRO_SEPARATOR_FIELD))
+                        .findFirst().orElse(null);
+        if (startParameter == null || endParameter == null || separatorParameter == null) {
+            LOGGER.log(Level.ERROR, "Inappropriate use of macro command. Missing macro parameter");
+            throw new Exception("Inappropriate use of macro command. Missing macro parameter");
+        }
+        String startToken = startParameter.getValue();
+        String endToken = endParameter.getValue();
+        String separatorToken = separatorParameter.getValue();
+
+        // Search the comment-stripped text; this is meant to keep comments and macro declarations out of the way
+        String originalStatement = statement;
+        statement = stripAllComments(statement);
+        // Repetitively apply the substitution to replace all macro invocations
+        while (statement.contains(startToken) && statement.contains(endToken)) {
+            int startPosition = statement.indexOf(startToken);
+            // Look for the end token only after the start token so the two can never overlap or be out of order
+            int endTokenPosition = statement.indexOf(endToken, startPosition + startToken.length());
+            if (endTokenPosition < 0) {
+                LOGGER.log(Level.ERROR, "Inappropriate use of macro command. Invalid format");
+                throw new Exception("Inappropriate use of macro command. Invalid format");
+            }
+            String command = statement.substring(startPosition, endTokenPosition + endToken.length());
+            String substitute = doApplySubstitution(command, startToken, endToken, separatorToken);
+            // The expansion is literal text; quote it so '$' and '\' are not treated as replacement syntax
+            String replacement = Matcher.quoteReplacement(substitute);
+            originalStatement = originalStatement.replaceFirst(Pattern.quote(command), replacement);
+            statement = statement.replaceFirst(Pattern.quote(command), replacement);
+        }
+        return originalStatement;
+    }
+
+    /**
+     * Expands one macro invocation: the start/end tokens are removed and the rest is split on the macro separator.
+     * {@code (gen|generate) (seq|sequence) <from> <to> <valueSeparator>} yields the integers of [from, to) joined by
+     * the value separator; {@code (gen|generate) and <count> <valueSeparator>} yields "AND 1 = 1" clauses joined by
+     * the value separator (never fewer than one clause). Command names are matched case-insensitively.
+     * @param command macro invocation, including its start and end tokens
+     * @param start start token
+     * @param end end token
+     * @param separator text separating the command arguments; taken literally, not as a regular expression
+     * @return the text that replaces the macro invocation
+     * @throws Exception if the command is unknown or is missing arguments
+     */
+    private String doApplySubstitution(String command, String start, String end, String separator) throws Exception {
+        // Remove start and end markers
+        command = command.substring(start.length(), command.length() - end.length());
+        // Quote the separator: String.split would otherwise interpret it as a regular expression
+        String[] commandSplits = command.split(Pattern.quote(separator));
+
+        switch (commandSplits[0].toLowerCase()) {
+            case "gen":
+            case "generate":
+                // For generate command, generation type is 2nd argument; fail clearly when it is absent
+                if (commandSplits.length < 2) {
+                    LOGGER.log(Level.ERROR, "generate command is not formatted properly: " + command);
+                    throw new Exception("generate command is not formatted properly: " + command);
+                }
+                String type = commandSplits[1];
+                switch (type.toLowerCase()) {
+                    // "seq": Generates a sequence of integers, given start and end positions, and a separator
+                    case "seq":
+                    case "sequence":
+                        if (commandSplits.length < 5) {
+                            LOGGER.log(Level.ERROR, "generate sequence command is not formatted properly: " + command);
+                            throw new Exception("generate sequence command is not formatted properly: " + command);
+                        }
+                        int startPosition = Integer.parseInt(commandSplits[2]);
+                        int endPosition = Integer.parseInt(commandSplits[3]);
+                        String valuesSeparator = commandSplits[4];
+                        return IntStream.range(startPosition, endPosition).mapToObj(Integer::toString)
+                                .collect(Collectors.joining(valuesSeparator));
+                    // "and": generate a sequence of AND clauses, given a count and a separator
+                    case "and":
+                        if (commandSplits.length < 4) {
+                            LOGGER.log(Level.ERROR, "generate \"and\" command is not formatted properly: " + command);
+                            throw new Exception("generate \"and\" command is not formatted properly: " + command);
+                        }
+                        int count = Integer.parseInt(commandSplits[2]);
+                        String valueSeparator = commandSplits[3];
+                        return IntStream.range(0, Math.max(count, 1)).mapToObj(i -> "AND 1 = 1")
+                                .collect(Collectors.joining(valueSeparator));
+                    default:
+                        LOGGER.log(Level.ERROR, "gen command - unknown type: " + type);
+                        throw new Exception("gen command - unknown type: " + type);
+                }
+            default:
+                LOGGER.log(Level.ERROR, "Unknown macro command");
+                throw new Exception("Unknown macro command");
+        }
+    }
+
     protected void fail(boolean runDiagnostics, TestCaseContext testCaseCtx, CompilationUnit cUnit,
             List<TestFileContext> testFileCtxs, ProcessBuilder pb, File testFile, Exception e) throws Exception {
         if (runDiagnostics) {
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.000.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.000.ddl.sqlpp
new file mode 100644
index 0000000..ca9b405
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.000.ddl.sqlpp
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
+create dataverse test;
+use test;
+
+drop type test if exists;
+create type test as open {
+id: int64
+};
+
+drop dataset test if exists;
+create dataset test(test) primary key id;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.001.update.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.001.update.sqlpp
new file mode 100644
index 0000000..b976c1c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.001.update.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+-- macro start=%@
+-- macro end=@%
+-- macro separator=-
+
+use test;
+
+insert into test([
+{"id": 0, "f1": [%@generate-sequence-0-100000-,@%]}
+]);
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.002.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.002.query.sqlpp
new file mode 100644
index 0000000..5ea1a9e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.002.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+-- macro start=%@
+-- macro end=@%
+-- macro separator=-
+
+use test;
+
+select value f1 = [%@generate-seq-0-100000-,@%]
+from test;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.003.ddl.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.003.ddl.sqlpp
new file mode 100644
index 0000000..548e632
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/000/big_in_list.003.ddl.sqlpp
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+drop dataverse test if exists;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/001/big_in_list.000.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/001/big_in_list.000.query.sqlpp
new file mode 100644
index 0000000..02d1dc4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/big_in_list/001/big_in_list.000.query.sqlpp
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+-- macro start=#$
+-- macro end=^&
+-- macro separator=-
+
+select value count(mydata)
+from [#$gen-sequence-0-100000-,^&] as mydata
+where mydata IN [#$generate-seq-50000-150000-,^&];
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/multiple_and/000/multiple_and.000.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/multiple_and/000/multiple_and.000.query.sqlpp
new file mode 100644
index 0000000..ae9e08c
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/misc/multiple_and/000/multiple_and.000.query.sqlpp
@@ -0,0 +1,27 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+-- macro start=%@
+-- macro end=@%
+-- macro separator=-
+
+select value true
+from [1] as mydata
+where 1 = 1
+%@gen-and-1000- @%;
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/big_in_list/000/big_in_list.000.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/big_in_list/000/big_in_list.000.adm
new file mode 100644
index 0000000..f32a580
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/big_in_list/000/big_in_list.000.adm
@@ -0,0 +1 @@
+true
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/big_in_list/001/big_in_list.000.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/big_in_list/001/big_in_list.000.adm
new file mode 100644
index 0000000..d7e318d
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/big_in_list/001/big_in_list.000.adm
@@ -0,0 +1 @@
+50000
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/multiple_and/000/multiple_and.000.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/multiple_and/000/multiple_and.000.adm
new file mode 100644
index 0000000..f32a580
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/misc/multiple_and/000/multiple_and.000.adm
@@ -0,0 +1 @@
+true
\ No newline at end of file
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
index fd19cd5..128a443 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml
@@ -6106,6 +6106,21 @@
   </test-group>
   <test-group name="misc">
     <test-case FilePath="misc">
+      <compilation-unit name="big_in_list/000">
+        <output-dir compare="Text">big_in_list/000</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="misc">
+      <compilation-unit name="big_in_list/001">
+        <output-dir compare="Text">big_in_list/001</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="misc">
+      <compilation-unit name="multiple_and/000">
+        <output-dir compare="Text">multiple_and/000</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="misc">
       <compilation-unit name="record-serialization-ASTERIXDB-2567">
         <output-dir compare="Text">record-serialization-ASTERIXDB-2567</output-dir>
       </compilation-unit>