Full-text implementation step 1
- Introduced ftcontains() that conducts the full-text search
- This version doesn't support index-based full-text search yet.
That functionality will be added in a separate follow-up patch.
- Added BinaryHashSet derived from BinaryHashMap class.
- Parameter checking during the compilation is not merged yet.
Change-Id: If00a871a8241d6aa6931f97b694d65f164d3ab8c
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1228
Tested-by: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Integration-Tests: Jenkins <jenkins@fulliautomatix.ics.uci.edu>
Reviewed-by: Ian Maxon <imaxon@apache.org>
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
index 78e308f..5b092b1 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/InvertedIndexAccessMethod.java
@@ -1131,7 +1131,7 @@
switch (index.getIndexType()) {
case SINGLE_PARTITION_WORD_INVIX:
case LENGTH_PARTITIONED_WORD_INVIX: {
- return BinaryTokenizerFactoryProvider.INSTANCE.getWordTokenizerFactory(searchKeyType, false);
+ return BinaryTokenizerFactoryProvider.INSTANCE.getWordTokenizerFactory(searchKeyType, false, false);
}
case SINGLE_PARTITION_NGRAM_INVIX:
case LENGTH_PARTITIONED_NGRAM_INVIX: {
diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java
index fc49503..144531e 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/util/FunctionCollection.java
@@ -152,6 +152,7 @@
import org.apache.asterix.runtime.evaluators.functions.EditDistanceDescriptor;
import org.apache.asterix.runtime.evaluators.functions.EditDistanceListIsFilterableDescriptor;
import org.apache.asterix.runtime.evaluators.functions.EditDistanceStringIsFilterableDescriptor;
+import org.apache.asterix.runtime.evaluators.functions.FullTextContainsDescriptor;
import org.apache.asterix.runtime.evaluators.functions.GetItemDescriptor;
import org.apache.asterix.runtime.evaluators.functions.GramTokensDescriptor;
import org.apache.asterix.runtime.evaluators.functions.HashedGramTokensDescriptor;
@@ -595,6 +596,9 @@
functionsToInjectUnkownHandling.add(SimilarityJaccardSortedDescriptor.FACTORY);
functionsToInjectUnkownHandling.add(SimilarityJaccardSortedCheckDescriptor.FACTORY);
+ // full-text function
+ functionsToInjectUnkownHandling.add(FullTextContainsDescriptor.FACTORY);
+
// Record functions.
functionsToInjectUnkownHandling.add(GetRecordFieldsDescriptor.FACTORY);
functionsToInjectUnkownHandling.add(GetRecordFieldValueDescriptor.FACTORY);
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.1.ddl.aql
new file mode 100644
index 0000000..6c731a9
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.1.ddl.aql
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description : Full-text search non-index test
+ * : This test is intended to verify that the full-text search works as expected.
+ * : query #3 - single string value query
+ * : query #4 - single string value in an ordered list query
+ * : query #5 - single string value in an unordered list query
+ * Expected Result : Success
+ *
+*/
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type MyRecord as closed {
+ id: int64,
+ docid: int64,
+ val1: int64,
+ title: string,
+ point: point,
+ kwds: string,
+ line1: line,
+ line2: line,
+ poly1: polygon,
+ poly2: polygon,
+ rec: rectangle,
+ circle: circle
+}
+
+create dataset MyData(MyRecord)
+ primary key id;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.2.update.aql
new file mode 100644
index 0000000..c627cf1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.2.update.aql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+load dataset MyData
+using localfs
+(("path"="asterix_nc1://data/spatial/spatialData2.json"),("format"="adm"));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.3.query.aql
new file mode 100644
index 0000000..bc47bb6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.3.query.aql
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, "database", {"mode":"any"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.4.query.aql
new file mode 100644
index 0000000..53cecb6
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.4.query.aql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, ["database"], {"mode":"any"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.5.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.5.query.aql
new file mode 100644
index 0000000..2a4ddea
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-01/fulltext-01.5.query.aql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, {{"database"}}, {"mode":"any"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.1.ddl.aql
new file mode 100644
index 0000000..eba30c4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.1.ddl.aql
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description : Full-text search non-index test
+ * : This test is intended to verify that the full-text search works as expected.
+ * : query #3 - two string values in [an ordered list] query with "any" option
+ * : in this case, "any" option that enforces a disjunctive search will be applied.
+ * : query #4 - the same as query #3, but with a different option - "all"
+ * : in this case, we explicitly specify "all" option that enforces a conjunctive search.
+ * : query #5 - two string values in {{an unordered list}} query with "any" option
+ * : in this case, "any" option that enforces a disjunctive search will be applied.
+ * : query #6 - the same as query #5, but with a different option - "all"
+ * : in this case, we explicitly specify "all" option that enforces a conjunctive search.
+ * Expected Result : Success
+ *
+*/
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type MyRecord as closed {
+ id: int64,
+ docid: int64,
+ val1: int64,
+ title: string,
+ point: point,
+ kwds: string,
+ line1: line,
+ line2: line,
+ poly1: polygon,
+ poly2: polygon,
+ rec: rectangle,
+ circle: circle
+}
+
+create dataset MyData(MyRecord)
+ primary key id;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.2.update.aql
new file mode 100644
index 0000000..c627cf1
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.2.update.aql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+load dataset MyData
+using localfs
+(("path"="asterix_nc1://data/spatial/spatialData2.json"),("format"="adm"));
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.3.query.aql
new file mode 100644
index 0000000..caa4a9a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.3.query.aql
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, ["object","database"], {"mode":"any"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.4.query.aql
new file mode 100644
index 0000000..dc2b30a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.4.query.aql
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, ["object","database"], {"mode":"all"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.5.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.5.query.aql
new file mode 100644
index 0000000..05c2a37
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.5.query.aql
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, {{"object","database"}}, {"mode":"any"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.6.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.6.query.aql
new file mode 100644
index 0000000..7cd2428
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-02/fulltext-02.6.query.aql
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, {{"object","database"}}, {"mode":"all"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.1.ddl.aql
new file mode 100644
index 0000000..9096d89
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.1.ddl.aql
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description : Full-text search non-index test
+ * : This test is intended to verify that the full-text search works as expected.
+ * : In this test, search predicate is provided as a variable.
+ * : query #3 - two string values in [an ordered list] query with "any" option
+ * : in this case, "any" option that enforces a disjunctive search will be applied.
+ * : query #4 - the same as query #3, but with a different option - "all"
+ * : in this case, we explicitly specify "all" option that enforces a conjunctive search.
+ * : query #5 - two string values in {{an unordered list}} query with "any" option
+ * : in this case, "any" option that enforces a disjunctive search will be applied.
+ * : query #6 - the same as query #5, but with a different option - "all"
+ * : in this case, we explicitly specify "all" option that enforces a conjunctive search.
+ * : query #7 - two string values in a dataset query with "any" option
+ * : in this case, "any" option that enforces a disjunctive search will be applied.
+ * : query #8 - the same as query #7, but with a different option - "all"
+ * : in this case, we explicitly specify "all" option that enforces a conjunctive search.
+ * Expected Result : Success
+ *
+*/
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type MyRecord as closed {
+ id: int64,
+ docid: int64,
+ val1: int64,
+ title: string,
+ point: point,
+ kwds: string,
+ line1: line,
+ line2: line,
+ poly1: polygon,
+ poly2: polygon,
+ rec: rectangle,
+ circle: circle
+}
+
+create type MyKeyword as closed {
+ keyword_text: string
+}
+
+create dataset MyData(MyRecord)
+ primary key id;
+
+create dataset MyKeywordData(MyKeyword)
+ primary key keyword_text;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.2.update.aql
new file mode 100644
index 0000000..d60dd6a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.2.update.aql
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+load dataset MyData
+using localfs
+(("path"="asterix_nc1://data/spatial/spatialData2.json"),("format"="adm"));
+
+insert into dataset MyKeywordData ({"keyword_text":"object"});
+
+insert into dataset MyKeywordData ({"keyword_text":"database"});
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.3.query.aql
new file mode 100644
index 0000000..dcf61c5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.3.query.aql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+let $list := ["object", "database"]
+where ftcontains($o.title, $list, {"mode":"any"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.4.query.aql
new file mode 100644
index 0000000..86fe5d4
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.4.query.aql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+let $list := ["object", "database"]
+where ftcontains($o.title, $list, {"mode":"all"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.5.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.5.query.aql
new file mode 100644
index 0000000..5d0a546
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.5.query.aql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+let $list := {{"object", "database"}}
+where ftcontains($o.title, $list, {"mode":"any"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.6.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.6.query.aql
new file mode 100644
index 0000000..7def1fa
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.6.query.aql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+let $list := {{"object", "database"}}
+where ftcontains($o.title, $list, {"mode":"all"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.7.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.7.query.aql
new file mode 100644
index 0000000..4cc2f45
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.7.query.aql
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, for $list in dataset MyKeywordData return $list.keyword_text, {"mode":"any"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.8.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.8.query.aql
new file mode 100644
index 0000000..bae1da5
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-03/fulltext-03.8.query.aql
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, for $list in dataset MyKeywordData return $list.keyword_text, {"mode":"all"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-04/fulltext-04.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-04/fulltext-04.1.ddl.aql
new file mode 100644
index 0000000..14cda48
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-04/fulltext-04.1.ddl.aql
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description : Full-text search non-index test
+ * : This is a negative test: it verifies that the full-text search fails for a query form that is not supported yet.
+ * : query #3 - a string phrase is provided as a query predicate.
+ * : this should throw an exception since we don't support a phrase search yet.
+ * Expected Result : Exception
+ *
+*/
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type MyRecord as closed {
+ id: int64,
+ docid: int64,
+ val1: int64,
+ title: string,
+ point: point,
+ kwds: string,
+ line1: line,
+ line2: line,
+ poly1: polygon,
+ poly2: polygon,
+ rec: rectangle,
+ circle: circle
+}
+
+create type MyKeyword as closed {
+ keyword_text: string
+}
+
+create dataset MyData(MyRecord)
+ primary key id;
+
+create dataset MyKeywordData(MyKeyword)
+ primary key keyword_text;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-04/fulltext-04.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-04/fulltext-04.2.update.aql
new file mode 100644
index 0000000..bd244d0
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-04/fulltext-04.2.update.aql
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-04/fulltext-04.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-04/fulltext-04.3.query.aql
new file mode 100644
index 0000000..2769cbd
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-04/fulltext-04.3.query.aql
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, "object database", {"mode":"any"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-05/fulltext-05.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-05/fulltext-05.1.ddl.aql
new file mode 100644
index 0000000..011a86e
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-05/fulltext-05.1.ddl.aql
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description : Full-text search non-index test
+ * : This is a negative test: it verifies that the full-text search fails for a query form that is not supported yet.
+ * : query #3 - a string phrase in an ordered list is provided as a query predicate.
+ * : this should throw an exception since we don't support a phrase search yet.
+ * Expected Result : Exception
+ *
+*/
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type MyRecord as closed {
+ id: int64,
+ docid: int64,
+ val1: int64,
+ title: string,
+ point: point,
+ kwds: string,
+ line1: line,
+ line2: line,
+ poly1: polygon,
+ poly2: polygon,
+ rec: rectangle,
+ circle: circle
+}
+
+create type MyKeyword as closed {
+ keyword_text: string
+}
+
+create dataset MyData(MyRecord)
+ primary key id;
+
+create dataset MyKeywordData(MyKeyword)
+ primary key keyword_text;
+
+create index fulltext_index_title on MyData(title) type fulltext;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-05/fulltext-05.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-05/fulltext-05.2.update.aql
new file mode 100644
index 0000000..bd244d0
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-05/fulltext-05.2.update.aql
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-05/fulltext-05.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-05/fulltext-05.3.query.aql
new file mode 100644
index 0000000..5d4bd2f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-05/fulltext-05.3.query.aql
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, ["object database","systems"], {"mode":"any"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-06/fulltext-06.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-06/fulltext-06.1.ddl.aql
new file mode 100644
index 0000000..e155ba8
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-06/fulltext-06.1.ddl.aql
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description : Full-text search non-index test
+ * : This is a negative test: it verifies that the full-text search fails for a query form that is not supported yet.
+ * : query #3 - a string phrase in an unordered list is provided as a query predicate.
+ * : this should throw an exception since we don't support a phrase search yet.
+ * Expected Result : Exception
+ *
+*/
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type MyRecord as closed {
+ id: int64,
+ docid: int64,
+ val1: int64,
+ title: string,
+ point: point,
+ kwds: string,
+ line1: line,
+ line2: line,
+ poly1: polygon,
+ poly2: polygon,
+ rec: rectangle,
+ circle: circle
+}
+
+create type MyKeyword as closed {
+ keyword_text: string
+}
+
+create dataset MyData(MyRecord)
+ primary key id;
+
+create dataset MyKeywordData(MyKeyword)
+ primary key keyword_text;
+
+create index fulltext_index_title on MyData(title) type fulltext;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-06/fulltext-06.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-06/fulltext-06.2.update.aql
new file mode 100644
index 0000000..bd244d0
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-06/fulltext-06.2.update.aql
@@ -0,0 +1,19 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-06/fulltext-06.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-06/fulltext-06.3.query.aql
new file mode 100644
index 0000000..a509e10
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-06/fulltext-06.3.query.aql
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $o in dataset MyData
+where ftcontains($o.title, {{"object database","systems"}}, {"mode":"any"})
+order by $o.id
+return {"id":$o.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.1.ddl.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.1.ddl.aql
new file mode 100644
index 0000000..13e5506
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.1.ddl.aql
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*
+ * Description : Full-text search non-index test
+ * : This test is intended to verify that the full-text search works as expected.
+ * : query #3 - a join query: each keyword_text value in MyKeywordData is used as the search keyword with "any" option.
+ * : the keyword is supplied by the joined dataset (MyKeywordData), not by a list literal or a let clause.
+ * : in this case, "any" option that enforces a disjunctive search will be applied.
+ * : query #4 - the same as query #3, but with a different option - "all"
+ * : in this case, we explicitly specify "all" option that enforces a conjunctive search.
+ * Expected Result : Success
+ *
+*/
+
+drop dataverse test if exists;
+create dataverse test;
+use dataverse test;
+
+create type MyRecord as closed {
+ id: int64,
+ docid: int64,
+ val1: int64,
+ title: string,
+ point: point,
+ kwds: string,
+ line1: line,
+ line2: line,
+ poly1: polygon,
+ poly2: polygon,
+ rec: rectangle,
+ circle: circle
+}
+
+create type MyKeyword as closed {
+ keyword_text: string
+}
+
+create dataset MyData(MyRecord)
+ primary key id;
+
+create dataset MyKeywordData(MyKeyword)
+ primary key keyword_text;
+
+create index fulltext_index_title on MyData(title) type fulltext;
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.2.update.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.2.update.aql
new file mode 100644
index 0000000..d60dd6a
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.2.update.aql
@@ -0,0 +1,28 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+load dataset MyData
+using localfs
+(("path"="asterix_nc1://data/spatial/spatialData2.json"),("format"="adm"));
+
+insert into dataset MyKeywordData ({"keyword_text":"object"});
+
+insert into dataset MyKeywordData ({"keyword_text":"database"});
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.3.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.3.query.aql
new file mode 100644
index 0000000..f567c7b
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.3.query.aql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $k in dataset MyKeywordData
+for $t in dataset MyData
+where ftcontains($t.title, $k.keyword_text, {"mode":"any"})
+order by $t.id
+return {"id":$t.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.4.query.aql b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.4.query.aql
new file mode 100644
index 0000000..ceeb2ad
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries/fulltext/fulltext-07/fulltext-07.4.query.aql
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+use dataverse test;
+
+for $k in dataset MyKeywordData
+for $t in dataset MyData
+where ftcontains($t.title, $k.keyword_text, {"mode":"all"})
+order by $t.id
+return {"id":$t.id}
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-01/fulltext-01.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-01/fulltext-01.3.adm
new file mode 100644
index 0000000..a64e700
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-01/fulltext-01.3.adm
@@ -0,0 +1,20 @@
+{ "id": 5 }
+{ "id": 8 }
+{ "id": 12 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 19 }
+{ "id": 22 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 29 }
+{ "id": 35 }
+{ "id": 38 }
+{ "id": 42 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 49 }
+{ "id": 52 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 59 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-01/fulltext-01.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-01/fulltext-01.4.adm
new file mode 100644
index 0000000..a64e700
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-01/fulltext-01.4.adm
@@ -0,0 +1,20 @@
+{ "id": 5 }
+{ "id": 8 }
+{ "id": 12 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 19 }
+{ "id": 22 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 29 }
+{ "id": 35 }
+{ "id": 38 }
+{ "id": 42 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 49 }
+{ "id": 52 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 59 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-01/fulltext-01.5.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-01/fulltext-01.5.adm
new file mode 100644
index 0000000..a64e700
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-01/fulltext-01.5.adm
@@ -0,0 +1,20 @@
+{ "id": 5 }
+{ "id": 8 }
+{ "id": 12 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 19 }
+{ "id": 22 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 29 }
+{ "id": 35 }
+{ "id": 38 }
+{ "id": 42 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 49 }
+{ "id": 52 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 59 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.3.adm
new file mode 100644
index 0000000..9b34c44
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.3.adm
@@ -0,0 +1,38 @@
+{ "id": 1 }
+{ "id": 2 }
+{ "id": 5 }
+{ "id": 6 }
+{ "id": 8 }
+{ "id": 11 }
+{ "id": 12 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 17 }
+{ "id": 19 }
+{ "id": 20 }
+{ "id": 21 }
+{ "id": 22 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 27 }
+{ "id": 29 }
+{ "id": 30 }
+{ "id": 31 }
+{ "id": 32 }
+{ "id": 35 }
+{ "id": 36 }
+{ "id": 38 }
+{ "id": 41 }
+{ "id": 42 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 47 }
+{ "id": 49 }
+{ "id": 50 }
+{ "id": 51 }
+{ "id": 52 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 57 }
+{ "id": 59 }
+{ "id": 60 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.4.adm
new file mode 100644
index 0000000..acde73f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.4.adm
@@ -0,0 +1,14 @@
+{ "id": 8 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 19 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 29 }
+{ "id": 38 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 49 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 59 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.5.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.5.adm
new file mode 100644
index 0000000..9b34c44
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.5.adm
@@ -0,0 +1,38 @@
+{ "id": 1 }
+{ "id": 2 }
+{ "id": 5 }
+{ "id": 6 }
+{ "id": 8 }
+{ "id": 11 }
+{ "id": 12 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 17 }
+{ "id": 19 }
+{ "id": 20 }
+{ "id": 21 }
+{ "id": 22 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 27 }
+{ "id": 29 }
+{ "id": 30 }
+{ "id": 31 }
+{ "id": 32 }
+{ "id": 35 }
+{ "id": 36 }
+{ "id": 38 }
+{ "id": 41 }
+{ "id": 42 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 47 }
+{ "id": 49 }
+{ "id": 50 }
+{ "id": 51 }
+{ "id": 52 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 57 }
+{ "id": 59 }
+{ "id": 60 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.6.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.6.adm
new file mode 100644
index 0000000..acde73f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-02/fulltext-02.6.adm
@@ -0,0 +1,14 @@
+{ "id": 8 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 19 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 29 }
+{ "id": 38 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 49 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 59 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.3.adm
new file mode 100644
index 0000000..9b34c44
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.3.adm
@@ -0,0 +1,38 @@
+{ "id": 1 }
+{ "id": 2 }
+{ "id": 5 }
+{ "id": 6 }
+{ "id": 8 }
+{ "id": 11 }
+{ "id": 12 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 17 }
+{ "id": 19 }
+{ "id": 20 }
+{ "id": 21 }
+{ "id": 22 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 27 }
+{ "id": 29 }
+{ "id": 30 }
+{ "id": 31 }
+{ "id": 32 }
+{ "id": 35 }
+{ "id": 36 }
+{ "id": 38 }
+{ "id": 41 }
+{ "id": 42 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 47 }
+{ "id": 49 }
+{ "id": 50 }
+{ "id": 51 }
+{ "id": 52 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 57 }
+{ "id": 59 }
+{ "id": 60 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.4.adm
new file mode 100644
index 0000000..acde73f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.4.adm
@@ -0,0 +1,14 @@
+{ "id": 8 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 19 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 29 }
+{ "id": 38 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 49 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 59 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.5.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.5.adm
new file mode 100644
index 0000000..9b34c44
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.5.adm
@@ -0,0 +1,38 @@
+{ "id": 1 }
+{ "id": 2 }
+{ "id": 5 }
+{ "id": 6 }
+{ "id": 8 }
+{ "id": 11 }
+{ "id": 12 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 17 }
+{ "id": 19 }
+{ "id": 20 }
+{ "id": 21 }
+{ "id": 22 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 27 }
+{ "id": 29 }
+{ "id": 30 }
+{ "id": 31 }
+{ "id": 32 }
+{ "id": 35 }
+{ "id": 36 }
+{ "id": 38 }
+{ "id": 41 }
+{ "id": 42 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 47 }
+{ "id": 49 }
+{ "id": 50 }
+{ "id": 51 }
+{ "id": 52 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 57 }
+{ "id": 59 }
+{ "id": 60 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.6.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.6.adm
new file mode 100644
index 0000000..acde73f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.6.adm
@@ -0,0 +1,14 @@
+{ "id": 8 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 19 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 29 }
+{ "id": 38 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 49 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 59 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.7.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.7.adm
new file mode 100644
index 0000000..9b34c44
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.7.adm
@@ -0,0 +1,38 @@
+{ "id": 1 }
+{ "id": 2 }
+{ "id": 5 }
+{ "id": 6 }
+{ "id": 8 }
+{ "id": 11 }
+{ "id": 12 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 17 }
+{ "id": 19 }
+{ "id": 20 }
+{ "id": 21 }
+{ "id": 22 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 27 }
+{ "id": 29 }
+{ "id": 30 }
+{ "id": 31 }
+{ "id": 32 }
+{ "id": 35 }
+{ "id": 36 }
+{ "id": 38 }
+{ "id": 41 }
+{ "id": 42 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 47 }
+{ "id": 49 }
+{ "id": 50 }
+{ "id": 51 }
+{ "id": 52 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 57 }
+{ "id": 59 }
+{ "id": 60 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.8.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.8.adm
new file mode 100644
index 0000000..acde73f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-03/fulltext-03.8.adm
@@ -0,0 +1,14 @@
+{ "id": 8 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 19 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 29 }
+{ "id": 38 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 49 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 59 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-04/fulltext-04.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-04/fulltext-04.3.adm
new file mode 100644
index 0000000..0787798
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-04/fulltext-04.3.adm
@@ -0,0 +1 @@
+{ "id": 1 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-05/fulltext-05.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-05/fulltext-05.3.adm
new file mode 100644
index 0000000..0787798
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-05/fulltext-05.3.adm
@@ -0,0 +1 @@
+{ "id": 1 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-06/fulltext-06.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-06/fulltext-06.3.adm
new file mode 100644
index 0000000..0787798
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-06/fulltext-06.3.adm
@@ -0,0 +1 @@
+{ "id": 1 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-07/fulltext-07.3.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-07/fulltext-07.3.adm
new file mode 100644
index 0000000..9b34c44
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-07/fulltext-07.3.adm
@@ -0,0 +1,38 @@
+{ "id": 1 }
+{ "id": 2 }
+{ "id": 5 }
+{ "id": 6 }
+{ "id": 8 }
+{ "id": 11 }
+{ "id": 12 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 17 }
+{ "id": 19 }
+{ "id": 20 }
+{ "id": 21 }
+{ "id": 22 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 27 }
+{ "id": 29 }
+{ "id": 30 }
+{ "id": 31 }
+{ "id": 32 }
+{ "id": 35 }
+{ "id": 36 }
+{ "id": 38 }
+{ "id": 41 }
+{ "id": 42 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 47 }
+{ "id": 49 }
+{ "id": 50 }
+{ "id": 51 }
+{ "id": 52 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 57 }
+{ "id": 59 }
+{ "id": 60 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-07/fulltext-07.4.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-07/fulltext-07.4.adm
new file mode 100644
index 0000000..acde73f
--- /dev/null
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/fulltext/fulltext-07/fulltext-07.4.adm
@@ -0,0 +1,14 @@
+{ "id": 8 }
+{ "id": 13 }
+{ "id": 16 }
+{ "id": 19 }
+{ "id": 23 }
+{ "id": 26 }
+{ "id": 29 }
+{ "id": 38 }
+{ "id": 43 }
+{ "id": 46 }
+{ "id": 49 }
+{ "id": 53 }
+{ "id": 56 }
+{ "id": 59 }
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
index 67e9909..e4d4ee3 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -381,6 +381,13 @@
</compilation-unit>
</test-case>
</test-group>
+ <test-group name="fulltext">
+ <test-case FilePath="fulltext">
+ <compilation-unit name="fulltext-01">
+ <output-dir compare="Text">fulltext-01</output-dir>
+ </compilation-unit>
+ </test-case>
+ </test-group>
<test-group name="union">
<test-case FilePath="union">
<compilation-unit name="union">
diff --git a/asterixdb/asterix-doc/src/site/markdown/aql/fulltext.md b/asterixdb/asterix-doc/src/site/markdown/aql/fulltext.md
new file mode 100644
index 0000000..921f0b3
--- /dev/null
+++ b/asterixdb/asterix-doc/src/site/markdown/aql/fulltext.md
@@ -0,0 +1,99 @@
+<!--
+ ! Licensed to the Apache Software Foundation (ASF) under one
+ ! or more contributor license agreements. See the NOTICE file
+ ! distributed with this work for additional information
+ ! regarding copyright ownership. The ASF licenses this file
+ ! to you under the Apache License, Version 2.0 (the
+ ! "License"); you may not use this file except in compliance
+ ! with the License. You may obtain a copy of the License at
+ !
+ ! http://www.apache.org/licenses/LICENSE-2.0
+ !
+ ! Unless required by applicable law or agreed to in writing,
+ ! software distributed under the License is distributed on an
+ ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ ! KIND, either express or implied. See the License for the
+ ! specific language governing permissions and limitations
+ ! under the License.
+ !-->
+
+# AsterixDB Support of Full-text search queries #
+
+## <a id="toc">Table of Contents</a> ##
+
+* [Motivation](#Motivation)
+* [Syntax](#Syntax)
+* Creating and utilizing a Full-text index (not covered yet: this version does not support index-based full-text search)
+
+## <a id="Motivation">Motivation</a> <font size="4"><a href="#toc">[Back to TOC]</a></font> ##
+
+Full-Text Search (FTS) queries are widely used in applications where users need to find records that satisfy
+an FTS predicate, i.e., where simple string-based matching is not sufficient. These queries are important when
+finding documents that contain a certain keyword is crucial. FTS queries are different from substring matching
+queries in that FTS queries find their query predicates as exact keywords in the given string, rather than
+treating a query predicate as a sequence of characters. For example, an FTS query that finds “rain” correctly
+returns a document when it contains “rain” as a word. However, a substring-matching query returns a document
+whenever it contains “rain” as a substring, for instance, a document with “brain” or “training” would be
+returned as well.
+
+## <a id="Syntax">Syntax</a> <font size="4"><a href="#toc">[Back to TOC]</a></font> ##
+
+The syntax of AsterixDB FTS follows a portion of the XQuery FullText Search syntax.
+A basic form is as follows:
+
+ ftcontains(Expression1, Expression2, {FullTextOption})
+
+For example, we can execute the following query to find tweet messages where the `message-text` field includes
+“voice” as a word. Please note that a full-text search is case-insensitive.
+Thus, “Voice” and “voice” will be evaluated as the same word.
+
+ use dataverse TinySocial;
+
+ for $msg in dataset TweetMessages
+ where ftcontains($msg.message-text, "voice", {"mode":"any"})
+ return {"id": $msg.id}
+
+The DDL and DML of TinySocial can be found in [ADM: Modeling Semistructured Data in AsterixDB](primer.html#ADM:_Modeling_Semistructed_Data_in_AsterixDB).
+
+The `Expression1` is an expression that should be evaluable as a string at runtime as in the above example
+where `$msg.message-text` is a string field. The `Expression2` can be a string, an (un)ordered list
+of string value(s), or an expression. In the last case, the given expression should be evaluable
+into one of the first two types, i.e., into a string value or an (un)ordered list of string value(s).
+
+The following examples are all valid expressions.
+
+ ... where ftcontains($msg.message-text, "sound", {"mode":"any"})
+ ... where ftcontains($msg.message-text, ["sound", "system"], {"mode":"any"})
+ ... where ftcontains($msg.message-text, {{"speed", "stand", "customization"}}, {"mode":"all"})
+ ... where ftcontains($msg.message-text, let $keyword_list := ["voice", "system"] return $keyword_list, {"mode":"all"})
+ ... where ftcontains($msg.message-text, $keyword_list, {"mode":"any"})
+
+In the last example above, `$keyword_list` should evaluate to a string or an (un)ordered list of string value(s).
+
+The last parameter, `FullTextOption`, refines the given FTS request. Currently, there is only one option, named `mode`;
+more options will be added as the FTS feature is extended. Please note that `FullTextOption`
+is a record, so the option(s) must be enclosed in a record `{}`.
+The `mode` option indicates whether the given FTS query is a conjunctive (AND) or disjunctive (OR) search request.
+This option can be either `"any"` or `"all"`. If one specifies `"any"`, a disjunctive search will be conducted.
+For example, the following query will find documents whose `message-text` field contains “sound” or “system”,
+so a document will be returned if it contains either “sound”, “system”, or both of the keywords.
+
+ ... where ftcontains($msg.message-text, ["sound", "system"], {"mode":"any"})
+
+The other option value, `"all"`, specifies a conjunctive search. The following example will find the documents whose
+`message-text` field contains both “sound” and “system”. If a document contains only “sound” or “system” but
+not both, it will not be returned.
+
+ ... where ftcontains($msg.message-text, ["sound", "system"], {"mode":"all"})
+
+Currently AsterixDB doesn’t (yet) support phrase searches, so the following query will not work.
+
+ ... where ftcontains($msg.message-text, "sound system", {"mode":"any"})
+
+As a workaround, the following query can be used to achieve a roughly similar goal. The difference is that
+the following query will find documents where `$msg.message-text` contains both “sound” and “system”, but the order
+and adjacency of “sound” and “system” are not checked, unlike in a phrase search. As a result, the query below would
+also return documents with “sound system can be installed.”, “system sound is perfect.”,
+or “sound is not clear. You may need to install a new system.”
+
+ ... where ftcontains($msg.message-text, ["sound", "system"], {"mode":"all"})
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/common/IBinaryTokenizerFactoryProvider.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/common/IBinaryTokenizerFactoryProvider.java
index 159c74d..c57dd89 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/common/IBinaryTokenizerFactoryProvider.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/common/IBinaryTokenizerFactoryProvider.java
@@ -22,7 +22,8 @@
import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
public interface IBinaryTokenizerFactoryProvider {
- public IBinaryTokenizerFactory getWordTokenizerFactory(ATypeTag typeTag, boolean hashedTokens);
+ public IBinaryTokenizerFactory getWordTokenizerFactory(ATypeTag typeTag, boolean hashedTokens,
+ boolean typeTagAlreadyRemoved);
public IBinaryTokenizerFactory getNGramTokenizerFactory(ATypeTag typeTag, int gramLength, boolean usePrePost,
boolean hashedTokens);
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryComparatorFactoryProvider.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryComparatorFactoryProvider.java
index 4e0e210..677c004 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryComparatorFactoryProvider.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryComparatorFactoryProvider.java
@@ -18,6 +18,8 @@
*/
package org.apache.asterix.formats.nontagged;
+import java.io.Serializable;
+
import org.apache.asterix.dataflow.data.nontagged.comparators.ABinaryComparator;
import org.apache.asterix.dataflow.data.nontagged.comparators.ACirclePartialBinaryComparatorFactory;
import org.apache.asterix.dataflow.data.nontagged.comparators.ADurationPartialBinaryComparatorFactory;
@@ -48,10 +50,9 @@
import org.apache.hyracks.data.std.primitive.LongPointable;
import org.apache.hyracks.data.std.primitive.ShortPointable;
import org.apache.hyracks.data.std.primitive.UTF8StringLowercasePointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringLowercaseTokenPointable;
import org.apache.hyracks.data.std.primitive.UTF8StringPointable;
-import java.io.Serializable;
-
public class BinaryComparatorFactoryProvider implements IBinaryComparatorFactoryProvider, Serializable {
private static final long serialVersionUID = 1L;
@@ -74,6 +75,10 @@
// case-insensitive comparisons.
public static final PointableBinaryComparatorFactory UTF8STRING_LOWERCASE_POINTABLE_INSTANCE =
new PointableBinaryComparatorFactory(UTF8StringLowercasePointable.FACTORY);
+ // Equivalent to UTF8STRING_LOWERCASE_POINTABLE_INSTANCE, except that the length information is kept separately
+ // rather than at the beginning of the string. It is especially useful for comparing string tokens.
+ public static final PointableBinaryComparatorFactory UTF8STRING_LOWERCASE_TOKEN_POINTABLE_INSTANCE =
+ new PointableBinaryComparatorFactory(UTF8StringLowercaseTokenPointable.FACTORY);
public static final PointableBinaryComparatorFactory BINARY_POINTABLE_INSTANCE =
new PointableBinaryComparatorFactory(ByteArrayPointable.FACTORY);
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java
index 58740ee..084a811 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/formats/nontagged/BinaryTokenizerFactoryProvider.java
@@ -38,9 +38,13 @@
new DelimitedUTF8StringBinaryTokenizerFactory(true, true,
new UTF8WordTokenFactory(ATypeTag.SERIALIZED_STRING_TYPE_TAG, ATypeTag.SERIALIZED_INT32_TYPE_TAG));
+ private static final IBinaryTokenizerFactory aqlStringNoTypeTagTokenizer =
+ new DelimitedUTF8StringBinaryTokenizerFactory(true, false,
+ new UTF8WordTokenFactory(ATypeTag.STRING.serialize(), ATypeTag.INT32.serialize()));
+
private static final IBinaryTokenizerFactory aqlHashingStringTokenizer =
- new DelimitedUTF8StringBinaryTokenizerFactory(true, true,
- new HashedUTF8WordTokenFactory(ATypeTag.SERIALIZED_INT32_TYPE_TAG, ATypeTag.SERIALIZED_INT32_TYPE_TAG));
+ new DelimitedUTF8StringBinaryTokenizerFactory(true, true, new HashedUTF8WordTokenFactory(
+ ATypeTag.SERIALIZED_INT32_TYPE_TAG, ATypeTag.SERIALIZED_INT32_TYPE_TAG));
private static final IBinaryTokenizerFactory orderedListTokenizer = new AOrderedListBinaryTokenizerFactory(
new AListElementTokenFactory());
@@ -49,10 +53,17 @@
new AListElementTokenFactory());
@Override
- public IBinaryTokenizerFactory getWordTokenizerFactory(ATypeTag typeTag, boolean hashedTokens) {
+ public IBinaryTokenizerFactory getWordTokenizerFactory(ATypeTag typeTag, boolean hashedTokens,
+ boolean typeTageAlreadyRemoved) {
switch (typeTag) {
case STRING:
- return hashedTokens ? aqlHashingStringTokenizer : aqlStringTokenizer;
+ if (hashedTokens) {
+ return aqlHashingStringTokenizer;
+ } else if (!typeTageAlreadyRemoved) {
+ return aqlStringTokenizer;
+ } else {
+ return aqlStringNoTypeTagTokenizer;
+ }
case ORDEREDLIST:
return orderedListTokenizer;
case UNORDEREDLIST:
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java
index 29a693c..f4c0c38 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/functions/AsterixBuiltinFunctions.java
@@ -486,6 +486,10 @@
public static final FunctionIdentifier EDIT_DISTANCE_CONTAINS = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
"edit-distance-contains", 3);
+ // full-text
+ public static final FunctionIdentifier FULLTEXT_CONTAINS = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
+ "ftcontains", 3);
+
// tokenizers:
public static final FunctionIdentifier WORD_TOKENS = new FunctionIdentifier(FunctionConstants.ASTERIX_NS,
"word-tokens", 1);
@@ -1027,6 +1031,9 @@
addPrivateFunction(SIMILARITY_JACCARD_PREFIX, AFloatTypeComputer.INSTANCE, true);
addPrivateFunction(SIMILARITY_JACCARD_PREFIX_CHECK, OrderedListOfAnyTypeComputer.INSTANCE, true);
+ // Full-text function
+ addFunction(FULLTEXT_CONTAINS, ABooleanTypeComputer.INSTANCE, true);
+
// Spatial functions
addFunction(SPATIAL_AREA, ADoubleTypeComputer.INSTANCE, true);
addFunction(SPATIAL_CELL, ARectangleTypeComputer.INSTANCE, true);
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/ConstantExpressionUtil.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/ConstantExpressionUtil.java
index c67030a..e627d95 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/ConstantExpressionUtil.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/ConstantExpressionUtil.java
@@ -45,7 +45,16 @@
return null;
}
final IAObject iaObject = ((AsterixConstantValue) acv).getObject();
- return iaObject.getType().getTypeTag() == typeTag ? iaObject : null;
+ if (typeTag != null) {
+ return iaObject.getType().getTypeTag() == typeTag ? iaObject : null;
+ } else {
+ return iaObject;
+ }
+ }
+
+    public static ATypeTag getConstantIaObjectType(ILogicalExpression expr) {
+        IAObject iaObject = getConstantIaObject(expr, null); // may be null, e.g., when expr holds no constant
+        return iaObject == null ? null : iaObject.getType().getTypeTag(); // null instead of an NPE
+    }
public static Long getLongConstant(ILogicalExpression expr) {
diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/NonTaggedFormatUtil.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/NonTaggedFormatUtil.java
index f46e7da..0608b79 100644
--- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/NonTaggedFormatUtil.java
+++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/util/NonTaggedFormatUtil.java
@@ -232,7 +232,7 @@
switch (indexType) {
case SINGLE_PARTITION_WORD_INVIX:
case LENGTH_PARTITIONED_WORD_INVIX: {
- return BinaryTokenizerFactoryProvider.INSTANCE.getWordTokenizerFactory(keyType, false);
+ return BinaryTokenizerFactoryProvider.INSTANCE.getWordTokenizerFactory(keyType, false, false);
}
case SINGLE_PARTITION_NGRAM_INVIX:
case LENGTH_PARTITIONED_NGRAM_INVIX: {
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/FullTextContainsEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/FullTextContainsEvaluator.java
new file mode 100644
index 0000000..471b209
--- /dev/null
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/FullTextContainsEvaluator.java
@@ -0,0 +1,399 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.asterix.runtime.evaluators.common;
+
+import java.io.DataOutput;
+import java.util.Arrays;
+
+import org.apache.asterix.formats.nontagged.BinaryComparatorFactoryProvider;
+import org.apache.asterix.formats.nontagged.BinaryTokenizerFactoryProvider;
+import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
+import org.apache.asterix.om.base.ABoolean;
+import org.apache.asterix.om.base.ANull;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.om.types.BuiltinType;
+import org.apache.asterix.om.types.EnumDeserializer;
+import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
+import org.apache.asterix.runtime.evaluators.functions.FullTextContainsDescriptor;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.dataflow.value.IBinaryComparator;
+import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.accessors.PointableBinaryHashFunctionFactory;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.primitive.TaggedValuePointable;
+import org.apache.hyracks.data.std.primitive.UTF8StringLowercaseTokenPointable;
+import org.apache.hyracks.data.std.primitive.VoidPointable;
+import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.data.std.util.BinaryEntry;
+import org.apache.hyracks.data.std.util.BinaryHashSet;
+import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
+import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizer;
+import org.apache.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+public class FullTextContainsEvaluator implements IScalarEvaluator {
+
+ // assuming type indicator in serde format
+ protected static final int TYPE_INDICATOR_SIZE = 1;
+
+ protected final ArrayBackedValueStorage resultStorage = new ArrayBackedValueStorage();
+ protected final DataOutput out = resultStorage.getDataOutput();
+ protected final TaggedValuePointable argLeft = (TaggedValuePointable) TaggedValuePointable.FACTORY
+ .createPointable();
+ protected final TaggedValuePointable argRight = (TaggedValuePointable) TaggedValuePointable.FACTORY
+ .createPointable();
+ protected TaggedValuePointable[] argOptions;
+ protected final IScalarEvaluator evalLeft;
+ protected final IScalarEvaluator evalRight;
+ protected IScalarEvaluator[] evalOptions;
+ protected IPointable outLeft = VoidPointable.FACTORY.createPointable();
+ protected IPointable outRight = VoidPointable.FACTORY.createPointable();
+ protected IPointable[] outOptions;
+ protected int optionArgsLength;
+
+ // To conduct a full-text search, we convert all strings to the lower case.
+ // In addition, since each token does not include the length information (2 bytes) in the beginning,
+ // we need a binary comparator that is different from a standard string comparator,
+ // i.e., a token comparator that receives the length of a token as a parameter.
+ private final IBinaryComparator strLowerCaseTokenCmp =
+ BinaryComparatorFactoryProvider.UTF8STRING_LOWERCASE_TOKEN_POINTABLE_INSTANCE.createBinaryComparator();
+ private final IBinaryComparator strLowerCaseCmp =
+ BinaryComparatorFactoryProvider.UTF8STRING_LOWERCASE_POINTABLE_INSTANCE.createBinaryComparator();
+ private IBinaryTokenizer tokenizerForLeftArray = null;
+ private IBinaryTokenizer tokenizerForRightArray = null;
+
+ // Case insensitive hash for full-text search
+ private IBinaryHashFunction hashFunc = null;
+
+ // keyEntry used in the hash-set
+ private BinaryEntry keyEntry = null;
+
+ // Hash set that keeps the tokens of the query predicate. Constructor parameters: number of buckets,
+ // frame size, hash function, comparator, and the byte array that contains the keys
+ private BinaryHashSet rightHashSet = null;
+
+ // Copy of the latest query predicate; used to check whether the query predicate has been changed
+ private byte[] queryArray = null;
+
+ // If the following is 1, then we will do a disjunctive search.
+ // Else if it is equal to the number of tokens, then we will do a conjunctive search.
+ private int occurrenceThreshold = 1;
+
+ static final int HASH_SET_SLOT_SIZE = 101;
+ static final int HASH_SET_FRAME_SIZE = 32768;
+
+ @SuppressWarnings("unchecked")
+ protected ISerializerDeserializer<ABoolean> serde =
+ SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ABOOLEAN);
+ @SuppressWarnings("unchecked")
+ protected ISerializerDeserializer<ANull> nullSerde =
+ SerializerDeserializerProvider.INSTANCE.getSerializerDeserializer(BuiltinType.ANULL);
+
+    public FullTextContainsEvaluator(IScalarEvaluatorFactory[] args, IHyracksTaskContext context)
+            throws HyracksDataException {
+        // args[0]: document expression, args[1]: query predicate, args[2..]: full-text search options.
+        this.evalLeft = args[0].createScalarEvaluator(context);
+        this.evalRight = args[1].createScalarEvaluator(context);
+        this.optionArgsLength = args.length - 2;
+        this.evalOptions = new IScalarEvaluator[optionArgsLength];
+        this.outOptions = new IPointable[optionArgsLength];
+        this.argOptions = new TaggedValuePointable[optionArgsLength];
+        for (int optIdx = 0, argIdx = 2; optIdx < optionArgsLength; optIdx++, argIdx++) {
+            evalOptions[optIdx] = args[argIdx].createScalarEvaluator(context);
+            outOptions[optIdx] = VoidPointable.FACTORY.createPointable();
+            argOptions[optIdx] = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable();
+        }
+    }
+
+    /**
+     * Evaluates ftcontains() for one tuple and writes a serialized boolean into {@code result}, or a
+     * serialized null when checkArgTypes() rejects the type tags of the two leading arguments.
+     *
+     * @param tuple the input tuple that every argument expression is evaluated against
+     * @param result set to the internal result storage that holds the serialized outcome
+     * @throws HyracksDataException if an argument evaluation, the search, or the serialization fails
+     */
+    @Override
+    public void evaluate(IFrameTupleReference tuple, IPointable result) throws HyracksDataException {
+        resultStorage.reset();
+
+        // Left side: the document field. Right side: the query predicate(s).
+        evalLeft.evaluate(tuple, argLeft);
+        argLeft.getValue(outLeft);
+        evalRight.evaluate(tuple, argRight);
+        argRight.getValue(outRight);
+
+        for (int i = 0; i < optionArgsLength; i++) {
+            evalOptions[i].evaluate(tuple, argOptions[i]);
+            argOptions[i].getValue(outOptions[i]);
+        }
+
+        ATypeTag typeTag1 = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argLeft.getTag());
+        ATypeTag typeTag2 = EnumDeserializer.ATYPETAGDESERIALIZER.deserialize(argRight.getTag());
+
+        // HyracksDataException is already declared here, so failures propagate without being re-wrapped.
+        if (!checkArgTypes(typeTag1, typeTag2)) {
+            // Inappropriate argument types: the result is null.
+            nullSerde.serialize(ANull.NULL, out);
+        } else {
+            boolean found = fullTextContainsWithArg(typeTag2, argLeft, argRight);
+            serde.serialize(found ? ABoolean.TRUE : ABoolean.FALSE, out);
+        }
+        result.set(resultStorage);
+    }
+
+    /**
+     * Runs the full-text search for the current tuple:
+     * 1) The query predicate (right side) is tokenized into a hash set; this step is skipped when the
+     *    predicate bytes equal the ones cached from the previous call.
+     * 2) The document field (left side) is tokenized and each token is probed against that hash set.
+     *
+     * @param typeTag2 type tag of the query predicate; its validity has been checked by the caller
+     * @param arg1 the document field, i.e., X in "X contains text Y"
+     * @param arg2 the query predicate(s), i.e., Y in "X contains text Y"
+     * @return the outcome of readLeftAndConductSearch() for the document field
+     * @throws HyracksDataException if tokenizing or probing fails, e.g., for an unsupported phrase predicate
+     */
+    private boolean fullTextContainsWithArg(ATypeTag typeTag2, IPointable arg1, IPointable arg2)
+            throws HyracksDataException {
+        // Prepares the hash set and the tokenizers that the search relies on.
+        initializeFullTextContains(typeTag2);
+
+        final byte[] predicateBytes = arg2.getByteArray();
+        // NOTE(review): the comparison covers the whole backing array of arg2, not only the
+        // [startOffset, startOffset + length) slice, and ignores the offset itself - confirm that an
+        // unchanged backing array always implies an unchanged query predicate.
+        final boolean samePredicate = Arrays.equals(queryArray, predicateBytes);
+        if (samePredicate) {
+            // Same query predicate as before: keep the cached tokens, but reset the count of each
+            // token to zero so that the counting starts from scratch for this document.
+            rightHashSet.clearFoundCount();
+        } else {
+            // A new query predicate: rebuild the cached copy and the hash set of its tokens.
+            resetQueryArrayAndRight(predicateBytes, typeTag2, arg2);
+        }
+
+        return readLeftAndConductSearch(arg1);
+    }
+
+    /**
+     * Prepares the state needed by the search. The hash function, the key entry, the hash set for the
+     * query tokens, and the document tokenizer are created once, on the first call. The tokenizer for
+     * the query predicate is re-created on every call according to the given type tag.
+     *
+     * @param predicateTypeTag type tag of the query predicate (right side)
+     */
+    private void initializeFullTextContains(ATypeTag predicateTypeTag) {
+        if (rightHashSet == null) {
+            // Tokens from the right side (query predicate) are kept in a hash set.
+            hashFunc = new PointableBinaryHashFunctionFactory(UTF8StringLowercaseTokenPointable.FACTORY)
+                    .createBinaryHashFunction();
+            keyEntry = new BinaryEntry();
+            // Arguments: number of buckets, frame size, hash function, comparator, and the byte array
+            // that contains the keys (null for now; it is set once a query predicate is known).
+            rightHashSet = new BinaryHashSet(HASH_SET_SLOT_SIZE, HASH_SET_FRAME_SIZE, hashFunc, strLowerCaseTokenCmp,
+                    null);
+            tokenizerForLeftArray = BinaryTokenizerFactoryProvider.INSTANCE
+                    .getWordTokenizerFactory(ATypeTag.STRING, false, true).createTokenizer();
+        }
+
+        // Strings and (un)ordered lists get the tokenizer registered for their own type tag; others are ignored.
+        switch (predicateTypeTag) {
+            case STRING:
+            case ORDEREDLIST:
+            case UNORDEREDLIST:
+                tokenizerForRightArray = BinaryTokenizerFactoryProvider.INSTANCE
+                        .getWordTokenizerFactory(predicateTypeTag, false, true).createTokenizer();
+                break;
+            default:
+                break;
+        }
+    }
+
+ /**
+ * Copies the new query predicate into queryArray and rebuilds the hash set of its unique tokens.
+ * Afterwards, the full-text options are applied based on the number of unique tokens.
+ * @param arg2Array the byte array backing the query predicate; it is copied in its entirety
+ * @param typeTag2 type tag of the query predicate
+ * @param arg2 pointable that provides the start offset and the length of the query predicate
+ * @throws HyracksDataException e.g., if a query predicate is a phrase, which is not supported yet
+ */
+ void resetQueryArrayAndRight(byte[] arg2Array, ATypeTag typeTag2, IPointable arg2) throws HyracksDataException {
+ queryArray = new byte[arg2Array.length];
+ System.arraycopy(arg2Array, 0, queryArray, 0, arg2Array.length);
+
+ // Clear the hash set for the search predicates and point it to the new copy of the query.
+ rightHashSet.clear();
+ rightHashSet.setRefArray(queryArray);
+
+ // Token counts in this query: all tokens seen so far, and the unique ones among them
+ int queryTokenCount = 0;
+ int uniqueQueryTokenCount = 0;
+
+ int startOffset = arg2.getStartOffset();
+ int length = arg2.getLength();
+
+ // Reset the tokenizer for the given keywords in the given query
+ tokenizerForRightArray.reset(queryArray, startOffset, length);
+
+ // Create tokens from the given query predicate
+ while (tokenizerForRightArray.hasNext()) {
+ tokenizerForRightArray.next();
+ queryTokenCount++;
+
+ // Insert the starting position and the length of the current token into the hash set.
+ // We don't store the actual value of this token since we can access it via offset and length.
+ int tokenOffset = tokenizerForRightArray.getToken().getStartOffset();
+ int tokenLength = tokenizerForRightArray.getToken().getTokenLength();
+ int numBytesToStoreLength;
+
+ // If a token comes from a string tokenizer, each token doesn't have the length data
+ // in the beginning. Instead, if a token comes from an (un)ordered list, each token has
+ // the length data in the beginning. Since KeyEntry keeps the length data
+ // as a parameter, we need to adjust token offset and length in this case.
+ // e.g., 8database <--- we only need to store the offset of 'd' and length 8.
+ if (typeTag2 == ATypeTag.ORDEREDLIST || typeTag2 == ATypeTag.UNORDEREDLIST) {
+ // How many bytes are required to store the length of the given token?
+ numBytesToStoreLength = UTF8StringUtil.getNumBytesToStoreLength(
+ UTF8StringUtil.getUTFLength(tokenizerForRightArray.getToken().getData(),
+ tokenizerForRightArray.getToken().getStartOffset()));
+ tokenOffset = tokenOffset + numBytesToStoreLength;
+ tokenLength = tokenLength - numBytesToStoreLength;
+ }
+ keyEntry.set(tokenOffset, tokenLength);
+
+ // A phrase search is not supported yet, so each query predicate should have only one token.
+ // The same logic should be applied in AbstractTOccurrenceSearcher() class.
+ checkWhetherFullTextPredicateIsPhrase(typeTag2, queryArray, tokenOffset, tokenLength, queryTokenCount);
+
+ // Count unique tokens only (set semantics): a token is inserted and counted on its first
+ // occurrence, e.g., for ["database","system","database"], "database" is counted only once.
+ // The lookup below is done without increasing the count of the token (hence the 'false').
+ if (rightHashSet.find(keyEntry, queryArray, false) == -1) {
+ rightHashSet.put(keyEntry);
+ uniqueQueryTokenCount++;
+ }
+ }
+
+ // Apply the search mode option ("any" or "all") here to set the occurrence threshold of tokens.
+ setFullTextOption(argOptions, uniqueQueryTokenCount);
+ }
+
+    /**
+     * Throws if the predicate is a phrase (unsupported): a string with more than one token, or a list
+     * element containing a separator. Elements are decoded per UTF-8 character, not per byte.
+     */
+    private void checkWhetherFullTextPredicateIsPhrase(ATypeTag typeTag, byte[] refArray, int tokenOffset,
+            int tokenLength, int queryTokenCount) throws HyracksDataException {
+        if (typeTag == ATypeTag.STRING) {
+            if (queryTokenCount > 1) {
+                throw new HyracksDataException(
+                        "Phrase in Full-text search is not supported. An expression should include only one word.");
+            }
+        } else if (typeTag == ATypeTag.ORDEREDLIST || typeTag == ATypeTag.UNORDEREDLIST) {
+            final int end = tokenOffset + tokenLength;
+            for (int pos = tokenOffset; pos < end; pos += UTF8StringUtil.charSize(refArray, pos)) {
+                if (DelimitedUTF8StringBinaryTokenizer.isSeparator(UTF8StringUtil.charAt(refArray, pos))) {
+                    throw new HyracksDataException(
+                            "Phrase in Full-text is not supported. An expression should include only one word.");
+                }
+            }
+        } else {
+            throw new HyracksDataException("Full-text search can be only executed on STRING or (UN)ORDERED LIST.");
+        }
+    }
+
+ /**
+ * Set full-text options. argOptions holds pairs: element 2k is an option name and element 2k+1 is its argument.
+ */
+ private void setFullTextOption(IPointable[] argOptions, int uniqueQueryTokenCount) throws HyracksDataException {
+ for (int i = 0; i < optionArgsLength; i = i + 2) {
+ // mode option
+ if (compareStrInByteArrayAndPointable(FullTextContainsDescriptor.getSearchModeOptionArray(), argOptions[i],
+ true) == 0) {
+ if (compareStrInByteArrayAndPointable(FullTextContainsDescriptor.getDisjunctiveFTSearchOptionArray(),
+ argOptions[i + 1], true) == 0) {
+ // ANY
+ occurrenceThreshold = 1;
+ } else if (compareStrInByteArrayAndPointable(
+ FullTextContainsDescriptor.getConjunctiveFTSearchOptionArray(), argOptions[i + 1], true) == 0) {
+ // ALL
+ occurrenceThreshold = uniqueQueryTokenCount;
+ }
+ }
+ }
+ }
+
+ boolean readLeftAndConductSearch(IPointable arg1) throws HyracksDataException {
+ // Now, we traverse the left side (document field) and tokenize the array and check whether each token
+ // exists in the hash set. If it's the first time we find it, we increase foundCount.
+ // As soon as foundCount reaches occurrenceThreshold (foundCount >= occurrenceThreshold), we return true and stop.
+ int foundCount = 0;
+
+ // The left side: field (document)
+ // Reset the tokenizer for the given keywords in a document.
+ tokenizerForLeftArray.reset(arg1.getByteArray(), arg1.getStartOffset(), arg1.getLength());
+
+ // Create tokens from a field in the left side (document)
+ while (tokenizerForLeftArray.hasNext()) {
+ tokenizerForLeftArray.next();
+
+ // Record the starting position and the length of the current token.
+ keyEntry.set(tokenizerForLeftArray.getToken().getStartOffset(),
+ tokenizerForLeftArray.getToken().getTokenLength());
+
+ // Checks whether this token exists in the query hash-set.
+ // Only the first occurrence of a query token in the document is counted.
+ // So, finding the same query token again will not be counted as another match.
+ if (rightHashSet.find(keyEntry, arg1.getByteArray(), true) == 1) {
+ foundCount++;
+ if (foundCount >= occurrenceThreshold) {
+ return true;
+ }
+ }
+ }
+
+ // Traversed all tokens. However, the count never reached the threshold.
+ return false;
+ }
+
+ private int compareStrInByteArrayAndPointable(byte[] left, IPointable right, boolean rightTypeTagIncluded)
+ throws HyracksDataException {
+ int rightTypeTagLength = rightTypeTagIncluded ? 1 : 0;
+
+ return strLowerCaseCmp.compare(left, 0, left.length, right.getByteArray(),
+ right.getStartOffset() + rightTypeTagLength, right.getLength() - rightTypeTagLength);
+ }
+
+ /**
+ * Check the argument types. The argument1 should be a string. The argument2 should be a string or (un)ordered list.
+ */
+ protected boolean checkArgTypes(ATypeTag typeTag1, ATypeTag typeTag2) throws HyracksDataException {
+ if ((typeTag1 != ATypeTag.STRING) || (typeTag2 != ATypeTag.ORDEREDLIST && typeTag2 != ATypeTag.UNORDEREDLIST
+ && !ATypeHierarchy.isCompatible(typeTag1, typeTag2))) {
+ return false;
+ }
+ return true;
+ }
+
+}
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
index 7c1ef63..4f7a30f 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardCheckEvaluator.java
@@ -27,7 +27,6 @@
import org.apache.asterix.om.types.AOrderedListType;
import org.apache.asterix.om.types.BuiltinType;
import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.asterix.runtime.evaluators.functions.BinaryHashMap.BinaryEntry;
import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
import org.apache.hyracks.api.context.IHyracksTaskContext;
@@ -37,6 +36,7 @@
import org.apache.hyracks.data.std.primitive.IntegerPointable;
import org.apache.hyracks.data.std.primitive.VoidPointable;
import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.data.std.util.BinaryEntry;
import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
public class SimilarityJaccardCheckEvaluator extends SimilarityJaccardEvaluator {
@@ -120,18 +120,18 @@
BinaryEntry entry = hashMap.get(keyEntry);
if (entry != null) {
// Increment second value.
- int firstValInt = IntegerPointable.getInteger(entry.buf, entry.off);
+ int firstValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
// Irrelevant for the intersection size.
if (firstValInt == 0) {
continue;
}
- int secondValInt = IntegerPointable.getInteger(entry.buf, entry.off + 4);
+ int secondValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset() + 4);
// Subtract old min value.
intersectionSize -= (firstValInt < secondValInt) ? firstValInt : secondValInt;
secondValInt++;
// Add new min value.
intersectionSize += (firstValInt < secondValInt) ? firstValInt : secondValInt;
- IntegerPointable.setInteger(entry.buf, entry.off + 4, secondValInt);
+ IntegerPointable.setInteger(entry.getBuf(), entry.getOffset() + 4, secondValInt);
} else {
// Could not find element in other set. Increase min union size by 1.
minUnionSize++;
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
index f08073c..2bad468 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/common/SimilarityJaccardEvaluator.java
@@ -27,13 +27,12 @@
import org.apache.asterix.formats.nontagged.SerializerDeserializerProvider;
import org.apache.asterix.om.base.AFloat;
import org.apache.asterix.om.base.AMutableFloat;
-import org.apache.asterix.runtime.exceptions.TypeMismatchException;
import org.apache.asterix.om.functions.AsterixBuiltinFunctions;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.om.types.BuiltinType;
import org.apache.asterix.om.types.EnumDeserializer;
import org.apache.asterix.runtime.evaluators.functions.BinaryHashMap;
-import org.apache.asterix.runtime.evaluators.functions.BinaryHashMap.BinaryEntry;
+import org.apache.asterix.runtime.exceptions.TypeMismatchException;
import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
import org.apache.hyracks.api.context.IHyracksTaskContext;
@@ -45,6 +44,7 @@
import org.apache.hyracks.data.std.primitive.IntegerPointable;
import org.apache.hyracks.data.std.primitive.VoidPointable;
import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.data.std.util.BinaryEntry;
import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
public class SimilarityJaccardEvaluator implements IScalarEvaluator {
@@ -171,7 +171,7 @@
protected void buildHashMap(AbstractAsterixListIterator buildIter) throws HyracksDataException {
// Build phase: Add items into hash map, starting with first list.
// Value in map is a pair of integers. Set first integer to 1.
- IntegerPointable.setInteger(valEntry.buf, 0, 1);
+ IntegerPointable.setInteger(valEntry.getBuf(), 0, 1);
while (buildIter.hasNext()) {
byte[] buf = buildIter.getData();
int off = buildIter.getPos();
@@ -180,8 +180,8 @@
BinaryEntry entry = hashMap.put(keyEntry, valEntry);
if (entry != null) {
// Increment value.
- int firstValInt = IntegerPointable.getInteger(entry.buf, entry.off);
- IntegerPointable.setInteger(entry.buf, entry.off, firstValInt + 1);
+ int firstValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
+ IntegerPointable.setInteger(entry.getBuf(), entry.getOffset(), firstValInt + 1);
}
buildIter.next();
}
@@ -199,18 +199,18 @@
BinaryEntry entry = hashMap.get(keyEntry);
if (entry != null) {
// Increment second value.
- int firstValInt = IntegerPointable.getInteger(entry.buf, entry.off);
+ int firstValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
// Irrelevant for the intersection size.
if (firstValInt == 0) {
continue;
}
- int secondValInt = IntegerPointable.getInteger(entry.buf, entry.off + 4);
+ int secondValInt = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset() + 4);
// Subtract old min value.
intersectionSize -= (firstValInt < secondValInt) ? firstValInt : secondValInt;
secondValInt++;
// Add new min value.
intersectionSize += (firstValInt < secondValInt) ? firstValInt : secondValInt;
- IntegerPointable.setInteger(entry.buf, entry.off + 4, secondValInt);
+ IntegerPointable.setInteger(entry.getBuf(), entry.getOffset() + 4, secondValInt);
}
probeIter.next();
}
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/BinaryHashMap.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/BinaryHashMap.java
index d89a63e..2864473 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/BinaryHashMap.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/BinaryHashMap.java
@@ -18,9 +18,6 @@
*/
package org.apache.asterix.runtime.evaluators.functions;
-import java.io.ByteArrayInputStream;
-import java.io.DataInput;
-import java.io.DataInputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
@@ -30,8 +27,8 @@
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.api.dataflow.value.IBinaryComparator;
import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
-import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.data.std.util.BinaryEntry;
/**
* The most simple implementation of a static hashtable you could imagine.
@@ -60,26 +57,6 @@
private int nextOff;
private int size;
- // Can be used for key or value.
- public static class BinaryEntry {
- public byte[] buf;
- public int off;
- public int len;
-
- public void set(byte[] buf, int off, int len) {
- this.buf = buf;
- this.off = off;
- this.len = len;
- }
-
- // Inefficient. Just for debugging.
- @SuppressWarnings("rawtypes")
- public String print(ISerializerDeserializer serde) throws HyracksDataException {
- ByteArrayInputStream inStream = new ByteArrayInputStream(buf, off, len);
- DataInput dataIn = new DataInputStream(inStream);
- return serde.deserialize(dataIn).toString();
- }
- }
public BinaryHashMap(int tableSize, int frameSize, IBinaryHashFunction putHashFunc,
IBinaryHashFunction getHashFunc, IBinaryComparator cmp) {
@@ -119,9 +96,9 @@
private BinaryEntry getPutInternal(BinaryEntry key, BinaryEntry value, boolean put) throws HyracksDataException {
int bucket;
if (put) {
- bucket = Math.abs(putHashFunc.hash(key.buf, key.off, key.len) % listHeads.length);
+ bucket = Math.abs(putHashFunc.hash(key.getBuf(), key.getOffset(), key.getLength()) % listHeads.length);
} else {
- bucket = Math.abs(getHashFunc.hash(key.buf, key.off, key.len) % listHeads.length);
+ bucket = Math.abs(getHashFunc.hash(key.getBuf(), key.getOffset(), key.getLength()) % listHeads.length);
}
long headPtr = listHeads[bucket];
if (headPtr == NULL_PTR) {
@@ -140,7 +117,8 @@
frame = frames.get(frameIndex);
int entryKeyOff = frameOff + ENTRY_HEADER_SIZE;
int entryKeyLen = frame.getShort(frameOff);
- if (cmp.compare(frame.array(), entryKeyOff, entryKeyLen, key.buf, key.off, key.len) == 0) {
+ if (cmp.compare(frame.array(), entryKeyOff, entryKeyLen, key.getBuf(), key.getOffset(),
+ key.getLength()) == 0) {
// Key found, set values and return.
int entryValOff = frameOff + ENTRY_HEADER_SIZE + entryKeyLen;
int entryValLen = frame.getShort(frameOff + SLOT_SIZE);
@@ -160,7 +138,7 @@
public long appendEntry(BinaryEntry key, BinaryEntry value) {
ByteBuffer frame = frames.get(currFrameIndex);
- int requiredSpace = key.len + value.len + ENTRY_HEADER_SIZE;
+ int requiredSpace = key.getLength() + value.getLength() + ENTRY_HEADER_SIZE;
if (nextOff + requiredSpace >= frameSize) {
// Entry doesn't fit on frame, allocate a new one.
if (requiredSpace > frameSize) {
@@ -171,9 +149,10 @@
nextOff = 0;
frame = frames.get(currFrameIndex);
}
- writeEntryHeader(frame, nextOff, key.len, value.len, NULL_PTR);
- System.arraycopy(key.buf, key.off, frame.array(), nextOff + ENTRY_HEADER_SIZE, key.len);
- System.arraycopy(value.buf, value.off, frame.array(), nextOff + ENTRY_HEADER_SIZE + key.len, value.len);
+ writeEntryHeader(frame, nextOff, key.getLength(), value.getLength(), NULL_PTR);
+ System.arraycopy(key.getBuf(), key.getOffset(), frame.array(), nextOff + ENTRY_HEADER_SIZE, key.getLength());
+ System.arraycopy(value.getBuf(), value.getOffset(), frame.array(),
+ nextOff + ENTRY_HEADER_SIZE + key.getLength(), value.getLength());
long entryPtr = getEntryPtr(currFrameIndex, nextOff);
nextOff += requiredSpace;
size++;
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/FullTextContainsDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/FullTextContainsDescriptor.java
new file mode 100644
index 0000000..082e0cf
--- /dev/null
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/FullTextContainsDescriptor.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.asterix.runtime.evaluators.functions;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.asterix.om.functions.AsterixBuiltinFunctions;
+import org.apache.asterix.om.functions.IFunctionDescriptor;
+import org.apache.asterix.om.functions.IFunctionDescriptorFactory;
+import org.apache.asterix.om.types.ATypeTag;
+import org.apache.asterix.runtime.evaluators.base.AbstractScalarFunctionDynamicDescriptor;
+import org.apache.asterix.runtime.evaluators.common.FullTextContainsEvaluator;
+import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
+import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluator;
+import org.apache.hyracks.algebricks.runtime.base.IScalarEvaluatorFactory;
+import org.apache.hyracks.api.context.IHyracksTaskContext;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+public class FullTextContainsDescriptor extends AbstractScalarFunctionDynamicDescriptor {
+ private static final long serialVersionUID = 1L;
+
+ // parameter name and its type - based on the order of parameters in this map, parameters will be re-arranged.
+ private static final Map<String, ATypeTag> paramTypeMap = new LinkedHashMap<>();
+
+ public static final String SEARCH_MODE_OPTION = "mode";
+ public static final String DISJUNCTIVE_SEARCH_MODE_OPTION = "any";
+ public static final String CONJUNCTIVE_SEARCH_MODE_OPTION = "all";
+
+ private static final byte[] SEARCH_MODE_OPTION_ARRAY = UTF8StringUtil.writeStringToBytes(SEARCH_MODE_OPTION);
+ private static final byte[] DISJUNCTIVE_SEARCH_MODE_OPTION_ARRAY = UTF8StringUtil
+ .writeStringToBytes(DISJUNCTIVE_SEARCH_MODE_OPTION);
+ private static final byte[] CONJUNCTIVE_SEARCH_MODE_OPTION_ARRAY = UTF8StringUtil
+ .writeStringToBytes(CONJUNCTIVE_SEARCH_MODE_OPTION);
+
+ static {
+ paramTypeMap.put(SEARCH_MODE_OPTION, ATypeTag.STRING);
+ }
+
+ public static final IFunctionDescriptorFactory FACTORY = new IFunctionDescriptorFactory() {
+ @Override
+ public IFunctionDescriptor createFunctionDescriptor() {
+ return new FullTextContainsDescriptor();
+ }
+ };
+
+ /**
+ * Creates full-text search evaluator. There are three kinds of arguments:
+ * arg0: Expression1 - search field
+ * arg1: Expression2 - search predicate
+ * arg2 and so on: Full-text search option
+ */
+ @Override
+ public IScalarEvaluatorFactory createEvaluatorFactory(final IScalarEvaluatorFactory[] args)
+ throws AlgebricksException {
+ return new IScalarEvaluatorFactory() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public IScalarEvaluator createScalarEvaluator(IHyracksTaskContext ctx) throws HyracksDataException {
+ return new FullTextContainsEvaluator(args, ctx);
+ }
+ };
+ }
+
+ @Override
+ public FunctionIdentifier getIdentifier() {
+ return AsterixBuiltinFunctions.FULLTEXT_CONTAINS;
+ }
+
+ public static byte[] getSearchModeOptionArray() {
+ return SEARCH_MODE_OPTION_ARRAY;
+ }
+
+ public static byte[] getDisjunctiveFTSearchOptionArray() {
+ return DISJUNCTIVE_SEARCH_MODE_OPTION_ARRAY;
+ }
+
+ public static byte[] getConjunctiveFTSearchOptionArray() {
+ return CONJUNCTIVE_SEARCH_MODE_OPTION_ARRAY;
+ }
+
+ public static Map<String, ATypeTag> getParamTypeMap() {
+ return paramTypeMap;
+ }
+}
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/records/RecordAddFieldsDescriptor.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/records/RecordAddFieldsDescriptor.java
index e8a2c42..b8908dd 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/records/RecordAddFieldsDescriptor.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/functions/records/RecordAddFieldsDescriptor.java
@@ -57,6 +57,7 @@
import org.apache.hyracks.data.std.api.IPointable;
import org.apache.hyracks.data.std.primitive.VoidPointable;
import org.apache.hyracks.data.std.util.ArrayBackedValueStorage;
+import org.apache.hyracks.data.std.util.BinaryEntry;
import org.apache.hyracks.dataflow.common.data.accessors.IFrameTupleReference;
public class RecordAddFieldsDescriptor extends AbstractScalarFunctionDynamicDescriptor {
@@ -120,8 +121,8 @@
.createBinaryHashFunction();
private final IBinaryHashFunction getHashFunc = ListItemBinaryHashFunctionFactory.INSTANCE
.createBinaryHashFunction();
- private final BinaryHashMap.BinaryEntry keyEntry = new BinaryHashMap.BinaryEntry();
- private final BinaryHashMap.BinaryEntry valEntry = new BinaryHashMap.BinaryEntry();
+ private final BinaryEntry keyEntry = new BinaryEntry();
+ private final BinaryEntry valEntry = new BinaryEntry();
private final IVisitablePointable tempValReference = allocator.allocateEmpty();
private final IBinaryComparator cmp = ListItemBinaryComparatorFactory.INSTANCE
.createBinaryComparator();
@@ -234,9 +235,9 @@
keyEntry.set(namePointable.getByteArray(), namePointable.getStartOffset(),
namePointable.getLength());
// Check if already in our built record
- BinaryHashMap.BinaryEntry entry = hashMap.get(keyEntry);
+ BinaryEntry entry = hashMap.get(keyEntry);
if (entry != null) {
- tempValReference.set(entry.buf, entry.off, entry.len);
+ tempValReference.set(entry.getBuf(), entry.getOffset(), entry.getLength());
// If value is not equal throw conflicting duplicate field, otherwise ignore
if (!PointableHelper.byteArrayEqual(valuePointable, tempValReference)) {
throw new RuntimeDataException(ErrorCode.ERROR_DUPLICATE_FIELD_NAME,
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/DeepEqualityVisitorHelper.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/DeepEqualityVisitorHelper.java
index 0e1f342..000425e 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/DeepEqualityVisitorHelper.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/DeepEqualityVisitorHelper.java
@@ -25,6 +25,7 @@
import org.apache.asterix.runtime.evaluators.functions.BinaryHashMap;
import org.apache.hyracks.api.dataflow.value.IBinaryComparator;
import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
+import org.apache.hyracks.data.std.util.BinaryEntry;
public class DeepEqualityVisitorHelper {
// Default values
@@ -39,11 +40,11 @@
private IBinaryComparator cmp = listItemBinaryComparatorFactory.createBinaryComparator();
private BinaryHashMap hashMap = null;
- public BinaryHashMap initializeHashMap(BinaryHashMap.BinaryEntry valEntry) {
+ public BinaryHashMap initializeHashMap(BinaryEntry valEntry) {
return initializeHashMap(0, 0, valEntry);
}
- public BinaryHashMap initializeHashMap(int tableSize, int tableFrameSize, BinaryHashMap.BinaryEntry valEntry) {
+ public BinaryHashMap initializeHashMap(int tableSize, int tableFrameSize, BinaryEntry valEntry) {
if (tableFrameSize != 0 && tableSize != 0) {
hashMap = new BinaryHashMap(tableSize, tableFrameSize, putHashFunc, getHashFunc, cmp);
} else {
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/ListDeepEqualityChecker.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/ListDeepEqualityChecker.java
index 6d5513d..df4847e 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/ListDeepEqualityChecker.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/ListDeepEqualityChecker.java
@@ -20,16 +20,17 @@
import java.io.IOException;
import java.util.List;
+
import org.apache.asterix.common.exceptions.AsterixException;
import org.apache.asterix.om.pointables.AListVisitablePointable;
import org.apache.asterix.om.pointables.base.IVisitablePointable;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.runtime.evaluators.functions.BinaryHashMap;
-import org.apache.asterix.runtime.evaluators.functions.BinaryHashMap.BinaryEntry;
import org.apache.asterix.runtime.evaluators.functions.PointableHelper;
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.primitive.IntegerPointable;
+import org.apache.hyracks.data.std.util.BinaryEntry;
class ListDeepEqualityChecker {
private DeepEqualityVisitor visitor;
@@ -100,7 +101,7 @@
int off = item.getStartOffset();
int len = item.getLength();
keyEntry.set(buf, off, len);
- IntegerPointable.setInteger(valEntry.buf, 0, i);
+ IntegerPointable.setInteger(valEntry.getBuf(), 0, i);
hashMap.put(keyEntry, valEntry);
}
@@ -125,7 +126,7 @@
return false;
}
- int indexLeft = IntegerPointable.getInteger(entry.buf, entry.off);
+ int indexLeft = IntegerPointable.getInteger(entry.getBuf(), entry.getOffset());
ATypeTag fieldTypeLeft = PointableHelper.getTypeTag(itemTagTypesLeft.get(indexLeft));
if(fieldTypeLeft.isDerivedType() && fieldTypeLeft != PointableHelper.getTypeTag(itemTagTypesRight.get(indexRight))) {
return false;
diff --git a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/RecordDeepEqualityChecker.java b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/RecordDeepEqualityChecker.java
index 84e9cf6..40af09a 100644
--- a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/RecordDeepEqualityChecker.java
+++ b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/evaluators/visitors/RecordDeepEqualityChecker.java
@@ -30,14 +30,15 @@
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.data.std.primitive.IntegerPointable;
+import org.apache.hyracks.data.std.util.BinaryEntry;
class RecordDeepEqualityChecker {
private final Pair<IVisitablePointable, Boolean> nestedVisitorArg = new Pair<IVisitablePointable, Boolean>(null,
false);
private final DeepEqualityVisitorHelper deepEqualityVisitorHelper = new DeepEqualityVisitorHelper();
private DeepEqualityVisitor visitor;
- private BinaryHashMap.BinaryEntry keyEntry = new BinaryHashMap.BinaryEntry();
- private BinaryHashMap.BinaryEntry valEntry = new BinaryHashMap.BinaryEntry();
+ private BinaryEntry keyEntry = new BinaryEntry();
+ private BinaryEntry valEntry = new BinaryEntry();
private BinaryHashMap hashMap;
public RecordDeepEqualityChecker(int tableSize, int tableFrameSize) {
@@ -75,7 +76,7 @@
for (int i = 0; i < sizeLeft; i++) {
IVisitablePointable fieldName = fieldNamesLeft.get(i);
keyEntry.set(fieldName.getByteArray(), fieldName.getStartOffset(), fieldName.getLength());
- IntegerPointable.setInteger(valEntry.buf, 0, i);
+ IntegerPointable.setInteger(valEntry.getBuf(), 0, i);
hashMap.put(keyEntry, valEntry);
}
@@ -91,12 +92,12 @@
for (int i = 0; i < fieldNamesRight.size(); i++) {
IVisitablePointable fieldName = fieldNamesRight.get(i);
keyEntry.set(fieldName.getByteArray(), fieldName.getStartOffset(), fieldName.getLength());
- BinaryHashMap.BinaryEntry entry = hashMap.get(keyEntry);
+ BinaryEntry entry = hashMap.get(keyEntry);
if (entry == null) {
return false;
}
- int fieldIdLeft = AInt32SerializerDeserializer.getInt(entry.buf, entry.off);
+ int fieldIdLeft = AInt32SerializerDeserializer.getInt(entry.getBuf(), entry.getOffset());
ATypeTag fieldTypeLeft = PointableHelper.getTypeTag(fieldTypesLeft.get(fieldIdLeft));
if (fieldTypeLeft.isDerivedType() && fieldTypeLeft != PointableHelper.getTypeTag(fieldTypesRight.get(i))) {
return false;
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringLowercaseTokenPointable.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringLowercaseTokenPointable.java
new file mode 100644
index 0000000..66c1ab9
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/primitive/UTF8StringLowercaseTokenPointable.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2009-2013 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hyracks.data.std.primitive;
+
+import org.apache.hyracks.api.dataflow.value.ITypeTraits;
+import org.apache.hyracks.data.std.api.AbstractPointable;
+import org.apache.hyracks.data.std.api.IComparable;
+import org.apache.hyracks.data.std.api.IHashable;
+import org.apache.hyracks.data.std.api.IPointable;
+import org.apache.hyracks.data.std.api.IPointableFactory;
+import org.apache.hyracks.util.string.UTF8StringUtil;
+
+/**
+ * This lowercase string token pointable is for the UTF8 string that doesn't have length bytes in the beginning.
+ * This pointable exists to represent a string token.
+ * The reason is that when we tokenize a string, the length of each token is kept as a separate value.
+ * Therefore, the length of this string is provided as a parameter instead of being read from length bytes.
+ */
+public final class UTF8StringLowercaseTokenPointable extends AbstractPointable implements IHashable, IComparable {
+ public static final ITypeTraits TYPE_TRAITS = new ITypeTraits() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public boolean isFixedLength() {
+ return false;
+ }
+
+ @Override
+ public int getFixedLength() {
+ return 0;
+ }
+ };
+
+ public static final IPointableFactory FACTORY = new IPointableFactory() {
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public IPointable createPointable() {
+ return new UTF8StringLowercaseTokenPointable();
+ }
+
+ @Override
+ public ITypeTraits getTypeTraits() {
+ return TYPE_TRAITS;
+ }
+ };
+
+ // Set the length of this pointable
+ public void setLength(int length) {
+ this.length = length;
+ }
+
+ @Override
+ public int compareTo(IPointable pointer) {
+ return compareTo(pointer.getByteArray(), pointer.getStartOffset(), pointer.getLength());
+ }
+
+ @Override
+ public int compareTo(byte[] bytes, int start, int length) {
+ return UTF8StringUtil.lowerCaseCompareTo(this.bytes, this.start, this.length, bytes, start, length);
+ }
+
+ @Override
+ public int hash() {
+ return UTF8StringUtil.lowerCaseHash(bytes, start, length);
+ }
+
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/BinaryEntry.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/BinaryEntry.java
new file mode 100644
index 0000000..7336dca
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/BinaryEntry.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.data.std.util;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+
+import org.apache.hyracks.api.dataflow.value.ISerializerDeserializer;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * Holds the meta-data (buffer, offset, length) of an entry for BinaryHashMap and BinaryHashSet.
+ */
+public class BinaryEntry {
+    private byte[] data;
+    private int offset;
+    private int length;
+
+    /**
+     * Sets the offset and the length of this entry. The buffer of this entry is reset to null.
+     */
+    public void set(int offset, int length) {
+        this.data = null;
+        this.offset = offset;
+        this.length = length;
+    }
+
+    /**
+     * Sets the buffer, the offset, and the length of this entry at once.
+     */
+    public void set(byte[] buf, int off, int len) {
+        this.data = buf;
+        this.offset = off;
+        this.length = len;
+    }
+
+    public byte[] getBuf() {
+        return data;
+    }
+
+    public void setBuf(byte[] buf) {
+        this.data = buf;
+    }
+
+    public int getOffset() {
+        return offset;
+    }
+
+    public void setOffset(int off) {
+        this.offset = off;
+    }
+
+    public int getLength() {
+        return length;
+    }
+
+    public void setLength(int len) {
+        this.length = len;
+    }
+
+    /**
+     * Deserializes the bytes of this entry (buffer, offset, length) with the given serde
+     * and returns the string form of the result.
+     * Inefficient. Just for debugging.
+     */
+    @SuppressWarnings("rawtypes")
+    public String print(ISerializerDeserializer serde) throws HyracksDataException {
+        DataInput entryInput = new DataInputStream(new ByteArrayInputStream(data, offset, length));
+        return serde.deserialize(entryInput).toString();
+    }
+}
\ No newline at end of file
diff --git a/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/BinaryHashSet.java b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/BinaryHashSet.java
new file mode 100644
index 0000000..c3e36da
--- /dev/null
+++ b/hyracks-fullstack/hyracks/hyracks-data/hyracks-data-std/src/main/java/org/apache/hyracks/data/std/util/BinaryHashSet.java
@@ -0,0 +1,299 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hyracks.data.std.util;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hyracks.api.dataflow.value.IBinaryComparator;
+import org.apache.hyracks.api.dataflow.value.IBinaryHashFunction;
+import org.apache.hyracks.api.exceptions.HyracksDataException;
+
+/**
+ * A very simple static hash-set for binary data.
+ * It works with arbitrary key types, given that they have implementations of
+ * IBinaryHashFunction and IBinaryComparator.
+ * Each entry in the hash table holds the offset (2 byte) and the length (2 byte) of a key,
+ * the count of the key (1 byte), and a pointer (4 byte) to the next entry in the same bucket.
+ * The real key value is not stored in the set since it can be found using the reference array.
+ * Hash value: it is calculated from the key bytes whenever a key is inserted or looked up.
+ * This class is NOT thread safe. - For single thread access only
+ * Limitation - a frame size can't be greater than 64K because we use 2 bytes to store the frame offset.
+ * Can't have more than 64K frames because we use 2 bytes to store the frame index.
+ */
+public class BinaryHashSet {
+    // Special value to indicate an empty "bucket" in the header array.
+    static final int NULL_PTR = -1;
+    private static final int PTR_SIZE = 4; // 2 byte - frameIdx, 2 byte - frameOffset
+    static final int SLOT_SIZE = 2;
+
+    // This hash-set also stores the count of the real key.
+    // It's not part of the key and can be used to indicate whether this key exists in a different array or not.
+    static final int COUNT_SIZE = 1; // max value: Byte.MAX_VALUE (2^7 - 1)
+    private static final int ENTRY_HEADER_SIZE = 2 * SLOT_SIZE + PTR_SIZE + COUNT_SIZE;
+    // We are using 2 byte. Therefore, the limit is 64K.
+    private static final int NO_OF_FRAME_LIMIT = 65535;
+    private static final int ONE_FRAME_SIZE_LIMIT = 65535;
+    private final IBinaryHashFunction hashFunc;
+    private final IBinaryComparator cmp;
+
+    private final int[] listHeads;
+    private final int frameSize;
+    private final List<ByteBuffer> frames = new ArrayList<>();
+    private int currFrameIndex;
+    private int nextOff;
+    private int size;
+
+    // Byte array that holds the real data for this hashset
+    private byte[] refArray;
+
+    /**
+     * Initializes a hash-set. It will contain one frame by default.
+     *
+     * @param tableSize
+     *            the number of buckets
+     * @param frameSize
+     *            the size of each frame in bytes
+     * @param hashFunc
+     *            the hash function that is applied to the key bytes
+     * @param cmp
+     *            the comparator that is used to compare the key bytes
+     * @param refArray
+     *            the byte array that the keys in this hash-set refer to
+     * @throws IllegalStateException
+     *             when the given frame size is greater than the frame size limit
+     */
+    public BinaryHashSet(int tableSize, int frameSize, IBinaryHashFunction hashFunc, IBinaryComparator cmp,
+            byte[] refArray) {
+        listHeads = new int[tableSize];
+        if (frameSize > ONE_FRAME_SIZE_LIMIT) {
+            throw new IllegalStateException(
+                    "A frame size can't be greater than " + ONE_FRAME_SIZE_LIMIT + ". Can't continue.");
+        }
+        this.frameSize = frameSize;
+        this.hashFunc = hashFunc;
+        this.cmp = cmp;
+        frames.add(ByteBuffer.allocate(frameSize));
+        // clear() resets the reference array, too. So, the reference array needs to be set after this call.
+        clear();
+        this.refArray = refArray;
+    }
+
+    /**
+     * Set the byte array that the keys in this hash-set refer to.
+     *
+     * @param refArray
+     */
+    public void setRefArray(byte[] refArray) {
+        this.refArray = refArray;
+    }
+
+    /**
+     * Inserts a key (off, len) into the hash set. The key bytes are read from the reference array.
+     * The count of the key will not be changed.
+     *
+     * @param key
+     * @return 0: when the key is newly inserted.
+     *         the current count of the key: when the key is already in the hash set.
+     * @throws HyracksDataException
+     */
+    public int put(BinaryEntry key) throws HyracksDataException {
+        return putFindInternal(key, true, null, false);
+    }
+
+    /**
+     * Find whether the given key from an array exists in the hash set.
+     * If the key exists and increaseFoundCount is true, then the count will be increased by 1
+     * unless it has already reached Byte.MAX_VALUE.
+     *
+     * @param key
+     *            the offset and the length of the key in the keyArray
+     * @param keyArray
+     *            the byte array that contains the key
+     * @param increaseFoundCount
+     *            whether the count of the key needs to be increased when the key is found
+     * @return the current count of the key: when a given key exists.
+     *         -1: when the given key doesn't exist.
+     * @throws HyracksDataException
+     */
+    public int find(BinaryEntry key, byte[] keyArray, boolean increaseFoundCount) throws HyracksDataException {
+        return putFindInternal(key, false, keyArray, increaseFoundCount);
+    }
+
+    // Put an entry or find an entry
+    private int putFindInternal(BinaryEntry key, boolean isInsert, byte[] keyArray, boolean increaseFoundCount)
+            throws HyracksDataException {
+        // A key to be inserted resides in the reference array. A key to be found resides in the given keyArray.
+        byte[] probeArray = isInsert ? this.refArray : keyArray;
+        int bucket = Math.abs(hashFunc.hash(probeArray, key.getOffset(), key.getLength()) % listHeads.length);
+
+        int headPtr = listHeads[bucket];
+        if (headPtr == NULL_PTR) {
+            // Key definitely doesn't exist yet.
+            if (isInsert) {
+                // Key is being inserted.
+                listHeads[bucket] = appendEntry(key);
+                return 0;
+            }
+            // find case - the bucket is empty: return -1 since there is no such element in the hash-set
+            return -1;
+        }
+
+        // if headPtr is not null,
+        // follow the chain in the bucket until we found an entry matching the given key.
+        int frameOff;
+        ByteBuffer frame;
+        do {
+            // Get frame num and frame offset from the ptr
+            frame = frames.get(getFrameIndex(headPtr));
+            frameOff = getFrameOffset(headPtr);
+
+            // Get the offset and the length of the stored key in the reference array
+            int entryKeyOff = frame.getChar(frameOff);
+            int entryKeyLen = frame.getChar(frameOff + SLOT_SIZE);
+
+            // Check the key length first. If they don't match, we don't even need to compare two entries.
+            if (entryKeyLen == key.getLength() && cmp.compare(this.refArray, entryKeyOff, entryKeyLen, probeArray,
+                    key.getOffset(), key.getLength()) == 0) {
+                int entryCount = frame.get(frameOff + 2 * SLOT_SIZE);
+                // put case - the key is already there: the count is returned as it is.
+                // find case - increase the count if requested. The maximum count is Byte.MAX_VALUE.
+                if (!isInsert && increaseFoundCount && entryCount < Byte.MAX_VALUE) {
+                    entryCount++;
+                    frame.put(frameOff + 2 * SLOT_SIZE, (byte) entryCount);
+                }
+                return entryCount;
+            }
+            // Get next key position
+            headPtr = frame.getInt(frameOff + 2 * SLOT_SIZE + COUNT_SIZE);
+        } while (headPtr != NULL_PTR);
+
+        // We've followed the chain to its end, and didn't find the key.
+        if (isInsert) {
+            // Append the new entry, and set a pointer to it in the last entry we've checked.
+            // put case - success
+            int newPtr = appendEntry(key);
+            frame.putInt(frameOff + 2 * SLOT_SIZE + COUNT_SIZE, newPtr);
+            return 0;
+        }
+        // find case - fail
+        return -1;
+    }
+
+    /**
+     * Appends the entry header for the given key to the current frame and moves on to the next frame
+     * when the header doesn't fit into the current one. The new entry is not linked to any bucket here.
+     *
+     * @param key
+     *            the offset and the length of the key in the reference array
+     * @return the pointer (frame index and frame offset) to the appended entry
+     * @throws IllegalStateException
+     *             when an entry can't fit into a frame or the number of frames exceeds the limit
+     */
+    public int appendEntry(BinaryEntry key) {
+        ByteBuffer frame = frames.get(currFrameIndex);
+        int requiredSpace = ENTRY_HEADER_SIZE;
+        if (nextOff + requiredSpace >= frameSize) {
+            // Entry doesn't fit on the current frame, move on to the next frame.
+            if (requiredSpace > frameSize) {
+                throw new IllegalStateException(
+                        "A hash key is greater than the framesize: " + frameSize + ". Can't continue.");
+            }
+            // Frames that were allocated before the last clear() are reused.
+            // A new frame is only allocated when there is no such frame left.
+            if (currFrameIndex + 1 >= frames.size()) {
+                if (frames.size() > NO_OF_FRAME_LIMIT) {
+                    throw new IllegalStateException(
+                            "There can't be more than " + NO_OF_FRAME_LIMIT + " frames. Can't continue.");
+                }
+                frames.add(ByteBuffer.allocate(frameSize));
+            }
+            currFrameIndex++;
+            nextOff = 0;
+            frame = frames.get(currFrameIndex);
+        }
+        writeEntryHeader(frame, nextOff, key.getOffset(), key.getLength(), 0, NULL_PTR);
+        int entryPtr = getEntryPtr(currFrameIndex, nextOff);
+        nextOff += requiredSpace;
+        size++;
+        return entryPtr;
+    }
+
+    private void writeEntryHeader(ByteBuffer frame, int targetOff, int keyOff, int keyLen, int keyCount, int ptr) {
+        // [2 byte key offset] [2 byte key length] [1 byte key count] [2 byte the frame num] [2 byte the frame offset]
+        // NOTE(review): the casts to char keep only the lower 16 bits of keyOff and keyLen.
+        // This assumes that both values are less than 64K - confirm against the callers.
+        frame.putChar(targetOff, (char) keyOff);
+        frame.putChar(targetOff + SLOT_SIZE, (char) keyLen);
+        frame.put(targetOff + 2 * SLOT_SIZE, (byte) keyCount);
+        frame.putInt(targetOff + 2 * SLOT_SIZE + COUNT_SIZE, ptr);
+    }
+
+    // Packs the frame index into the upper 2 bytes and the frame offset into the lower 2 bytes.
+    private int getEntryPtr(int frameIndex, int frameOff) {
+        return (frameIndex << 16) + frameOff;
+    }
+
+    // The unsigned shift is required: a frame index of 32768 or greater sets the sign bit of the pointer.
+    private int getFrameIndex(int ptr) {
+        return ptr >>> 16;
+    }
+
+    private int getFrameOffset(int ptr) {
+        return ptr & 0xffff;
+    }
+
+    /**
+     * @return the number of entries that have been appended since the last clear()
+     */
+    public int size() {
+        return size;
+    }
+
+    /**
+     * @return true when this hash set contains no entry
+     */
+    public boolean isEmpty() {
+        return size == 0;
+    }
+
+    /**
+     * Removes all entries. The frames that have been allocated so far are kept for reuse.
+     * The reference array is reset to null. So, setRefArray() needs to be called before the next put().
+     */
+    public void clear() {
+        // Initialize all entries to point to nothing.
+        Arrays.fill(listHeads, NULL_PTR);
+        currFrameIndex = 0;
+        nextOff = 0;
+        size = 0;
+        this.refArray = null;
+    }
+
+    /**
+     * Iterate all key entries and reset the foundCount of each key to zero.
+     */
+    public void clearFoundCount() {
+        for (int headPtr : listHeads) {
+            // Follow the chain of each bucket. An empty bucket (NULL_PTR) is skipped right away.
+            int entryPtr = headPtr;
+            while (entryPtr != NULL_PTR) {
+                // Get frame num and frame offset from the ptr
+                ByteBuffer frame = frames.get(getFrameIndex(entryPtr));
+                int frameOff = getFrameOffset(entryPtr);
+
+                // Set the count as zero
+                frame.put(frameOff + 2 * SLOT_SIZE, (byte) 0);
+
+                // Get next key position
+                entryPtr = frame.getInt(frameOff + 2 * SLOT_SIZE + COUNT_SIZE);
+            }
+        }
+    }
+
+}
diff --git a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
index 28fa2be..32e930d 100644
--- a/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
+++ b/hyracks-fullstack/hyracks/hyracks-storage-am-lsm-invertedindex/src/main/java/org/apache/hyracks/storage/am/lsm/invertedindex/tokenizers/DelimitedUTF8StringBinaryTokenizer.java
@@ -50,7 +50,7 @@
return byteIndex < sentenceEndOffset;
}
+ /**
+ * Returns true when the given character is treated as a separator, i.e., when it is neither a letter
+ * nor a digit (Character.isLetterOrDigit) and its type is neither OTHER_LETTER nor OTHER_NUMBER.
+ */
- private static boolean isSeparator(char c) {
+ public static boolean isSeparator(char c) {
return !(Character.isLetterOrDigit(c) || Character.getType(c) == Character.OTHER_LETTER
|| Character.getType(c) == Character.OTHER_NUMBER);
}
diff --git a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
index e867ecc..cd654d7 100644
--- a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
+++ b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/string/UTF8StringUtil.java
@@ -249,6 +249,13 @@
return compareTo(thisBytes, thisStart, thatBytes, thatStart, true, false);
}
+    // Lowercase comparison for a string that does not include the length bytes in the beginning.
+    // The start offset and the length of each string are given explicitly as parameters.
+    // (e.g., a token in a string)
+    public static int lowerCaseCompareTo(byte[] thisBytes, int thisStart, int thisLength, byte[] thatBytes,
+            int thatStart, int thatLength) {
+        final boolean useLowerCase = true;
+        final boolean useRawByte = false;
+        return compareTo(thisBytes, thisStart, thisLength, thatBytes, thatStart, thatLength, useLowerCase,
+                useRawByte);
+    }
+
public static int hash(byte[] bytes, int start, int coefficient, int r) {
return hash(bytes, start, false, false, coefficient, r);
}
@@ -257,6 +264,12 @@
return hash(bytes, start, false, false, 31, Integer.MAX_VALUE);
}
+    // Hash function for a string that includes the length bytes in the beginning:
+    // reads the length first and then hashes the bytes that follow the length bytes.
+    private static int hash(byte[] bytes, int start, boolean useLowerCase, boolean useRawByte, int coefficient, int r) {
+        final int length = getUTFLength(bytes, start);
+        final int contentStart = start + getNumBytesToStoreLength(length);
+        return hash(bytes, contentStart, length, useLowerCase, useRawByte, coefficient, r);
+    }
+
/**
* This function provides the raw bytes-based hash function for UTF8 strings.
* Note that the hash values may not deliver the correct ordering for certain languages that include 2 or 3 bytes characters.
@@ -270,6 +283,12 @@
return hash(bytes, start, true, false, 31, Integer.MAX_VALUE);
}
+    // Lowercase hash function for a string that does not include the length bytes in the beginning.
+    // The length of the given string is given explicitly as a parameter.
+    public static int lowerCaseHash(byte[] bytes, int start, int length) {
+        final boolean useLowerCase = true;
+        final boolean useRawByte = false;
+        return hash(bytes, start, length, useLowerCase, useRawByte, 31, Integer.MAX_VALUE);
+    }
+
public static StringBuilder toString(StringBuilder builder, byte[] bytes, int start) {
int utfLen = getUTFLength(bytes, start);
int offset = getNumBytesToStoreLength(utfLen);
@@ -352,23 +371,27 @@
private static int compareTo(byte[] thisBytes, int thisStart, byte[] thatBytes, int thatStart, boolean useLowerCase,
boolean useRawByte) {
- int utflen1 = getUTFLength(thisBytes, thisStart);
- int utflen2 = getUTFLength(thatBytes, thatStart);
+ // Both strings start with their length bytes: read each length, skip the length bytes,
+ // and delegate to the variant that takes the actual start offsets and the lengths explicitly.
+ int thisLength = getUTFLength(thisBytes, thisStart);
+ int thatLength = getUTFLength(thatBytes, thatStart);
+ int thisActualStart = thisStart + getNumBytesToStoreLength(thisLength);
+ int thatActualStart = thatStart + getNumBytesToStoreLength(thatLength);
+ return compareTo(thisBytes, thisActualStart, thisLength, thatBytes, thatActualStart, thatLength, useLowerCase,
+ useRawByte);
+ }
+ private static int compareTo(byte[] thisBytes, int thisActualStart, int thisLength, byte[] thatBytes,
+ int thatActualStart, int thatLength, boolean useLowerCase, boolean useRawByte) {
int c1 = 0;
int c2 = 0;
- int s1Start = thisStart + getNumBytesToStoreLength(utflen1);
- int s2Start = thatStart + getNumBytesToStoreLength(utflen2);
-
- while (c1 < utflen1 && c2 < utflen2) {
+ while (c1 < thisLength && c2 < thatLength) {
char ch1, ch2;
if (useRawByte) {
- ch1 = (char) thisBytes[s1Start + c1];
- ch2 = (char) thatBytes[s2Start + c2];
+ ch1 = (char) thisBytes[thisActualStart + c1];
+ ch2 = (char) thatBytes[thatActualStart + c2];
} else {
- ch1 = (charAt(thisBytes, s1Start + c1));
- ch2 = (charAt(thatBytes, s2Start + c2));
+ ch1 = charAt(thisBytes, thisActualStart + c1);
+ ch2 = charAt(thatBytes, thatActualStart + c2);
if (useLowerCase) {
ch1 = Character.toLowerCase(ch1);
@@ -379,30 +402,29 @@
if (ch1 != ch2) {
return ch1 - ch2;
}
- c1 += charSize(thisBytes, s1Start + c1);
- c2 += charSize(thatBytes, s2Start + c2);
+ c1 += charSize(thisBytes, thisActualStart + c1);
+ c2 += charSize(thatBytes, thatActualStart + c2);
}
- return utflen1 - utflen2;
+ return thisLength - thatLength;
}
- private static int hash(byte[] bytes, int start, boolean useLowerCase, boolean useRawByte, int coefficient, int r) {
+ // Calculates the hash value of the string that begins at the given start offset and has the given length.
+ // The string is not expected to carry the length bytes in the beginning.
+ // For each character: h = (coefficient * h + ch) % r.
+ private static int hash(byte[] bytes, int start, int length, boolean useLowerCase, boolean useRawByte,
+ int coefficient, int r) {
int h = 0;
- int utflen = getUTFLength(bytes, start);
- int sStart = start + getNumBytesToStoreLength(utflen);
int c = 0;
- while (c < utflen) {
+ while (c < length) {
char ch;
if (useRawByte) {
+ // raw byte mode: the byte at the current position is used as it is (no decoding, no lower-casing)
- ch = (char) bytes[sStart + c];
+ ch = (char) bytes[start + c];
} else {
- ch = charAt(bytes, sStart + c);
+ ch = charAt(bytes, start + c);
if (useLowerCase) {
ch = Character.toLowerCase(ch);
}
}
h = (coefficient * h + ch) % r;
+ // advance by the size that charSize() reports for the character at the current position
- c += charSize(bytes, sStart + c);
+ c += charSize(bytes, start + c);
}
return h;
}