Merged fullstack_lsm_staging up to r3336
git-svn-id: https://hyracks.googlecode.com/svn/trunk@3339 123451ca-8445-de46-9d55-352943316053
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
new file mode 100644
index 0000000..da53d0a
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/pom.xml
@@ -0,0 +1,49 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <artifactId>hyracks-storage-am-lsm-invertedindex-test</artifactId>
+ <!-- No groupId/version is declared for this module itself; Maven inherits both from the parent declared below. -->
+ <parent>
+ <artifactId>hyracks-tests</artifactId>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <version>0.2.4-SNAPSHOT</version>
+ <relativePath>..</relativePath>
+ </parent>
+ <!-- Compile this module at the Java 1.7 source/target level. -->
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.7</source>
+ <target>1.7</target>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <dependencies>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-storage-am-lsm-invertedindex</artifactId>
+ <version>0.2.4-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-test-support</artifactId>
+ <version>0.2.4-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>edu.uci.ics.hyracks</groupId>
+ <artifactId>hyracks-data-std</artifactId>
+ <version>0.2.4-SNAPSHOT</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+</project>
\ No newline at end of file
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexBulkLoadTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexBulkLoadTest.java
new file mode 100644
index 0000000..fcb78ad
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexBulkLoadTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class LSMInvertedIndexBulkLoadTest extends AbstractInvertedIndexLoadTest {
+ // Load test for the LSM index type: the inherited runTest populates the index via bulk load in a single round.
+ public LSMInvertedIndexBulkLoadTest() {
+ super(InvertedIndexType.LSM, true, 1); // (invIndexType, bulkLoad, numRounds)
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexDeleteTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexDeleteTest.java
new file mode 100644
index 0000000..4e2fe37
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexDeleteTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexDeleteTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class LSMInvertedIndexDeleteTest extends AbstractInvertedIndexDeleteTest {
+ // Delete test for the LSM index type: documents are loaded with regular inserts (bulkLoad = false) before deletion.
+ public LSMInvertedIndexDeleteTest() {
+ super(InvertedIndexType.LSM, false); // (invIndexType, bulkLoad)
+ }
+}
\ No newline at end of file
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexInsertTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexInsertTest.java
new file mode 100644
index 0000000..e9a1c75
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexInsertTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class LSMInvertedIndexInsertTest extends AbstractInvertedIndexLoadTest {
+ // Load test for the LSM index type: the inherited runTest populates the index via regular inserts in a single round.
+ public LSMInvertedIndexInsertTest() {
+ super(InvertedIndexType.LSM, false, 1); // (invIndexType, bulkLoad, numRounds)
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMergeTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMergeTest.java
new file mode 100644
index 0000000..811919b
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMergeTest.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import java.io.IOException;
+
+import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.NoOpIOOperationCallback;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestUtils;
+
+public class LSMInvertedIndexMergeTest extends AbstractInvertedIndexLoadTest {
+ // Merge test for the LSM index type: each round loads documents, schedules a merge, then validates and searches.
+ private final int maxTreesToMerge = AccessMethodTestsConfig.LSM_INVINDEX_MAX_TREES_TO_MERGE;
+
+ public LSMInvertedIndexMergeTest() {
+ super(InvertedIndexType.LSM, true, 1); // (invIndexType, bulkLoad, numRounds); numRounds is not read by runTest below
+ }
+ // Overrides the inherited load workload with maxTreesToMerge load-then-merge rounds.
+ @Override
+ protected void runTest(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen) throws IOException,
+ IndexException {
+ IIndex invIndex = testCtx.getIndex();
+ invIndex.create();
+ invIndex.activate();
+ ILSMIndexAccessor invIndexAccessor = (ILSMIndexAccessor) invIndex.createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+
+ for (int i = 0; i < maxTreesToMerge; i++) {
+ for (int j = 0; j < i; j++) { // round i performs i loads, so round 0 schedules a merge without loading anything
+ if (bulkLoad) {
+ LSMInvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+ } else { // not taken by this class: the constructor passes bulkLoad = true
+ LSMInvertedIndexTestUtils.insertIntoInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+ }
+ }
+ // Perform merge.
+ invIndexAccessor.scheduleMerge(NoOpIOOperationCallback.INSTANCE);
+ validateAndCheckIndex(testCtx);
+ runTinySearchWorkload(testCtx, tupleGen);
+ }
+
+ invIndex.deactivate();
+ invIndex.destroy();
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMultiBulkLoadTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMultiBulkLoadTest.java
new file mode 100644
index 0000000..adfb689
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexMultiBulkLoadTest.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class LSMInvertedIndexMultiBulkLoadTest extends AbstractInvertedIndexLoadTest {
+ // Load test for the LSM index type: bulk loads repeatedly, once per configured bulk-load round.
+ public LSMInvertedIndexMultiBulkLoadTest() {
+ super(InvertedIndexType.LSM, true, AccessMethodTestsConfig.LSM_INVINDEX_NUM_BULKLOAD_ROUNDS); // (invIndexType, bulkLoad, numRounds)
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexSearchTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexSearchTest.java
new file mode 100644
index 0000000..1528e20
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/LSMInvertedIndexSearchTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexSearchTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class LSMInvertedIndexSearchTest extends AbstractInvertedIndexSearchTest {
+ // Runs the workload inherited from AbstractInvertedIndexSearchTest against the LSM index type.
+ public LSMInvertedIndexSearchTest() {
+ super(InvertedIndexType.LSM, false); // NOTE(review): second argument is presumably the bulkLoad flag - confirm in base class
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexBulkLoadTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexBulkLoadTest.java
new file mode 100644
index 0000000..f7a36f0
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexBulkLoadTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class PartitionedLSMInvertedIndexBulkLoadTest extends AbstractInvertedIndexLoadTest {
+ // Load test for the PARTITIONED_LSM index type: the inherited runTest bulk loads the index in a single round.
+ public PartitionedLSMInvertedIndexBulkLoadTest() {
+ super(InvertedIndexType.PARTITIONED_LSM, true, 1); // (invIndexType, bulkLoad, numRounds)
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexDeleteTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexDeleteTest.java
new file mode 100644
index 0000000..4fd529b
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexDeleteTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexDeleteTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class PartitionedLSMInvertedIndexDeleteTest extends AbstractInvertedIndexDeleteTest {
+ // Delete test for the PARTITIONED_LSM index type: documents are loaded with regular inserts (bulkLoad = false).
+ public PartitionedLSMInvertedIndexDeleteTest() {
+ super(InvertedIndexType.PARTITIONED_LSM, false); // (invIndexType, bulkLoad)
+ }
+}
\ No newline at end of file
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexInsertTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexInsertTest.java
new file mode 100644
index 0000000..4608f81
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexInsertTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class PartitionedLSMInvertedIndexInsertTest extends AbstractInvertedIndexLoadTest {
+ // Load test for the PARTITIONED_LSM index type: the inherited runTest uses regular inserts in a single round.
+ public PartitionedLSMInvertedIndexInsertTest() {
+ super(InvertedIndexType.PARTITIONED_LSM, false, 1); // (invIndexType, bulkLoad, numRounds)
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexMergeTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexMergeTest.java
new file mode 100644
index 0000000..786afe1
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexMergeTest.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import java.io.IOException;
+
+import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.NoOpIOOperationCallback;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestUtils;
+
+public class PartitionedLSMInvertedIndexMergeTest extends AbstractInvertedIndexLoadTest {
+ // Merge test for the PARTITIONED_LSM index type: each round loads documents, schedules a merge, then validates and searches.
+ private final int maxTreesToMerge = AccessMethodTestsConfig.LSM_INVINDEX_MAX_TREES_TO_MERGE;
+
+ public PartitionedLSMInvertedIndexMergeTest() {
+ super(InvertedIndexType.PARTITIONED_LSM, true, 1); // (invIndexType, bulkLoad, numRounds); numRounds is not read by runTest below
+ }
+ // Overrides the inherited load workload with maxTreesToMerge load-then-merge rounds.
+ @Override
+ protected void runTest(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen) throws IOException,
+ IndexException {
+ IIndex invIndex = testCtx.getIndex();
+ invIndex.create();
+ invIndex.activate();
+ ILSMIndexAccessor invIndexAccessor = (ILSMIndexAccessor) invIndex.createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+
+ for (int i = 0; i < maxTreesToMerge; i++) {
+ for (int j = 0; j < i; j++) { // round i performs i loads, so round 0 schedules a merge without loading anything
+ if (bulkLoad) {
+ LSMInvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+ } else { // not taken by this class: the constructor passes bulkLoad = true
+ LSMInvertedIndexTestUtils.insertIntoInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+ }
+ }
+ // Perform merge.
+ invIndexAccessor.scheduleMerge(NoOpIOOperationCallback.INSTANCE);
+ validateAndCheckIndex(testCtx);
+ runTinySearchWorkload(testCtx, tupleGen);
+ }
+
+ invIndex.deactivate();
+ invIndex.destroy();
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexMultiBulkLoadTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexMultiBulkLoadTest.java
new file mode 100644
index 0000000..80a3c0b
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexMultiBulkLoadTest.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class PartitionedLSMInvertedIndexMultiBulkLoadTest extends AbstractInvertedIndexLoadTest {
+ // Load test for the PARTITIONED_LSM index type: bulk loads repeatedly, once per configured bulk-load round.
+ public PartitionedLSMInvertedIndexMultiBulkLoadTest() {
+ super(InvertedIndexType.PARTITIONED_LSM, true, AccessMethodTestsConfig.LSM_INVINDEX_NUM_BULKLOAD_ROUNDS); // (invIndexType, bulkLoad, numRounds)
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexSearchTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexSearchTest.java
new file mode 100644
index 0000000..c8a7667
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/PartitionedLSMInvertedIndexSearchTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexSearchTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class PartitionedLSMInvertedIndexSearchTest extends AbstractInvertedIndexSearchTest {
+ // Runs the workload inherited from AbstractInvertedIndexSearchTest against the PARTITIONED_LSM index type.
+ public PartitionedLSMInvertedIndexSearchTest() {
+ super(InvertedIndexType.PARTITIONED_LSM, false); // NOTE(review): second argument is presumably the bulkLoad flag - confirm in base class
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
new file mode 100644
index 0000000..f7783fb
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexDeleteTest.java
@@ -0,0 +1,100 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestUtils;
+
+public abstract class AbstractInvertedIndexDeleteTest extends AbstractInvertedIndexTest {
+ // Shared insert-then-delete workload; the constructor arguments select the index type and the load mode.
+ protected final int numInsertRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_INSERT_ROUNDS;
+ protected final int numDeleteRounds = AccessMethodTestsConfig.LSM_INVINDEX_NUM_DELETE_ROUNDS;
+ protected final boolean bulkLoad; // true: load via bulkLoadInvIndex; false: load via insertIntoInvIndex
+
+ public AbstractInvertedIndexDeleteTest(InvertedIndexType invIndexType, boolean bulkLoad) {
+ super(invIndexType);
+ this.bulkLoad = bulkLoad;
+ }
+ // Per insert round: reset the generator, load NUM_DOCS_TO_INSERT documents, then delete in numDeleteRounds steps.
+ protected void runTest(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen) throws IOException,
+ IndexException {
+ IIndex invIndex = testCtx.getIndex();
+ invIndex.create();
+ invIndex.activate();
+
+ for (int i = 0; i < numInsertRounds; i++) {
+ // Start generating document ids from 0 again.
+ tupleGen.reset();
+
+ if (bulkLoad) {
+ LSMInvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+ } else {
+ LSMInvertedIndexTestUtils.insertIntoInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+ }
+
+ // Delete all documents in a couple of rounds.
+ int numTuplesPerDeleteRound = (int) Math.ceil((float) testCtx.getDocumentCorpus().size()
+ / (float) numDeleteRounds); // rounded up so that numDeleteRounds rounds together cover the whole corpus
+ for (int j = 0; j < numDeleteRounds; j++) { // validateAndCheckIndex and runTinySearchWorkload run after every delete round
+ LSMInvertedIndexTestUtils.deleteFromInvIndex(testCtx, harness.getRandom(), numTuplesPerDeleteRound);
+ validateAndCheckIndex(testCtx);
+ runTinySearchWorkload(testCtx, tupleGen);
+ }
+ }
+
+ invIndex.deactivate();
+ invIndex.destroy();
+ }
+ // The tests below feed runTest with different context/generator pairs (word, hashed word, n-gram, hashed n-gram, going by the factory names).
+ @Test
+ public void wordTokensInvIndexTest() throws IOException, IndexException {
+ LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestUtils.createWordInvIndexTestContext(harness, invIndexType);
+ TupleGenerator tupleGen = LSMInvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom());
+ runTest(testCtx, tupleGen);
+ }
+
+ @Test
+ public void hashedWordTokensInvIndexTest() throws IOException, IndexException {
+ LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestUtils.createHashedWordInvIndexTestContext(harness,
+ invIndexType);
+ TupleGenerator tupleGen = LSMInvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom());
+ runTest(testCtx, tupleGen);
+ }
+
+ @Test
+ public void ngramTokensInvIndexTest() throws IOException, IndexException {
+ LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestUtils.createNGramInvIndexTestContext(harness, invIndexType);
+ TupleGenerator tupleGen = LSMInvertedIndexTestUtils.createPersonNamesTupleGen(harness.getRandom());
+ runTest(testCtx, tupleGen);
+ }
+
+ @Test
+ public void hashedNGramTokensInvIndexTest() throws IOException, IndexException {
+ LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestUtils.createHashedNGramInvIndexTestContext(harness,
+ invIndexType);
+ TupleGenerator tupleGen = LSMInvertedIndexTestUtils.createPersonNamesTupleGen(harness.getRandom());
+ runTest(testCtx, tupleGen);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java
new file mode 100644
index 0000000..c855cc4
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexLoadTest.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestUtils;
+
+/**
+ * Loads an index for {@code numRounds} rounds (bulk load or inserts), validating and searching it after each round.
+ */
+public abstract class AbstractInvertedIndexLoadTest extends AbstractInvertedIndexTest {
+
+    protected final boolean bulkLoad;
+    protected final int numRounds;
+
+    public AbstractInvertedIndexLoadTest(InvertedIndexType invIndexType, boolean bulkLoad, int numRounds) {
+        super(invIndexType);
+        this.numRounds = numRounds;
+        this.bulkLoad = bulkLoad;
+    }
+
+    /** Creates and activates the index, runs the load/validate/search rounds, then deactivates and destroys it. */
+    protected void runTest(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen) throws IOException,
+            IndexException {
+        final IIndex index = testCtx.getIndex();
+        index.create();
+        index.activate();
+        for (int round = 0; round < numRounds; round++) {
+            if (!bulkLoad) {
+                LSMInvertedIndexTestUtils.insertIntoInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+            } else {
+                LSMInvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+            }
+            validateAndCheckIndex(testCtx);
+            runTinySearchWorkload(testCtx, tupleGen);
+        }
+        index.deactivate();
+        index.destroy();
+    }
+
+    @Test
+    public void wordTokensInvIndexTest() throws IOException, IndexException {
+        LSMInvertedIndexTestContext wordCtx = LSMInvertedIndexTestUtils.createWordInvIndexTestContext(harness,
+                invIndexType);
+        runTest(wordCtx, LSMInvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom()));
+    }
+
+    @Test
+    public void hashedWordTokensInvIndexTest() throws IOException, IndexException {
+        LSMInvertedIndexTestContext hashedWordCtx = LSMInvertedIndexTestUtils.createHashedWordInvIndexTestContext(
+                harness, invIndexType);
+        runTest(hashedWordCtx, LSMInvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom()));
+    }
+
+    @Test
+    public void ngramTokensInvIndexTest() throws IOException, IndexException {
+        LSMInvertedIndexTestContext ngramCtx = LSMInvertedIndexTestUtils.createNGramInvIndexTestContext(harness,
+                invIndexType);
+        runTest(ngramCtx, LSMInvertedIndexTestUtils.createPersonNamesTupleGen(harness.getRandom()));
+    }
+
+    @Test
+    public void hashedNGramTokensInvIndexTest() throws IOException, IndexException {
+        LSMInvertedIndexTestContext hashedNgramCtx = LSMInvertedIndexTestUtils.createHashedNGramInvIndexTestContext(
+                harness, invIndexType);
+        runTest(hashedNgramCtx, LSMInvertedIndexTestUtils.createPersonNamesTupleGen(harness.getRandom()));
+    }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
new file mode 100644
index 0000000..991ff59
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexSearchTest.java
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.EditDistanceSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.JaccardSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestUtils;
+
+/**
+ * Populates an inverted index once (bulk load or inserts) and then searches it with a list of search modifiers.
+ */
+public abstract class AbstractInvertedIndexSearchTest extends AbstractInvertedIndexTest {
+
+    protected final Logger LOGGER = Logger.getLogger(AbstractInvertedIndexSearchTest.class.getName());
+
+    protected int NUM_DOC_QUERIES = AccessMethodTestsConfig.LSM_INVINDEX_NUM_DOC_QUERIES;
+    protected int NUM_RANDOM_QUERIES = AccessMethodTestsConfig.LSM_INVINDEX_NUM_RANDOM_QUERIES;
+    protected final boolean bulkLoad;
+
+    public AbstractInvertedIndexSearchTest(InvertedIndexType invIndexType, boolean bulkLoad) {
+        super(invIndexType);
+        this.bulkLoad = bulkLoad;
+    }
+
+    /**
+     * Creates and activates the index, loads and validates it, runs one search workload per modifier,
+     * and finally deactivates and destroys the index.
+     */
+    protected void runTest(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen,
+            List<IInvertedIndexSearchModifier> searchModifiers) throws IOException, IndexException {
+        final IIndex index = testCtx.getIndex();
+        index.create();
+        index.activate();
+
+        if (!bulkLoad) {
+            LSMInvertedIndexTestUtils.insertIntoInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+        } else {
+            LSMInvertedIndexTestUtils.bulkLoadInvIndex(testCtx, tupleGen, NUM_DOCS_TO_INSERT);
+        }
+        index.validate();
+
+        for (IInvertedIndexSearchModifier modifier : searchModifiers) {
+            if (LOGGER.isLoggable(Level.INFO)) {
+                LOGGER.info("Running searches with: " + modifier.toString());
+            }
+            LSMInvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(), NUM_DOC_QUERIES,
+                    NUM_RANDOM_QUERIES, modifier, SCAN_COUNT_ARRAY);
+        }
+
+        index.deactivate();
+        index.destroy();
+    }
+
+    /** Returns a new list: one conjunctive modifier, then Jaccard modifiers built with 1.0, 0.9, 0.7 and 0.5. */
+    private List<IInvertedIndexSearchModifier> newConjunctiveAndJaccardModifiers() {
+        List<IInvertedIndexSearchModifier> modifiers = new ArrayList<IInvertedIndexSearchModifier>();
+        modifiers.add(new ConjunctiveSearchModifier());
+        for (float threshold : new float[] { 1.0f, 0.9f, 0.7f, 0.5f }) {
+            modifiers.add(new JaccardSearchModifier(threshold));
+        }
+        return modifiers;
+    }
+
+    /** Runs the search test with the string-document tuple generator and the conjunctive/Jaccard modifiers. */
+    private void testWordInvIndexIndex(LSMInvertedIndexTestContext testCtx) throws IOException, IndexException {
+        TupleGenerator docGen = LSMInvertedIndexTestUtils.createStringDocumentTupleGen(harness.getRandom());
+        runTest(testCtx, docGen, newConjunctiveAndJaccardModifiers());
+    }
+
+    /** Runs the search test with the person-names tuple generator, adding edit-distance modifiers for 0 to 3. */
+    private void testNGramInvIndexIndex(LSMInvertedIndexTestContext testCtx) throws IOException, IndexException {
+        TupleGenerator nameGen = LSMInvertedIndexTestUtils.createPersonNamesTupleGen(harness.getRandom());
+        List<IInvertedIndexSearchModifier> modifiers = newConjunctiveAndJaccardModifiers();
+        for (int edThresh = 0; edThresh <= 3; edThresh++) {
+            modifiers.add(new EditDistanceSearchModifier(LSMInvertedIndexTestUtils.TEST_GRAM_LENGTH, edThresh));
+        }
+        runTest(testCtx, nameGen, modifiers);
+    }
+
+    @Test
+    public void wordTokensInvIndexTest() throws IOException, IndexException {
+        testWordInvIndexIndex(LSMInvertedIndexTestUtils.createWordInvIndexTestContext(harness, invIndexType));
+    }
+
+    @Test
+    public void hashedWordTokensInvIndexTest() throws IOException, IndexException {
+        testWordInvIndexIndex(LSMInvertedIndexTestUtils.createHashedWordInvIndexTestContext(harness, invIndexType));
+    }
+
+    @Test
+    public void ngramTokensInvIndexTest() throws IOException, IndexException {
+        testNGramInvIndexIndex(LSMInvertedIndexTestUtils.createNGramInvIndexTestContext(harness, invIndexType));
+    }
+
+    @Test
+    public void hashedNGramTokensInvIndexTest() throws IOException, IndexException {
+        testNGramInvIndexIndex(LSMInvertedIndexTestUtils.createHashedNGramInvIndexTestContext(harness, invIndexType));
+    }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexTest.java
new file mode 100644
index 0000000..90a6d54
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/AbstractInvertedIndexTest.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
+
+import java.io.IOException;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.junit.After;
+import org.junit.Before;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.JaccardSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestUtils;
+
+public abstract class AbstractInvertedIndexTest {
+    protected final Logger LOGGER = Logger.getLogger(AbstractInvertedIndexTest.class.getName());
+
+    protected final LSMInvertedIndexTestHarness harness = new LSMInvertedIndexTestHarness();
+
+    protected final int NUM_DOCS_TO_INSERT = AccessMethodTestsConfig.LSM_INVINDEX_NUM_DOCS_TO_INSERT;
+    protected final int[] SCAN_COUNT_ARRAY = new int[AccessMethodTestsConfig.LSM_INVINDEX_SCAN_COUNT_ARRAY_SIZE];
+
+    protected final int TINY_WORKLOAD_NUM_DOC_QUERIES = AccessMethodTestsConfig.LSM_INVINDEX_TINY_NUM_DOC_QUERIES;
+    protected final int TINY_WORKLOAD_NUM_RANDOM_QUERIES = AccessMethodTestsConfig.LSM_INVINDEX_TINY_NUM_RANDOM_QUERIES;
+
+    // The edit-distance search modifier is deliberately absent here; it is tested separately.
+    protected final IInvertedIndexSearchModifier[] TEST_SEARCH_MODIFIERS = { new ConjunctiveSearchModifier(),
+            new JaccardSearchModifier(0.8f), new JaccardSearchModifier(0.5f) };
+
+    protected final InvertedIndexType invIndexType;
+
+    public AbstractInvertedIndexTest(InvertedIndexType invIndexType) {
+        this.invIndexType = invIndexType;
+    }
+
+    @Before
+    public void setUp() throws HyracksException {
+        harness.setUp();
+    }
+
+    @After
+    public void tearDown() throws HyracksDataException {
+        harness.tearDown();
+    }
+
+    /**
+     * Runs the index's own validation and then compares the index against the expected index.
+     * Only the integrity and contents of the index are verified here;
+     * the correctness of index searches is not covered by this check.
+     */
+    protected void validateAndCheckIndex(LSMInvertedIndexTestContext testCtx) throws HyracksDataException, IndexException {
+        final IIndex index = testCtx.getIndex();
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("Validating index: " + index);
+        }
+        index.validate();
+        // The full comparison exercises features particular to the in-memory and on-disk index types.
+        boolean fullCompare = invIndexType == InvertedIndexType.ONDISK || invIndexType == InvertedIndexType.INMEMORY;
+        if (fullCompare) {
+            LSMInvertedIndexTestUtils.compareActualAndExpectedIndexes(testCtx);
+        }
+        // The range-search comparison runs for every index type.
+        LSMInvertedIndexTestUtils.compareActualAndExpectedIndexesRangeSearch(testCtx);
+    }
+
+    /** Searches the index once per entry of TEST_SEARCH_MODIFIERS and verifies the results of each workload. */
+    protected void runTinySearchWorkload(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen) throws IOException,
+            IndexException {
+        for (int i = 0; i < TEST_SEARCH_MODIFIERS.length; i++) {
+            final IInvertedIndexSearchModifier modifier = TEST_SEARCH_MODIFIERS[i];
+            if (LOGGER.isLoggable(Level.INFO)) {
+                LOGGER.info("Running test workload with: " + modifier.toString());
+            }
+            LSMInvertedIndexTestUtils.testIndexSearch(testCtx, tupleGen, harness.getRandom(),
+                    TINY_WORKLOAD_NUM_DOC_QUERIES, TINY_WORKLOAD_NUM_RANDOM_QUERIES, modifier, SCAN_COUNT_ARRAY);
+        }
+    }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
new file mode 100644
index 0000000..5be1d6a
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/common/LSMInvertedIndexTestHarness.java
@@ -0,0 +1,216 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.Random;
+
+import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.api.io.IODeviceHandle;
+import edu.uci.ics.hyracks.control.nc.io.IOManager;
+import edu.uci.ics.hyracks.storage.am.common.api.IInMemoryFreePageManager;
+import edu.uci.ics.hyracks.storage.am.common.frames.LIFOMetaDataFrameFactory;
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.IInMemoryBufferCache;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationCallbackProvider;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMIOOperationScheduler;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMMergePolicy;
+import edu.uci.ics.hyracks.storage.am.lsm.common.api.ILSMOperationTrackerFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.DualIndexInMemoryBufferCache;
+import edu.uci.ics.hyracks.storage.am.lsm.common.freepage.DualIndexInMemoryFreePageManager;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.NoMergePolicy;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.NoOpIOOperationCallback;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.SynchronousScheduler;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.ThreadCountingOperationTrackerFactory;
+import edu.uci.ics.hyracks.storage.common.buffercache.HeapBufferAllocator;
+import edu.uci.ics.hyracks.storage.common.buffercache.IBufferCache;
+import edu.uci.ics.hyracks.storage.common.file.IFileMapProvider;
+import edu.uci.ics.hyracks.test.support.TestStorageManagerComponentHolder;
+import edu.uci.ics.hyracks.test.support.TestUtils;
+
+/**
+ * Shared fixture for the LSM inverted-index tests: buffer caches, I/O manager, seeded random and on-disk directory.
+ */
+public class LSMInvertedIndexTestHarness {
+
+    private static final long RANDOM_SEED = 50;
+
+    protected final int diskPageSize;
+    protected final int diskNumPages;
+    protected final int diskMaxOpenFiles;
+    protected final int memPageSize;
+    protected final int memNumPages;
+    protected final int hyracksFrameSize;
+
+    protected IOManager ioManager;
+    protected IBufferCache diskBufferCache;
+    protected IFileMapProvider diskFileMapProvider;
+    protected IInMemoryBufferCache memBufferCache;
+    protected IInMemoryFreePageManager memFreePageManager;
+    protected IHyracksTaskContext ctx;
+    protected ILSMIOOperationScheduler ioScheduler;
+    protected ILSMMergePolicy mergePolicy;
+    protected ILSMOperationTrackerFactory opTrackerFactory;
+    protected ILSMIOOperationCallbackProvider ioOpCallbackProvider;
+
+    protected final Random rnd = new Random();
+    protected final static SimpleDateFormat simpleDateFormat = new SimpleDateFormat("ddMMyy-hhmmssSS");
+    protected final static String sep = System.getProperty("file.separator");
+    protected String onDiskDir;
+    protected String btreeFileName = "btree_vocab";
+    protected String invIndexFileName = "inv_index";
+    protected FileReference invIndexFileRef;
+
+    /** Creates a harness sized from the {@link AccessMethodTestsConfig} defaults. */
+    public LSMInvertedIndexTestHarness() {
+        this(AccessMethodTestsConfig.LSM_INVINDEX_DISK_PAGE_SIZE, AccessMethodTestsConfig.LSM_INVINDEX_DISK_NUM_PAGES,
+                AccessMethodTestsConfig.LSM_INVINDEX_DISK_MAX_OPEN_FILES,
+                AccessMethodTestsConfig.LSM_INVINDEX_MEM_PAGE_SIZE, AccessMethodTestsConfig.LSM_INVINDEX_MEM_NUM_PAGES,
+                AccessMethodTestsConfig.LSM_INVINDEX_HYRACKS_FRAME_SIZE);
+    }
+
+    /** Creates a harness with explicit sizes; scheduler, merge policy, tracker factory and I/O callback are fixed. */
+    public LSMInvertedIndexTestHarness(int diskPageSize, int diskNumPages, int diskMaxOpenFiles, int memPageSize,
+            int memNumPages, int hyracksFrameSize) {
+        this.diskPageSize = diskPageSize;
+        this.diskNumPages = diskNumPages;
+        this.diskMaxOpenFiles = diskMaxOpenFiles;
+        this.memPageSize = memPageSize;
+        this.memNumPages = memNumPages;
+        this.hyracksFrameSize = hyracksFrameSize;
+        this.ioScheduler = SynchronousScheduler.INSTANCE;
+        this.mergePolicy = NoMergePolicy.INSTANCE;
+        this.opTrackerFactory = ThreadCountingOperationTrackerFactory.INSTANCE;
+        this.ioOpCallbackProvider = NoOpIOOperationCallback.INSTANCE;
+    }
+
+    public void setUp() throws HyracksException {
+        onDiskDir = "lsm_invertedindex_" + simpleDateFormat.format(new Date()) + sep;
+        ctx = TestUtils.create(getHyracksFrameSize());
+        TestStorageManagerComponentHolder.init(diskPageSize, diskNumPages, diskMaxOpenFiles);
+        diskBufferCache = TestStorageManagerComponentHolder.getBufferCache(ctx);
+        diskFileMapProvider = TestStorageManagerComponentHolder.getFileMapProvider(ctx);
+        memBufferCache = new DualIndexInMemoryBufferCache(new HeapBufferAllocator(), memPageSize, memNumPages);
+        memBufferCache.open();
+        memFreePageManager = new DualIndexInMemoryFreePageManager(memNumPages, new LIFOMetaDataFrameFactory());
+        ioManager = TestStorageManagerComponentHolder.getIOManager();
+        rnd.setSeed(RANDOM_SEED);
+        invIndexFileRef = ioManager.getIODevices().get(0).createFileReference(onDiskDir + invIndexFileName);
+    }
+
+    public void tearDown() throws HyracksDataException {
+        diskBufferCache.close();
+        // Best-effort cleanup: dot-files are skipped, so a directory that still holds one is left in place.
+        FilenameFilter visibleOnly = new FilenameFilter() {
+            public boolean accept(File dir, String name) {
+                return !name.startsWith(".");
+            }
+        };
+        for (IODeviceHandle dev : ioManager.getIODevices()) {
+            File dir = new File(dev.getPath(), onDiskDir);
+            String[] files = dir.list(visibleOnly);
+            if (files != null) {
+                for (String fileName : files) {
+                    new File(dir, fileName).delete();
+                }
+            }
+            dir.delete();
+        }
+        memBufferCache.close();
+    }
+
+    public FileReference getInvListsFileRef() {
+        return invIndexFileRef;
+    }
+
+    public int getDiskPageSize() {
+        return diskPageSize;
+    }
+
+    public int getDiskNumPages() {
+        return diskNumPages;
+    }
+
+    public int getDiskMaxOpenFiles() {
+        return diskMaxOpenFiles;
+    }
+
+    public int getMemPageSize() {
+        return memPageSize;
+    }
+
+    public int getMemNumPages() {
+        return memNumPages;
+    }
+
+    public int getHyracksFrameSize() {
+        return hyracksFrameSize;
+    }
+
+    public IOManager getIOManager() {
+        return ioManager;
+    }
+
+    public IBufferCache getDiskBufferCache() {
+        return diskBufferCache;
+    }
+
+    public IFileMapProvider getDiskFileMapProvider() {
+        return diskFileMapProvider;
+    }
+
+    public IInMemoryBufferCache getMemBufferCache() {
+        return memBufferCache;
+    }
+
+    public IInMemoryFreePageManager getMemFreePageManager() {
+        return memFreePageManager;
+    }
+
+    public IHyracksTaskContext getHyracksTastContext() {
+        return ctx;
+    }
+
+    public String getOnDiskDir() {
+        return onDiskDir;
+    }
+
+    public Random getRandom() {
+        return rnd;
+    }
+
+    public ILSMIOOperationScheduler getIOScheduler() {
+        return ioScheduler;
+    }
+
+    public ILSMOperationTrackerFactory getOperationTrackerFactory() {
+        return opTrackerFactory;
+    }
+
+    public ILSMMergePolicy getMergePolicy() {
+        return mergePolicy;
+    }
+
+    public ILSMIOOperationCallbackProvider getIOOperationCallbackProvider() {
+        return ioOpCallbackProvider;
+    }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexDeleteTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexDeleteTest.java
new file mode 100644
index 0000000..c71d996
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexDeleteTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexDeleteTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class InMemoryInvertedIndexDeleteTest extends AbstractInvertedIndexDeleteTest {
+ // Runs the shared delete tests on the INMEMORY index type; 'false' is presumably the bulk-load flag (confirm).
+ public InMemoryInvertedIndexDeleteTest() {
+ super(InvertedIndexType.INMEMORY, false);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexInsertTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexInsertTest.java
new file mode 100644
index 0000000..d2b883d
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexInsertTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class InMemoryInvertedIndexInsertTest extends AbstractInvertedIndexLoadTest {
+ // Runs the shared load tests on the INMEMORY index type using inserts (bulkLoad=false) for one round.
+ public InMemoryInvertedIndexInsertTest() {
+ super(InvertedIndexType.INMEMORY, false, 1);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexSearchTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexSearchTest.java
new file mode 100644
index 0000000..f3b3026
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/InMemoryInvertedIndexSearchTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexSearchTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class InMemoryInvertedIndexSearchTest extends AbstractInvertedIndexSearchTest {
+ // Runs the shared search tests on the INMEMORY index type, populating the index via inserts (bulkLoad=false).
+ public InMemoryInvertedIndexSearchTest() {
+ super(InvertedIndexType.INMEMORY, false);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexDeleteTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexDeleteTest.java
new file mode 100644
index 0000000..eac7765
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexDeleteTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexDeleteTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class PartitionedInMemoryInvertedIndexDeleteTest extends AbstractInvertedIndexDeleteTest {
+ // Runs the shared delete tests on PARTITIONED_INMEMORY; 'false' is presumably the bulk-load flag (confirm).
+ public PartitionedInMemoryInvertedIndexDeleteTest() {
+ super(InvertedIndexType.PARTITIONED_INMEMORY, false);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexInsertTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexInsertTest.java
new file mode 100644
index 0000000..8342efd
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexInsertTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+/**
+ * Concrete subclass of {@link AbstractInvertedIndexLoadTest} that fixes the index type to
+ * {@link InvertedIndexType#PARTITIONED_INMEMORY}. It declares only a constructor, so every test
+ * method it runs is inherited from the base class.
+ */
+public class PartitionedInMemoryInvertedIndexInsertTest extends AbstractInvertedIndexLoadTest {
+
+ /**
+ * Passes the index type, a {@code false} flag and the value {@code 1} to the base class.
+ */
+ public PartitionedInMemoryInvertedIndexInsertTest() {
+ // NOTE(review): the boolean is presumably the bulk-load switch (off, hence "Insert" in the class
+ // name) and the int presumably a round count -- confirm against AbstractInvertedIndexLoadTest.
+ super(InvertedIndexType.PARTITIONED_INMEMORY, false, 1);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexSearchTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexSearchTest.java
new file mode 100644
index 0000000..385d65d
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/inmemory/PartitionedInMemoryInvertedIndexSearchTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.inmemory;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexSearchTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+/**
+ * Concrete subclass of {@link AbstractInvertedIndexSearchTest} that fixes the index type to
+ * {@link InvertedIndexType#PARTITIONED_INMEMORY}. It declares only a constructor, so every test
+ * method it runs is inherited from the base class.
+ */
+public class PartitionedInMemoryInvertedIndexSearchTest extends AbstractInvertedIndexSearchTest {
+
+ /**
+ * Passes the index type and a {@code false} flag to the base class.
+ */
+ public PartitionedInMemoryInvertedIndexSearchTest() {
+ // NOTE(review): the boolean is presumably the base class's bulk-load switch -- confirm there.
+ super(InvertedIndexType.PARTITIONED_INMEMORY, false);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexMultiThreadTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexMultiThreadTest.java
new file mode 100644
index 0000000..bd48068
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexMultiThreadTest.java
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.multithread;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.api.exceptions.HyracksException;
+import edu.uci.ics.hyracks.storage.am.common.TestOperationSelector.TestOperation;
+import edu.uci.ics.hyracks.storage.am.common.TestWorkloadConf;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.api.TreeIndexException;
+import edu.uci.ics.hyracks.storage.am.common.datagen.ProbabilityHelper;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.config.AccessMethodTestsConfig;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestUtils;
+
+/**
+ * Multi-threaded test for the LSM inverted index. Each workload returned by
+ * {@link #getTestWorkloadConf()} is run on a freshly set-up harness, once with
+ * {@link #REGULAR_NUM_THREADS} and once with {@link #EXCESSIVE_NUM_THREADS} threads.
+ * Subclasses choose another index type by overriding {@link #getIndexType()}.
+ */
+public class LSMInvertedIndexMultiThreadTest {
+
+    protected final Logger LOGGER = Logger.getLogger(LSMInvertedIndexMultiThreadTest.class.getName());
+
+    // Machine-specific number of threads to use for testing.
+    protected final int REGULAR_NUM_THREADS = Runtime.getRuntime().availableProcessors();
+    // Excessive number of threads for testing.
+    protected final int EXCESSIVE_NUM_THREADS = Runtime.getRuntime().availableProcessors() * 4;
+    protected final int NUM_OPERATIONS = AccessMethodTestsConfig.LSM_INVINDEX_MULTITHREAD_NUM_OPERATIONS;
+
+    protected final LSMInvertedIndexTestHarness harness = new LSMInvertedIndexTestHarness();
+    protected final LSMInvertedIndexWorkerFactory workerFactory = new LSMInvertedIndexWorkerFactory();
+    protected final ArrayList<TestWorkloadConf> workloadConfs = getTestWorkloadConf();
+
+    /**
+     * Sets up the shared test harness. Called before every (thread count, workload) combination.
+     */
+    protected void setUp() throws HyracksException {
+        harness.setUp();
+    }
+
+    /**
+     * Tears down the shared test harness. Called after every (thread count, workload) combination.
+     */
+    protected void tearDown() throws HyracksDataException {
+        harness.tearDown();
+    }
+
+    /**
+     * Runs one workload against the index held by {@code testCtx}, validates the index afterwards and
+     * logs the first timing value reported by the driver.
+     *
+     * @param testCtx    context providing the index under test
+     * @param tupleGen   source of the field serdes and field generators handed to the driver
+     * @param numThreads number of worker threads to run
+     * @param conf       operations and their probabilities
+     * @param dataMsg    label describing the data set, used only for logging
+     */
+    protected void runTest(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, int numThreads,
+            TestWorkloadConf conf, String dataMsg) throws InterruptedException, TreeIndexException, HyracksException {
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("LSMInvertedIndex MultiThread Test:\nData: " + dataMsg + "; Threads: " + numThreads
+                    + "; Workload: " + conf.toString() + ".");
+        }
+
+        // 4 batches per thread. The integer division rounds down to 0 when NUM_OPERATIONS is smaller
+        // than 4 * numThreads (possible on many-core machines), so never go below one tuple per batch.
+        int batchSize = Math.max(1, (NUM_OPERATIONS / numThreads) / 4);
+
+        LSMInvertedIndexMultiThreadTestDriver driver = new LSMInvertedIndexMultiThreadTestDriver(testCtx.getIndex(),
+                workerFactory, tupleGen.getFieldSerdes(), tupleGen.getFieldGens(), conf.ops, conf.opProbs);
+        driver.init();
+        long[] times;
+        try {
+            times = driver.run(numThreads, 1, NUM_OPERATIONS, batchSize);
+            testCtx.getIndex().validate();
+        } finally {
+            // De-initialize the driver even if the run or the validation failed.
+            driver.deinit();
+        }
+
+        if (LOGGER.isLoggable(Level.INFO)) {
+            LOGGER.info("LSMInvertedIndex MultiThread Test Time: " + times[0] + "ms");
+        }
+    }
+
+    /**
+     * Builds a workload in which each of the given operations is equally likely.
+     */
+    private static TestWorkloadConf uniformConf(TestOperation... ops) {
+        return new TestWorkloadConf(ops, ProbabilityHelper.getUniformProbDist(ops.length));
+    }
+
+    /**
+     * Returns the workloads exercised by every test method, in the order they are run. Note that this
+     * method is invoked from a field initializer, i.e. while the object is still being constructed.
+     */
+    protected ArrayList<TestWorkloadConf> getTestWorkloadConf() {
+        ArrayList<TestWorkloadConf> confs = new ArrayList<TestWorkloadConf>();
+
+        // Insert only workload.
+        confs.add(uniformConf(TestOperation.INSERT));
+
+        // Insert and merge workload.
+        confs.add(uniformConf(TestOperation.INSERT, TestOperation.MERGE));
+
+        // Inserts mixed with point searches and scans.
+        confs.add(uniformConf(TestOperation.INSERT, TestOperation.POINT_SEARCH, TestOperation.SCAN));
+
+        // Inserts and deletes.
+        confs.add(uniformConf(TestOperation.INSERT, TestOperation.DELETE));
+
+        // Inserts, deletes and merges.
+        confs.add(uniformConf(TestOperation.INSERT, TestOperation.DELETE, TestOperation.MERGE));
+
+        // All operations except merge.
+        confs.add(uniformConf(TestOperation.INSERT, TestOperation.DELETE, TestOperation.POINT_SEARCH,
+                TestOperation.SCAN));
+
+        // All operations.
+        confs.add(uniformConf(TestOperation.INSERT, TestOperation.DELETE, TestOperation.POINT_SEARCH,
+                TestOperation.SCAN, TestOperation.MERGE));
+
+        return confs;
+    }
+
+    /**
+     * Runs every workload over a word-token index fed with generated string documents.
+     */
+    @Test
+    public void wordTokensInvIndexTest() throws IOException, IndexException, InterruptedException {
+        String dataMsg = "Documents";
+        int[] numThreads = new int[] { REGULAR_NUM_THREADS, EXCESSIVE_NUM_THREADS };
+        for (int threads : numThreads) {
+            for (TestWorkloadConf conf : workloadConfs) {
+                setUp();
+                try {
+                    LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestUtils.createWordInvIndexTestContext(
+                            harness, getIndexType());
+                    TupleGenerator tupleGen = LSMInvertedIndexTestUtils.createStringDocumentTupleGen(harness
+                            .getRandom());
+                    runTest(testCtx, tupleGen, threads, conf, dataMsg);
+                } finally {
+                    // Always release the harness so a failing run cannot leak state into the next one.
+                    tearDown();
+                }
+            }
+        }
+    }
+
+    /**
+     * Runs every workload over a hashed n-gram index fed with generated person names.
+     */
+    @Test
+    public void hashedNGramTokensInvIndexTest() throws IOException, IndexException, InterruptedException {
+        String dataMsg = "Person Names";
+        int[] numThreads = new int[] { REGULAR_NUM_THREADS, EXCESSIVE_NUM_THREADS };
+        for (int threads : numThreads) {
+            for (TestWorkloadConf conf : workloadConfs) {
+                setUp();
+                try {
+                    LSMInvertedIndexTestContext testCtx = LSMInvertedIndexTestUtils
+                            .createHashedNGramInvIndexTestContext(harness, getIndexType());
+                    TupleGenerator tupleGen = LSMInvertedIndexTestUtils.createPersonNamesTupleGen(harness
+                            .getRandom());
+                    runTest(testCtx, tupleGen, threads, conf, dataMsg);
+                } finally {
+                    // Always release the harness so a failing run cannot leak state into the next one.
+                    tearDown();
+                }
+            }
+        }
+    }
+
+    /**
+     * @return the index type the test contexts are created for; {@link InvertedIndexType#LSM} here.
+     */
+    protected InvertedIndexType getIndexType() {
+        return InvertedIndexType.LSM;
+    }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexMultiThreadTestDriver.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexMultiThreadTestDriver.java
new file mode 100644
index 0000000..6159969
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexMultiThreadTestDriver.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.multithread;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.common.IIndexTestWorkerFactory;
+import edu.uci.ics.hyracks.storage.am.common.IndexMultiThreadTestDriver;
+import edu.uci.ics.hyracks.storage.am.common.TestOperationSelector.TestOperation;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.datagen.DataGenThread;
+import edu.uci.ics.hyracks.storage.am.common.datagen.IFieldValueGenerator;
+
+/**
+ * {@link IndexMultiThreadTestDriver} variant that additionally carries an array of
+ * {@link IFieldValueGenerator}s and hands it to the {@link DataGenThread} it creates.
+ */
+@SuppressWarnings("rawtypes")
+public class LSMInvertedIndexMultiThreadTestDriver extends IndexMultiThreadTestDriver {
+
+ // Field value generators passed to every DataGenThread built by createDatagenThread().
+ protected final IFieldValueGenerator[] fieldGens;
+
+ /**
+ * Forwards {@code index}, {@code workerFactory}, {@code fieldSerdes}, {@code ops} and {@code opProbs}
+ * to the base class and keeps {@code fieldGens} for data generation.
+ */
+ public LSMInvertedIndexMultiThreadTestDriver(IIndex index, IIndexTestWorkerFactory workerFactory,
+ ISerializerDeserializer[] fieldSerdes, IFieldValueGenerator[] fieldGens, TestOperation[] ops,
+ double[] opProbs) {
+ super(index, workerFactory, fieldSerdes, ops, opProbs);
+ this.fieldGens = fieldGens;
+ }
+
+ /**
+ * Creates the data generation thread for one run from the given sizes plus this driver's field
+ * serdes and field generators.
+ */
+ public DataGenThread createDatagenThread(int numThreads, int numBatches, int batchSize) {
+ // fieldSerdes and RANDOM_SEED are not declared in this class; presumably inherited from the base
+ // driver. NOTE(review): the last argument (2 * numThreads) looks like a queue/outstanding-batch
+ // limit -- confirm against DataGenThread.
+ return new DataGenThread(numThreads, numBatches, batchSize, fieldSerdes, fieldGens, RANDOM_SEED, 2 * numThreads);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexTestWorker.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexTestWorker.java
new file mode 100644
index 0000000..d6bb3d3
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexTestWorker.java
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.multithread;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
+import edu.uci.ics.hyracks.storage.am.common.AbstractIndexTestWorker;
+import edu.uci.ics.hyracks.storage.am.common.TestOperationSelector;
+import edu.uci.ics.hyracks.storage.am.common.TestOperationSelector.TestOperation;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.datagen.DataGenThread;
+import edu.uci.ics.hyracks.storage.am.lsm.common.impls.NoOpIOOperationCallback;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.impls.LSMInvertedIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.ConjunctiveSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.JaccardSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
+
+public class LSMInvertedIndexTestWorker extends AbstractIndexTestWorker {
+
+ protected final LSMInvertedIndex invIndex;
+ protected final List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>();
+ protected final Random rnd = new Random(50);
+
+ protected final IInvertedIndexSearchModifier[] TEST_SEARCH_MODIFIERS = new IInvertedIndexSearchModifier[] {
+ new ConjunctiveSearchModifier(), new JaccardSearchModifier(0.8f), new JaccardSearchModifier(0.5f) };
+
+ public LSMInvertedIndexTestWorker(DataGenThread dataGen, TestOperationSelector opSelector, IIndex index,
+ int numBatches) {
+ super(dataGen, opSelector, index, numBatches);
+ invIndex = (LSMInvertedIndex) index;
+ }
+
+ @Override
+ public void performOp(ITupleReference tuple, TestOperation op) throws HyracksDataException, IndexException {
+ LSMInvertedIndexAccessor accessor = (LSMInvertedIndexAccessor) indexAccessor;
+ IIndexCursor searchCursor = accessor.createSearchCursor();
+ IIndexCursor rangeSearchCursor = accessor.createRangeSearchCursor();
+ RangePredicate rangePred = new RangePredicate(null, null, true, true, null, null);
+ IBinaryTokenizerFactory tokenizerFactory = invIndex.getTokenizerFactory();
+ int searchModifierIndex = Math.abs(rnd.nextInt()) % TEST_SEARCH_MODIFIERS.length;
+ InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(tokenizerFactory.createTokenizer(),
+ TEST_SEARCH_MODIFIERS[searchModifierIndex]);
+
+ switch (op) {
+ case INSERT: {
+ insert(accessor, tuple);
+ break;
+ }
+
+ case DELETE: {
+ // Randomly pick a document from the corpus to delete.
+ if (!documentCorpus.isEmpty()) {
+ int docIndex = Math.abs(rnd.nextInt()) % documentCorpus.size();
+ ITupleReference deleteTuple = documentCorpus.get(docIndex);
+ accessor.delete(deleteTuple);
+ // Swap tupleIndex with last element.
+ documentCorpus.set(docIndex, documentCorpus.get(documentCorpus.size() - 1));
+ documentCorpus.remove(documentCorpus.size() - 1);
+ } else {
+ // No existing documents to delete, treat this case as an insert.
+ insert(accessor, tuple);
+ }
+ break;
+ }
+
+ case POINT_SEARCH: {
+ searchCursor.reset();
+ searchPred.setQueryTuple(tuple);
+ searchPred.setQueryFieldIndex(0);
+ try {
+ accessor.search(searchCursor, searchPred);
+ consumeCursorTuples(searchCursor);
+ } catch (OccurrenceThresholdPanicException e) {
+ // Ignore.
+ }
+ break;
+ }
+
+ case SCAN: {
+ rangeSearchCursor.reset();
+ accessor.rangeSearch(rangeSearchCursor, rangePred);
+ consumeCursorTuples(rangeSearchCursor);
+ break;
+ }
+
+ case MERGE: {
+ accessor.scheduleMerge(NoOpIOOperationCallback.INSTANCE);
+ break;
+ }
+
+ default:
+ throw new HyracksDataException("Op " + op.toString() + " not supported.");
+ }
+ }
+
+ private void insert(LSMInvertedIndexAccessor accessor, ITupleReference tuple) throws HyracksDataException,
+ IndexException {
+ // Ignore ongoing merges. Do an insert instead.
+ accessor.insert(tuple);
+ // Add tuple to document corpus so we can delete it.
+ ITupleReference copyTuple = TupleUtils.copyTuple(tuple);
+ documentCorpus.add(copyTuple);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexWorkerFactory.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexWorkerFactory.java
new file mode 100644
index 0000000..cce843b
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/LSMInvertedIndexWorkerFactory.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.multithread;
+
+import edu.uci.ics.hyracks.storage.am.common.AbstractIndexTestWorker;
+import edu.uci.ics.hyracks.storage.am.common.IIndexTestWorkerFactory;
+import edu.uci.ics.hyracks.storage.am.common.TestOperationSelector;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.datagen.DataGenThread;
+
+/**
+ * {@link IIndexTestWorkerFactory} that produces {@link LSMInvertedIndexTestWorker} instances.
+ */
+public class LSMInvertedIndexWorkerFactory implements IIndexTestWorkerFactory {
+ /**
+ * Creates a new worker from the given arguments, which are passed through unchanged.
+ *
+ * @param index must be an LSM inverted index, because the worker's constructor casts it.
+ */
+ @Override
+ public AbstractIndexTestWorker create(DataGenThread dataGen, TestOperationSelector opSelector,
+ IIndex index, int numBatches) {
+ return new LSMInvertedIndexTestWorker(dataGen, opSelector, index, numBatches);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/PartitionedLSMInvertedIndexMultiThreadTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/PartitionedLSMInvertedIndexMultiThreadTest.java
new file mode 100644
index 0000000..1adaf61
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/multithread/PartitionedLSMInvertedIndexMultiThreadTest.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.multithread;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+/**
+ * Runs the multi-threaded tests inherited from {@link LSMInvertedIndexMultiThreadTest} with the index
+ * type switched to {@link InvertedIndexType#PARTITIONED_LSM}.
+ */
+public class PartitionedLSMInvertedIndexMultiThreadTest extends LSMInvertedIndexMultiThreadTest {
+
+    /**
+     * @return {@link InvertedIndexType#PARTITIONED_LSM}
+     */
+    @Override
+    protected InvertedIndexType getIndexType() {
+        return InvertedIndexType.PARTITIONED_LSM;
+    }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleTest.java
new file mode 100644
index 0000000..3059062
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/FixedSizeFrameTupleTest.java
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Random;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeFrameTupleAccessor;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk.FixedSizeFrameTupleAppender;
+
+/**
+ * Test for {@link FixedSizeFrameTupleAppender} and {@link FixedSizeFrameTupleAccessor} working on the
+ * same frame buffer.
+ */
+public class FixedSizeFrameTupleTest {
+
+    private static final int FRAME_SIZE = 4096;
+
+    // Fixed seed so the generated values are the same on every run.
+    private final Random rnd = new Random(50);
+
+    /**
+     * This test verifies the correct behavior of the FixedSizeFrameTuple class.
+     * Frames containing FixedSizeFrameTuple's require neither tuple slots nor
+     * field slots. The test inserts generated data into a frame until the
+     * frame is full, and then verifies the frame's contents.
+     */
+    @Test
+    public void singleFieldTest() throws Exception {
+        ByteBuffer buffer = ByteBuffer.allocate(FRAME_SIZE);
+
+        ITypeTraits[] fields = new ITypeTraits[] { IntegerPointable.TYPE_TRAITS };
+
+        FixedSizeFrameTupleAppender ftapp = new FixedSizeFrameTupleAppender(FRAME_SIZE, fields);
+        FixedSizeFrameTupleAccessor ftacc = new FixedSizeFrameTupleAccessor(FRAME_SIZE, fields);
+
+        // Values that were accepted by the appender, in insertion order.
+        ArrayList<Integer> check = new ArrayList<Integer>();
+
+        ftapp.reset(buffer, true);
+        while (true) {
+            int val = rnd.nextInt();
+            if (!ftapp.append(val)) {
+                // The frame rejected the value: it is full.
+                break;
+            }
+            check.add(val);
+            ftapp.incrementTupleCount(1);
+        }
+
+        ftacc.reset(buffer);
+        // Guard against a vacuous pass: the loop below is bounded by the accessor's tuple count, so a
+        // count of zero (or any count short of what was appended) would otherwise go unnoticed.
+        Assert.assertFalse(check.isEmpty());
+        Assert.assertEquals(check.size(), ftacc.getTupleCount());
+        for (int i = 0; i < ftacc.getTupleCount(); i++) {
+            int val = IntegerSerializerDeserializer.getInt(ftacc.getBuffer().array(), ftacc.getTupleStartOffset(i));
+            Assert.assertEquals(check.get(i).intValue(), val);
+        }
+    }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexBulkLoadTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexBulkLoadTest.java
new file mode 100644
index 0000000..4813615
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexBulkLoadTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+/**
+ * Concrete subclass of {@link AbstractInvertedIndexLoadTest} that fixes the index type to
+ * {@link InvertedIndexType#ONDISK}. It declares only a constructor, so every test method it runs is
+ * inherited from the base class.
+ */
+public class OnDiskInvertedIndexBulkLoadTest extends AbstractInvertedIndexLoadTest {
+
+ /**
+ * Passes the index type, a {@code true} flag and the value {@code 1} to the base class.
+ */
+ public OnDiskInvertedIndexBulkLoadTest() {
+ // NOTE(review): the boolean is presumably the bulk-load switch (on, hence "BulkLoad" in the class
+ // name) and the int presumably a round count -- confirm against AbstractInvertedIndexLoadTest.
+ super(InvertedIndexType.ONDISK, true, 1);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexLifecycleTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexLifecycleTest.java
new file mode 100644
index 0000000..09432de
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexLifecycleTest.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+
+import java.io.File;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.io.FileReference;
+import edu.uci.ics.hyracks.data.std.accessors.PointableBinaryComparatorFactory;
+import edu.uci.ics.hyracks.data.std.primitive.IntegerPointable;
+import edu.uci.ics.hyracks.data.std.primitive.UTF8StringPointable;
+import edu.uci.ics.hyracks.storage.am.common.AbstractIndexLifecycleTest;
+import edu.uci.ics.hyracks.storage.am.common.api.ITreeIndexFrame;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListBuilder;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
+
+/**
+ * Lifecycle test for {@link OnDiskInvertedIndex}, driven by {@link AbstractIndexLifecycleTest}.
+ * The index is built over integer inverted-list elements and UTF-8 string tokens; the insertion hooks
+ * are left empty.
+ */
+public class OnDiskInvertedIndexLifecycleTest extends AbstractIndexLifecycleTest {
+
+    private final LSMInvertedIndexTestHarness harness = new LSMInvertedIndexTestHarness();
+    // Leaf frame used by isEmptyIndex(); created on first use.
+    private ITreeIndexFrame frame = null;
+
+    /**
+     * @return {@code true} when both the inverted-lists file and the BTree file exist on disk.
+     */
+    @Override
+    protected boolean persistentStateExists() throws Exception {
+        boolean invListsFileExists = harness.getInvListsFileRef().getFile().exists();
+        return invListsFileExists && ((OnDiskInvertedIndex) index).getBTree().getFileReference().getFile().exists();
+    }
+
+    /**
+     * @return whether the index's BTree reports itself as empty.
+     */
+    @Override
+    protected boolean isEmptyIndex() throws Exception {
+        OnDiskInvertedIndex invIndex = (OnDiskInvertedIndex) index;
+        if (frame == null) {
+            frame = invIndex.getBTree().getLeafFrameFactory().createFrame();
+        }
+        return invIndex.getBTree().isEmptyTree(frame);
+    }
+
+    /**
+     * Sets up the harness and constructs the index under test. The BTree file is placed next to the
+     * inverted-lists file, using the same path with a {@code _btree} suffix.
+     */
+    @Override
+    public void setup() throws Exception {
+        harness.setUp();
+        ITypeTraits[] tokenTypeTraits = new ITypeTraits[] { UTF8StringPointable.TYPE_TRAITS };
+        IBinaryComparatorFactory[] tokenCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+                .of(UTF8StringPointable.FACTORY) };
+        ITypeTraits[] invListTypeTraits = new ITypeTraits[] { IntegerPointable.TYPE_TRAITS };
+        IBinaryComparatorFactory[] invListCmpFactories = new IBinaryComparatorFactory[] { PointableBinaryComparatorFactory
+                .of(IntegerPointable.FACTORY) };
+        IInvertedListBuilder invListBuilder = new FixedSizeElementInvertedListBuilder(invListTypeTraits);
+        String btreePath = harness.getInvListsFileRef().getFile().getPath() + "_btree";
+        FileReference btreeFile = new FileReference(new File(btreePath));
+        index = new OnDiskInvertedIndex(harness.getDiskBufferCache(), harness.getDiskFileMapProvider(), invListBuilder,
+                invListTypeTraits, invListCmpFactories, tokenTypeTraits, tokenCmpFactories,
+                harness.getInvListsFileRef(), btreeFile);
+    }
+
+    /**
+     * Deactivates and destroys the index, then tears down the harness.
+     */
+    @Override
+    public void tearDown() throws Exception {
+        try {
+            index.deactivate();
+            index.destroy();
+        } finally {
+            // Release the harness even if deactivating or destroying the index failed.
+            harness.tearDown();
+        }
+    }
+
+    @Override
+    protected void performInsertions() throws Exception {
+        // Do nothing.
+    }
+
+    @Override
+    protected void checkInsertions() throws Exception {
+        // Do nothing.
+    }
+
+    @Override
+    protected void clearCheckableInsertions() throws Exception {
+        // Do nothing.
+    }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexSearchTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexSearchTest.java
new file mode 100644
index 0000000..76d9200
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/OnDiskInvertedIndexSearchTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexSearchTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class OnDiskInvertedIndexSearchTest extends AbstractInvertedIndexSearchTest {
+ // Runs the inherited search tests with InvertedIndexType.ONDISK; the boolean is interpreted by the superclass (presumably a bulk-load switch - confirm).
+ public OnDiskInvertedIndexSearchTest() {
+ super(InvertedIndexType.ONDISK, true);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndexBulkLoadTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndexBulkLoadTest.java
new file mode 100644
index 0000000..f641630
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndexBulkLoadTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexLoadTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class PartitionedOnDiskInvertedIndexBulkLoadTest extends AbstractInvertedIndexLoadTest {
+ // Runs the inherited load tests with InvertedIndexType.PARTITIONED_ONDISK; 'true' and '1' are interpreted by the superclass (presumably bulk-load switch and round count - confirm).
+ public PartitionedOnDiskInvertedIndexBulkLoadTest() {
+ super(InvertedIndexType.PARTITIONED_ONDISK, true, 1);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndexSearchTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndexSearchTest.java
new file mode 100644
index 0000000..4fa25ed
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/ondisk/PartitionedOnDiskInvertedIndexSearchTest.java
@@ -0,0 +1,26 @@
+/*
+ * Copyright 2009-2012 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.ondisk;
+
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.AbstractInvertedIndexSearchTest;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+public class PartitionedOnDiskInvertedIndexSearchTest extends AbstractInvertedIndexSearchTest {
+ // Runs the inherited search tests with InvertedIndexType.PARTITIONED_ONDISK; the boolean is interpreted by the superclass (presumably a bulk-load switch - confirm).
+ public PartitionedOnDiskInvertedIndexSearchTest() {
+ super(InvertedIndexType.PARTITIONED_ONDISK, true);
+ }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java
new file mode 100644
index 0000000..33ea4f5
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/NGramTokenizerTest.java
@@ -0,0 +1,228 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
+
+public class NGramTokenizerTest {
+
+    // Padding characters the expected grams use before and after the input when prePost is enabled.
+    private static final char PRECHAR = '#';
+    private static final char POSTCHAR = '$';
+
+    // Test input; note that it contains a non-ASCII character (ü).
+    private final String str = "Jürgen S. Generic's Car";
+    private byte[] inputBuffer;
+
+    // Number of characters per gram.
+    private final int gramLength = 3;
+
+    /**
+     * Appends the reference grams of {@code s} to {@code grams}: every substring of {@code gramLength}
+     * characters of the lower-cased input, in order of occurrence.
+     *
+     * @param s input string
+     * @param gramLength number of characters per gram
+     * @param grams output list that receives the grams
+     * @param prePost if true, the lower-cased input is first padded with {@code gramLength - 1} PRECHARs in
+     *            front and {@code gramLength - 1} POSTCHARs behind
+     */
+    private void getExpectedGrams(String s, int gramLength, ArrayList<String> grams, boolean prePost) {
+        // Locale.ROOT keeps the expected grams independent of the JVM's default locale.
+        String tmp = s.toLowerCase(java.util.Locale.ROOT);
+        if (prePost) {
+            StringBuilder padded = new StringBuilder();
+            for (int i = 0; i < gramLength - 1; i++) {
+                padded.append(PRECHAR);
+            }
+            padded.append(tmp);
+            for (int i = 0; i < gramLength - 1; i++) {
+                padded.append(POSTCHAR);
+            }
+            tmp = padded.toString();
+        }
+
+        for (int i = 0; i + gramLength <= tmp.length(); i++) {
+            grams.add(tmp.substring(i, i + gramLength));
+        }
+    }
+
+    @Before
+    public void init() throws Exception {
+        // Serialize the test string with writeUTF; the resulting bytes are the tokenizer input.
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        DataOutput dos = new DataOutputStream(baos);
+        dos.writeUTF(str);
+        inputBuffer = baos.toByteArray();
+    }
+
+    /** Serializes the tokenizer's current token and reads the serialized bytes back as an int. */
+    private int readHashedToken(NGramUTF8StringBinaryTokenizer tokenizer) throws IOException {
+        GrowableArray tokenData = new GrowableArray();
+        IToken token = tokenizer.getToken();
+        token.serializeToken(tokenData);
+        DataInput in = new DataInputStream(new ByteArrayInputStream(tokenData.getByteArray()));
+        return in.readInt();
+    }
+
+    /**
+     * Drains the tokenizer and checks that it emits exactly the expected hashes, in order.
+     *
+     * @param tokenizer tokenizer that has already been reset on the input
+     * @param expected expected hash of each token, in emission order
+     */
+    private void assertHashedTokens(NGramUTF8StringBinaryTokenizer tokenizer, ArrayList<Integer> expected)
+            throws IOException {
+        int tokenCount = 0;
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
+            Integer hashedGram = readHashedToken(tokenizer);
+            Assert.assertEquals(expected.get(tokenCount), hashedGram);
+            tokenCount++;
+        }
+        // Without this check the loop would pass vacuously if the tokenizer emitted too few tokens.
+        Assert.assertEquals(expected.size(), tokenCount);
+    }
+
+    /**
+     * Checks hashed tokens against expectations that include the occurrence count: the n-th occurrence
+     * of a gram is expected to hash with count n.
+     *
+     * @param prePost whether the tokenizer and the expected grams use pre/post padding
+     */
+    void runTestNGramTokenizerWithCountedHashedUTF8Tokens(boolean prePost) throws IOException {
+        HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
+        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(gramLength, prePost, false,
+                false, tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        ArrayList<String> expectedGrams = new ArrayList<String>();
+        getExpectedGrams(str, gramLength, expectedGrams, prePost);
+        ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
+        HashMap<String, Integer> gramCounts = new HashMap<String, Integer>();
+        for (String s : expectedGrams) {
+            Integer previous = gramCounts.get(s);
+            int count = (previous == null) ? 1 : previous + 1;
+            // Write the count back on every occurrence so third and later repeats keep incrementing.
+            gramCounts.put(s, count);
+            expectedHashedGrams.add(tokenHash(s, count));
+        }
+
+        assertHashedTokens(tokenizer, expectedHashedGrams);
+    }
+
+    /**
+     * Checks hashed tokens when every gram is expected to hash with count 1, repeated or not.
+     *
+     * @param prePost whether the tokenizer and the expected grams use pre/post padding
+     */
+    void runTestNGramTokenizerWithHashedUTF8Tokens(boolean prePost) throws IOException {
+        HashedUTF8NGramTokenFactory tokenFactory = new HashedUTF8NGramTokenFactory();
+        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(gramLength, prePost, true, false,
+                tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        ArrayList<String> expectedGrams = new ArrayList<String>();
+        getExpectedGrams(str, gramLength, expectedGrams, prePost);
+        ArrayList<Integer> expectedHashedGrams = new ArrayList<Integer>();
+        for (String s : expectedGrams) {
+            expectedHashedGrams.add(tokenHash(s, 1));
+        }
+
+        assertHashedTokens(tokenizer, expectedHashedGrams);
+    }
+
+    /**
+     * Checks plain string tokens: each emitted gram must equal the expected gram at the same position.
+     *
+     * @param prePost whether the tokenizer and the expected grams use pre/post padding
+     */
+    void runTestNGramTokenizerWithUTF8Tokens(boolean prePost) throws IOException {
+        UTF8NGramTokenFactory tokenFactory = new UTF8NGramTokenFactory();
+        NGramUTF8StringBinaryTokenizer tokenizer = new NGramUTF8StringBinaryTokenizer(gramLength, prePost, true, false,
+                tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        ArrayList<String> expectedGrams = new ArrayList<String>();
+        getExpectedGrams(str, gramLength, expectedGrams, prePost);
+
+        int tokenCount = 0;
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
+
+            // Serialize the token, then read it back with readUTF.
+            GrowableArray tokenData = new GrowableArray();
+            IToken token = tokenizer.getToken();
+            token.serializeToken(tokenData);
+            DataInput in = new DataInputStream(new ByteArrayInputStream(tokenData.getByteArray()));
+            String strGram = in.readUTF();
+
+            Assert.assertEquals(expectedGrams.get(tokenCount), strGram);
+            tokenCount++;
+        }
+        // Without this check the loop would pass vacuously if the tokenizer emitted too few tokens.
+        Assert.assertEquals(expectedGrams.size(), tokenCount);
+    }
+
+    @Test
+    public void testNGramTokenizerWithCountedHashedUTF8Tokens() throws Exception {
+        runTestNGramTokenizerWithCountedHashedUTF8Tokens(false);
+        runTestNGramTokenizerWithCountedHashedUTF8Tokens(true);
+    }
+
+    @Test
+    public void testNGramTokenizerWithHashedUTF8Tokens() throws Exception {
+        runTestNGramTokenizerWithHashedUTF8Tokens(false);
+        runTestNGramTokenizerWithHashedUTF8Tokens(true);
+    }
+
+    @Test
+    public void testNGramTokenizerWithUTF8Tokens() throws IOException {
+        runTestNGramTokenizerWithUTF8Tokens(false);
+        runTestNGramTokenizerWithUTF8Tokens(true);
+    }
+
+    /**
+     * Reference hash used to compute the expected values: starts from GOLDEN_RATIO_32, XORs in each
+     * character followed by a multiplication by GOLDEN_RATIO_32, and finally adds the token count.
+     *
+     * @param token gram to hash
+     * @param tokenCount occurrence count added to the hash
+     * @return the expected hash value
+     */
+    public int tokenHash(String token, int tokenCount) {
+        int h = AbstractUTF8Token.GOLDEN_RATIO_32;
+        for (int i = 0; i < token.length(); i++) {
+            h ^= token.charAt(i);
+            h *= AbstractUTF8Token.GOLDEN_RATIO_32;
+        }
+        return h + tokenCount;
+    }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java
new file mode 100644
index 0000000..3ff9304
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/tokenizers/WordTokenizerTest.java
@@ -0,0 +1,210 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.DataOutput;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+
+import junit.framework.Assert;
+
+import org.junit.Before;
+import org.junit.Test;
+
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
+
+public class WordTokenizerTest {
+
+    // Test input; mixes punctuation, repeated words and a non-ASCII character (ü).
+    private final String text = "Hello World, I would like to inform you of the importance of Foo Bar. Yes, Foo Bar. Jürgen.";
+    private byte[] inputBuffer;
+
+    // Reference results, filled in by init() using the helper methods of this class.
+    private final ArrayList<String> expectedUTF8Tokens = new ArrayList<String>();
+    private final ArrayList<Integer> expectedHashedUTF8Tokens = new ArrayList<Integer>();
+    private final ArrayList<Integer> expectedCountedHashedUTF8Tokens = new ArrayList<Integer>();
+
+    /**
+     * Tells whether {@code c} separates tokens in the reference tokenization.
+     *
+     * @param c character to classify
+     * @return false if {@code c} is a letter or digit or has type OTHER_LETTER or OTHER_NUMBER, true otherwise
+     */
+    private boolean isSeparator(char c) {
+        int type = Character.getType(c);
+        return !(Character.isLetterOrDigit(c) || type == Character.OTHER_LETTER || type == Character.OTHER_NUMBER);
+    }
+
+    /**
+     * Splits the lower-cased text into maximal runs of non-separator characters and appends them to
+     * {@code tokens} in order. Leading and trailing separators never produce an empty token, and an empty
+     * or separator-only text yields no tokens.
+     *
+     * @param text text to split
+     * @param tokens output list that receives the tokens
+     */
+    private void tokenize(String text, ArrayList<String> tokens) {
+        // Locale.ROOT keeps the expected tokens independent of the JVM's default locale.
+        String lowerCaseText = text.toLowerCase(java.util.Locale.ROOT);
+        int length = lowerCaseText.length();
+        int startIx = 0;
+        while (startIx < length) {
+            // Skip the separators in front of the next token; the bound makes trailing separators safe.
+            while (startIx < length && isSeparator(lowerCaseText.charAt(startIx))) {
+                startIx++;
+            }
+            int tokenStart = startIx;
+            while (startIx < length && !isSeparator(lowerCaseText.charAt(startIx))) {
+                startIx++;
+            }
+            // tokenStart == startIx only when the text ended in separators; emit nothing in that case.
+            if (startIx > tokenStart) {
+                tokens.add(lowerCaseText.substring(tokenStart, startIx));
+            }
+        }
+    }
+
+    /**
+     * Serializes the test text and computes the three lists of expected results.
+     *
+     * @throws IOException if serializing the text fails
+     */
+    @Before
+    public void init() throws IOException {
+        // serialize text into bytes
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        DataOutput dos = new DataOutputStream(baos);
+        dos.writeUTF(text);
+        inputBuffer = baos.toByteArray();
+
+        // init expected string tokens
+        tokenize(text, expectedUTF8Tokens);
+
+        // hashed tokens ignoring token count
+        for (String token : expectedUTF8Tokens) {
+            expectedHashedUTF8Tokens.add(tokenHash(token, 1));
+        }
+
+        // hashed tokens using token count: the n-th occurrence of a token is hashed with count n
+        HashMap<String, Integer> tokenCounts = new HashMap<String, Integer>();
+        for (String token : expectedUTF8Tokens) {
+            Integer previous = tokenCounts.get(token);
+            int count = (previous == null) ? 1 : previous + 1;
+            // Write the count back on every occurrence so third and later repeats keep incrementing.
+            tokenCounts.put(token, count);
+            expectedCountedHashedUTF8Tokens.add(tokenHash(token, count));
+        }
+    }
+
+    /**
+     * Serializes the tokenizer's current token and wraps the serialized bytes for reading.
+     *
+     * @param tokenizer tokenizer positioned on a token by a preceding {@code next()} call
+     * @return a reader positioned at the first serialized byte
+     * @throws IOException if serializing the token fails
+     */
+    private DataInput serializeCurrentToken(DelimitedUTF8StringBinaryTokenizer tokenizer) throws IOException {
+        GrowableArray tokenData = new GrowableArray();
+        IToken token = tokenizer.getToken();
+        token.serializeToken(tokenData);
+        return new DataInputStream(new ByteArrayInputStream(tokenData.getByteArray()));
+    }
+
+    /**
+     * Drains the tokenizer and checks that it emits exactly the expected hashes, in order.
+     *
+     * @param tokenizer tokenizer that has already been reset on the input
+     * @param expected expected hash of each token, in emission order
+     * @throws IOException if serializing or reading a token fails
+     */
+    private void assertHashedTokens(DelimitedUTF8StringBinaryTokenizer tokenizer, ArrayList<Integer> expected)
+            throws IOException {
+        int tokenCount = 0;
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
+            Integer hashedToken = serializeCurrentToken(tokenizer).readInt();
+            Assert.assertEquals(expected.get(tokenCount), hashedToken);
+            tokenCount++;
+        }
+        // Without this check the loop would pass vacuously if the tokenizer emitted too few tokens.
+        Assert.assertEquals(expected.size(), tokenCount);
+    }
+
+    /**
+     * Compares the tokenizer output with the expected hashes that include the per-token occurrence count.
+     */
+    @Test
+    public void testWordTokenizerWithCountedHashedUTF8Tokens() throws IOException {
+        HashedUTF8WordTokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
+        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(false, false,
+                tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        assertHashedTokens(tokenizer, expectedCountedHashedUTF8Tokens);
+    }
+
+    /** Compares the tokenizer output with the expected hashes that always use count 1. */
+    @Test
+    public void testWordTokenizerWithHashedUTF8Tokens() throws IOException {
+        HashedUTF8WordTokenFactory tokenFactory = new HashedUTF8WordTokenFactory();
+        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        assertHashedTokens(tokenizer, expectedHashedUTF8Tokens);
+    }
+
+    /** Compares each emitted string token with the expected token at the same position. */
+    @Test
+    public void testWordTokenizerWithUTF8Tokens() throws IOException {
+        UTF8WordTokenFactory tokenFactory = new UTF8WordTokenFactory();
+        DelimitedUTF8StringBinaryTokenizer tokenizer = new DelimitedUTF8StringBinaryTokenizer(true, false, tokenFactory);
+        tokenizer.reset(inputBuffer, 0, inputBuffer.length);
+
+        int tokenCount = 0;
+        while (tokenizer.hasNext()) {
+            tokenizer.next();
+
+            String strToken = serializeCurrentToken(tokenizer).readUTF();
+            Assert.assertEquals(expectedUTF8Tokens.get(tokenCount), strToken);
+            tokenCount++;
+        }
+        // Without this check the loop would pass vacuously if the tokenizer emitted too few tokens.
+        Assert.assertEquals(expectedUTF8Tokens.size(), tokenCount);
+    }
+
+    /**
+     * JAQL hash used to compute the expected values: starts from GOLDEN_RATIO_32, XORs in each character
+     * followed by a multiplication by GOLDEN_RATIO_32, and finally adds the token count.
+     *
+     * @param token token to hash
+     * @param tokenCount occurrence count added to the hash
+     * @return the expected hash value
+     */
+    public int tokenHash(String token, int tokenCount) {
+        int h = AbstractUTF8Token.GOLDEN_RATIO_32;
+        for (int i = 0; i < token.length(); i++) {
+            h ^= token.charAt(i);
+            h *= AbstractUTF8Token.GOLDEN_RATIO_32;
+        }
+        return h + tokenCount;
+    }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java
new file mode 100644
index 0000000..870e6d9
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestContext.java
@@ -0,0 +1,249 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.dataflow.value.ITypeTraits;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.util.SerdeUtils;
+import edu.uci.ics.hyracks.dataflow.common.util.TupleUtils;
+import edu.uci.ics.hyracks.storage.am.btree.OrderedIndexTestContext;
+import edu.uci.ics.hyracks.storage.am.common.CheckTuple;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndex;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.InvertedIndexException;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
+
+@SuppressWarnings("rawtypes")
+public class LSMInvertedIndexTestContext extends OrderedIndexTestContext {
+
+    /** Kinds of inverted index that {@code create} can build. */
+    public enum InvertedIndexType {
+        INMEMORY,
+        ONDISK,
+        LSM,
+        PARTITIONED_INMEMORY,
+        PARTITIONED_ONDISK,
+        PARTITIONED_LSM
+    }
+
+    protected IInvertedIndex invIndex;
+    protected IBinaryComparatorFactory[] allCmpFactories;
+    protected IBinaryTokenizerFactory tokenizerFactory;
+    protected InvertedIndexType invIndexType;
+    protected InvertedIndexTokenizingTupleIterator indexTupleIter;
+    protected HashSet<Comparable> allTokens = new HashSet<Comparable>();
+    protected List<ITupleReference> documentCorpus = new ArrayList<ITupleReference>();
+
+    public LSMInvertedIndexTestContext(ISerializerDeserializer[] fieldSerdes, IIndex index,
+            IBinaryTokenizerFactory tokenizerFactory, InvertedIndexType invIndexType,
+            InvertedIndexTokenizingTupleIterator indexTupleIter) {
+        super(fieldSerdes, index);
+        this.invIndex = (IInvertedIndex) index;
+        this.tokenizerFactory = tokenizerFactory;
+        this.invIndexType = invIndexType;
+        this.indexTupleIter = indexTupleIter;
+    }
+
+    @Override
+    public int getKeyFieldCount() {
+        return fieldSerdes.length;
+    }
+
+    /**
+     * Returns the token comparator factories followed by the inverted-list comparator factories. The
+     * combined array is built on the first call and cached for later calls.
+     */
+    @Override
+    public IBinaryComparatorFactory[] getComparatorFactories() {
+        if (allCmpFactories != null) {
+            return allCmpFactories;
+        }
+        IInvertedIndex invertedIndex = (IInvertedIndex) index;
+        IBinaryComparatorFactory[] tokenCmps = invertedIndex.getTokenCmpFactories();
+        IBinaryComparatorFactory[] invListCmps = invertedIndex.getInvListCmpFactories();
+        allCmpFactories = new IBinaryComparatorFactory[tokenCmps.length + invListCmps.length];
+        System.arraycopy(tokenCmps, 0, allCmpFactories, 0, tokenCmps.length);
+        System.arraycopy(invListCmps, 0, allCmpFactories, tokenCmps.length, invListCmps.length);
+        return allCmpFactories;
+    }
+
+    /**
+     * Creates an inverted index of the requested type from the harness resources and wraps it in a test
+     * context. The first {@code tokenFieldCount} serdes describe the token fields; the remaining serdes
+     * describe the inverted-list element fields.
+     * @throws IndexException if the index type is unknown, or when raised while the index is created
+     */
+    public static LSMInvertedIndexTestContext create(LSMInvertedIndexTestHarness harness,
+            ISerializerDeserializer[] fieldSerdes, int tokenFieldCount, IBinaryTokenizerFactory tokenizerFactory,
+            InvertedIndexType invIndexType) throws IndexException {
+        ITypeTraits[] allTypeTraits = SerdeUtils.serdesToTypeTraits(fieldSerdes);
+        IBinaryComparatorFactory[] allCmpFactories = SerdeUtils.serdesToComparatorFactories(fieldSerdes,
+                fieldSerdes.length);
+        // Leading fields: token type traits and comparators.
+        ITypeTraits[] tokenTypeTraits = new ITypeTraits[tokenFieldCount];
+        IBinaryComparatorFactory[] tokenCmpFactories = new IBinaryComparatorFactory[tokenFieldCount];
+        System.arraycopy(allTypeTraits, 0, tokenTypeTraits, 0, tokenFieldCount);
+        System.arraycopy(allCmpFactories, 0, tokenCmpFactories, 0, tokenFieldCount);
+        // Remaining fields: inverted-list element type traits and comparators.
+        int invListFieldCount = fieldSerdes.length - tokenFieldCount;
+        ITypeTraits[] invListTypeTraits = new ITypeTraits[invListFieldCount];
+        IBinaryComparatorFactory[] invListCmpFactories = new IBinaryComparatorFactory[invListFieldCount];
+        System.arraycopy(allTypeTraits, tokenFieldCount, invListTypeTraits, 0, invListFieldCount);
+        System.arraycopy(allCmpFactories, tokenFieldCount, invListCmpFactories, 0, invListFieldCount);
+        // Create the index, the matching tuple iterator, and finally the test context.
+        IInvertedIndex invIndex = createIndex(harness, invIndexType, tokenizerFactory, invListTypeTraits,
+                invListCmpFactories, tokenTypeTraits, tokenCmpFactories);
+        InvertedIndexTokenizingTupleIterator indexTupleIter = createTupleIterator(invIndex, invIndexType,
+                tokenizerFactory);
+        return new LSMInvertedIndexTestContext(fieldSerdes, invIndex, tokenizerFactory, invIndexType,
+                indexTupleIter);
+    }
+
+    /** Instantiates the index implementation that corresponds to {@code invIndexType}. */
+    private static IInvertedIndex createIndex(LSMInvertedIndexTestHarness harness, InvertedIndexType invIndexType,
+            IBinaryTokenizerFactory tokenizerFactory, ITypeTraits[] invListTypeTraits,
+            IBinaryComparatorFactory[] invListCmpFactories, ITypeTraits[] tokenTypeTraits,
+            IBinaryComparatorFactory[] tokenCmpFactories) throws IndexException {
+        switch (invIndexType) {
+            case INMEMORY:
+                return InvertedIndexUtils.createInMemoryBTreeInvertedindex(harness.getMemBufferCache(),
+                        harness.getMemFreePageManager(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
+                        tokenCmpFactories, tokenizerFactory);
+            case PARTITIONED_INMEMORY:
+                return InvertedIndexUtils.createPartitionedInMemoryBTreeInvertedindex(harness.getMemBufferCache(),
+                        harness.getMemFreePageManager(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
+                        tokenCmpFactories, tokenizerFactory);
+            case ONDISK:
+                return InvertedIndexUtils.createOnDiskInvertedIndex(harness.getDiskBufferCache(),
+                        harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
+                        tokenCmpFactories, harness.getInvListsFileRef());
+            case PARTITIONED_ONDISK:
+                return InvertedIndexUtils.createPartitionedOnDiskInvertedIndex(harness.getDiskBufferCache(),
+                        harness.getDiskFileMapProvider(), invListTypeTraits, invListCmpFactories, tokenTypeTraits,
+                        tokenCmpFactories, harness.getInvListsFileRef());
+            case LSM:
+                return InvertedIndexUtils.createLSMInvertedIndex(harness.getMemBufferCache(),
+                        harness.getMemFreePageManager(), harness.getDiskFileMapProvider(), invListTypeTraits,
+                        invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
+                        harness.getDiskBufferCache(), harness.getIOManager(), harness.getOnDiskDir(),
+                        harness.getMergePolicy(), harness.getOperationTrackerFactory(), harness.getIOScheduler(),
+                        harness.getIOOperationCallbackProvider());
+            case PARTITIONED_LSM:
+                return InvertedIndexUtils.createPartitionedLSMInvertedIndex(harness.getMemBufferCache(),
+                        harness.getMemFreePageManager(), harness.getDiskFileMapProvider(), invListTypeTraits,
+                        invListCmpFactories, tokenTypeTraits, tokenCmpFactories, tokenizerFactory,
+                        harness.getDiskBufferCache(), harness.getIOManager(), harness.getOnDiskDir(),
+                        harness.getMergePolicy(), harness.getOperationTrackerFactory(), harness.getIOScheduler(),
+                        harness.getIOOperationCallbackProvider());
+            default:
+                throw new InvertedIndexException("Unknow inverted-index type '" + invIndexType + "'.");
+        }
+    }
+
+    /** Picks the plain or the partitioned tokenizing tuple iterator, depending on {@code invIndexType}. */
+    private static InvertedIndexTokenizingTupleIterator createTupleIterator(IInvertedIndex invIndex,
+            InvertedIndexType invIndexType, IBinaryTokenizerFactory tokenizerFactory) throws IndexException {
+        switch (invIndexType) {
+            case INMEMORY:
+            case ONDISK:
+            case LSM:
+                return new InvertedIndexTokenizingTupleIterator(invIndex.getTokenTypeTraits().length,
+                        invIndex.getInvListTypeTraits().length, tokenizerFactory.createTokenizer());
+            case PARTITIONED_INMEMORY:
+            case PARTITIONED_ONDISK:
+            case PARTITIONED_LSM:
+                return new PartitionedInvertedIndexTokenizingTupleIterator(
+                        invIndex.getTokenTypeTraits().length, invIndex.getInvListTypeTraits().length,
+                        tokenizerFactory.createTokenizer());
+            default:
+                throw new InvertedIndexException("Unknow inverted-index type '" + invIndexType + "'.");
+        }
+    }
+
+    /**
+     * Stores a copy of the tuple in the document corpus, then tokenizes it and passes one check tuple per
+     * produced index tuple to insertCheckTuple. Field 0 of every check tuple is added to the token set.
+     */
+    public void insertCheckTuples(ITupleReference tuple, Collection<CheckTuple> checkTuples)
+            throws HyracksDataException {
+        documentCorpus.add(TupleUtils.copyTuple(tuple));
+        indexTupleIter.reset(tuple);
+        while (indexTupleIter.hasNext()) {
+            indexTupleIter.next();
+            CheckTuple checkTuple = createCheckTuple(indexTupleIter.getTuple());
+            insertCheckTuple(checkTuple, checkTuples);
+            allTokens.add(checkTuple.getField(0));
+        }
+    }
+
+    /** Tokenizes the tuple and passes one check tuple per produced index tuple to deleteCheckTuple. */
+    public void deleteCheckTuples(ITupleReference tuple, Collection<CheckTuple> checkTuples)
+            throws HyracksDataException {
+        indexTupleIter.reset(tuple);
+        while (indexTupleIter.hasNext()) {
+            indexTupleIter.next();
+            deleteCheckTuple(createCheckTuple(indexTupleIter.getTuple()), checkTuples);
+        }
+    }
+
+    public HashSet<Comparable> getAllTokens() {
+        return allTokens;
+    }
+
+    /** Deserializes every field of the tuple with the serde at the same position into a new check tuple. */
+    @SuppressWarnings("unchecked")
+    public CheckTuple createCheckTuple(ITupleReference tuple) throws HyracksDataException {
+        int fieldCount = fieldSerdes.length;
+        CheckTuple checkTuple = new CheckTuple(fieldCount, fieldCount);
+        for (int i = 0; i < fieldCount; i++) {
+            ByteArrayInputStream fieldBytes = new ByteArrayInputStream(tuple.getFieldData(i),
+                    tuple.getFieldStart(i), tuple.getFieldLength(i));
+            DataInput in = new DataInputStream(fieldBytes);
+            checkTuple.appendField((Comparable) fieldSerdes[i].deserialize(in));
+        }
+        return checkTuple;
+    }
+
+    @Override
+    public void upsertCheckTuple(CheckTuple checkTuple, Collection<CheckTuple> checkTuples) {
+        throw new UnsupportedOperationException("Upsert not supported by inverted index.");
+    }
+
+    public IBinaryTokenizerFactory getTokenizerFactory() {
+        return tokenizerFactory;
+    }
+
+    public List<ITupleReference> getDocumentCorpus() {
+        return documentCorpus;
+    }
+
+    public InvertedIndexType getInvertedIndexType() {
+        return invIndexType;
+    }
+}
diff --git a/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
new file mode 100644
index 0000000..97f78f3
--- /dev/null
+++ b/fullstack/hyracks/hyracks-tests/hyracks-storage-am-lsm-invertedindex-test/src/test/java/edu/uci/ics/hyracks/storage/am/lsm/invertedindex/util/LSMInvertedIndexTestUtils.java
@@ -0,0 +1,568 @@
+/*
+ * Copyright 2009-2010 by The Regents of the University of California
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * you may obtain a copy of the License from
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util;
+
+import static org.junit.Assert.fail;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInput;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Random;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import edu.uci.ics.hyracks.api.dataflow.value.IBinaryComparatorFactory;
+import edu.uci.ics.hyracks.api.dataflow.value.ISerializerDeserializer;
+import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
+import edu.uci.ics.hyracks.data.std.util.GrowableArray;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleBuilder;
+import edu.uci.ics.hyracks.dataflow.common.comm.io.ArrayTupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.accessors.ITupleReference;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.IntegerSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.ShortSerializerDeserializer;
+import edu.uci.ics.hyracks.dataflow.common.data.marshalling.UTF8StringSerializerDeserializer;
+import edu.uci.ics.hyracks.storage.am.btree.OrderedIndexTestUtils;
+import edu.uci.ics.hyracks.storage.am.btree.impls.RangePredicate;
+import edu.uci.ics.hyracks.storage.am.common.CheckTuple;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexBulkLoader;
+import edu.uci.ics.hyracks.storage.am.common.api.IIndexCursor;
+import edu.uci.ics.hyracks.storage.am.common.api.IndexException;
+import edu.uci.ics.hyracks.storage.am.common.datagen.DocumentStringFieldValueGenerator;
+import edu.uci.ics.hyracks.storage.am.common.datagen.IFieldValueGenerator;
+import edu.uci.ics.hyracks.storage.am.common.datagen.PersonNameFieldValueGenerator;
+import edu.uci.ics.hyracks.storage.am.common.datagen.SortedIntegerFieldValueGenerator;
+import edu.uci.ics.hyracks.storage.am.common.datagen.TupleGenerator;
+import edu.uci.ics.hyracks.storage.am.common.impls.NoOpOperationCallback;
+import edu.uci.ics.hyracks.storage.am.common.ophelpers.MultiComparator;
+import edu.uci.ics.hyracks.storage.am.common.tuples.PermutingTupleReference;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndex;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexAccessor;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedIndexSearchModifier;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.api.IInvertedListCursor;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.common.LSMInvertedIndexTestHarness;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.exceptions.OccurrenceThresholdPanicException;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.search.InvertedIndexSearchPredicate;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.DelimitedUTF8StringBinaryTokenizerFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.HashedUTF8WordTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizer;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IBinaryTokenizerFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.IToken;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.ITokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.NGramUTF8StringBinaryTokenizerFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.UTF8NGramTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.tokenizers.UTF8WordTokenFactory;
+import edu.uci.ics.hyracks.storage.am.lsm.invertedindex.util.LSMInvertedIndexTestContext.InvertedIndexType;
+
+@SuppressWarnings("rawtypes")
+public class LSMInvertedIndexTestUtils {
+
+ public static final int TEST_GRAM_LENGTH = 3;
+
+ /**
+  * Creates a generator of two-field tuples: field 0 comes from a DocumentStringFieldValueGenerator and is
+  * serialized as a UTF-8 string, field 1 comes from a SortedIntegerFieldValueGenerator and is serialized
+  * as an integer.
+  *
+  * @param rnd random source handed to the document string generator
+  * @return the configured tuple generator
+  * @throws IOException propagated from the generator constructors
+  */
+ public static TupleGenerator createStringDocumentTupleGen(Random rnd) throws IOException {
+     IFieldValueGenerator[] valueGens = new IFieldValueGenerator[] {
+             new DocumentStringFieldValueGenerator(2, 10, 10000, rnd), new SortedIntegerFieldValueGenerator(0) };
+     ISerializerDeserializer[] serdes = new ISerializerDeserializer[] {
+             UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+     return new TupleGenerator(valueGens, serdes, 0);
+ }
+
+ /**
+  * Creates a generator of two-field tuples: field 0 comes from a PersonNameFieldValueGenerator and is
+  * serialized as a UTF-8 string, field 1 comes from a SortedIntegerFieldValueGenerator and is serialized
+  * as an integer.
+  *
+  * @param rnd random source handed to the person-name generator
+  * @return the configured tuple generator
+  * @throws IOException propagated from the generator constructors
+  */
+ public static TupleGenerator createPersonNamesTupleGen(Random rnd) throws IOException {
+     IFieldValueGenerator[] valueGens = new IFieldValueGenerator[] {
+             new PersonNameFieldValueGenerator(rnd, 0.5f), new SortedIntegerFieldValueGenerator(0) };
+     ISerializerDeserializer[] serdes = new ISerializerDeserializer[] {
+             UTF8StringSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+     return new TupleGenerator(valueGens, serdes, 0);
+ }
+
+ /**
+  * Returns the field serdes used for a non-hashed inverted index of the given type: a UTF-8 string
+  * followed by an integer, with a short in between for the partitioned index types.
+  *
+  * @param invIndexType kind of inverted index under test
+  * @return a freshly allocated serde array
+  * @throws IndexException if the index type is not handled
+  */
+ private static ISerializerDeserializer[] getNonHashedIndexFieldSerdes(InvertedIndexType invIndexType)
+         throws IndexException {
+     switch (invIndexType) {
+         case INMEMORY:
+         case ONDISK:
+         case LSM:
+             return new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE,
+                     IntegerSerializerDeserializer.INSTANCE };
+         case PARTITIONED_INMEMORY:
+         case PARTITIONED_ONDISK:
+         case PARTITIONED_LSM:
+             // Such indexes also include the set-size for partitioning.
+             return new ISerializerDeserializer[] { UTF8StringSerializerDeserializer.INSTANCE,
+                     ShortSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+         default:
+             throw new IndexException("Unhandled inverted index type '" + invIndexType + "'.");
+     }
+ }
+
+ /**
+  * Returns the field serdes used for a hashed inverted index of the given type: an integer followed
+  * by an integer, with a short in between for the partitioned index types.
+  *
+  * @param invIndexType kind of inverted index under test
+  * @return a freshly allocated serde array
+  * @throws IndexException if the index type is not handled
+  */
+ private static ISerializerDeserializer[] getHashedIndexFieldSerdes(InvertedIndexType invIndexType)
+         throws IndexException {
+     switch (invIndexType) {
+         case INMEMORY:
+         case ONDISK:
+         case LSM:
+             return new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE,
+                     IntegerSerializerDeserializer.INSTANCE };
+         case PARTITIONED_INMEMORY:
+         case PARTITIONED_ONDISK:
+         case PARTITIONED_LSM:
+             // Such indexes also include the set-size for partitioning.
+             return new ISerializerDeserializer[] { IntegerSerializerDeserializer.INSTANCE,
+                     ShortSerializerDeserializer.INSTANCE, IntegerSerializerDeserializer.INSTANCE };
+         default:
+             throw new IndexException("Unhandled inverted index type '" + invIndexType + "'.");
+     }
+ }
+
+ /**
+  * Creates a test context that uses the non-hashed field serdes and a delimited UTF-8 string tokenizer
+  * backed by a UTF8WordTokenFactory.
+  *
+  * @param harness test harness handed to LSMInvertedIndexTestContext.create()
+  * @param invIndexType kind of inverted index under test
+  * @return the created test context
+  * @throws IOException propagated from LSMInvertedIndexTestContext.create()
+  * @throws IndexException if the index type is not handled, or propagated from create()
+  */
+ public static LSMInvertedIndexTestContext createWordInvIndexTestContext(LSMInvertedIndexTestHarness harness,
+         InvertedIndexType invIndexType) throws IOException, IndexException {
+     ISerializerDeserializer[] serdes = getNonHashedIndexFieldSerdes(invIndexType);
+     IBinaryTokenizerFactory wordTokenizerFactory = new DelimitedUTF8StringBinaryTokenizerFactory(true, false,
+             new UTF8WordTokenFactory());
+     // NOTE(review): serdes.length - 1 is presumably the token field count -- confirm against create().
+     return LSMInvertedIndexTestContext.create(harness, serdes, serdes.length - 1, wordTokenizerFactory,
+             invIndexType);
+ }
+
+ /**
+  * Creates a test context that uses the hashed field serdes and a delimited UTF-8 string tokenizer
+  * backed by a HashedUTF8WordTokenFactory.
+  *
+  * @param harness test harness handed to LSMInvertedIndexTestContext.create()
+  * @param invIndexType kind of inverted index under test
+  * @return the created test context
+  * @throws IOException propagated from LSMInvertedIndexTestContext.create()
+  * @throws IndexException if the index type is not handled, or propagated from create()
+  */
+ public static LSMInvertedIndexTestContext createHashedWordInvIndexTestContext(LSMInvertedIndexTestHarness harness,
+         InvertedIndexType invIndexType) throws IOException, IndexException {
+     ISerializerDeserializer[] serdes = getHashedIndexFieldSerdes(invIndexType);
+     IBinaryTokenizerFactory wordTokenizerFactory = new DelimitedUTF8StringBinaryTokenizerFactory(true, false,
+             new HashedUTF8WordTokenFactory());
+     // NOTE(review): serdes.length - 1 is presumably the token field count -- confirm against create().
+     return LSMInvertedIndexTestContext.create(harness, serdes, serdes.length - 1, wordTokenizerFactory,
+             invIndexType);
+ }
+
+ /**
+  * Creates a test context that uses the non-hashed field serdes and an n-gram UTF-8 string tokenizer
+  * with gram length TEST_GRAM_LENGTH, backed by a UTF8NGramTokenFactory.
+  *
+  * @param harness test harness handed to LSMInvertedIndexTestContext.create()
+  * @param invIndexType kind of inverted index under test
+  * @return the created test context
+  * @throws IOException propagated from LSMInvertedIndexTestContext.create()
+  * @throws IndexException if the index type is not handled, or propagated from create()
+  */
+ public static LSMInvertedIndexTestContext createNGramInvIndexTestContext(LSMInvertedIndexTestHarness harness,
+         InvertedIndexType invIndexType) throws IOException, IndexException {
+     ISerializerDeserializer[] serdes = getNonHashedIndexFieldSerdes(invIndexType);
+     IBinaryTokenizerFactory gramTokenizerFactory = new NGramUTF8StringBinaryTokenizerFactory(TEST_GRAM_LENGTH,
+             true, true, false, new UTF8NGramTokenFactory());
+     // NOTE(review): serdes.length - 1 is presumably the token field count -- confirm against create().
+     return LSMInvertedIndexTestContext.create(harness, serdes, serdes.length - 1, gramTokenizerFactory,
+             invIndexType);
+ }
+
+ /**
+  * Creates a test context that uses the hashed field serdes and an n-gram UTF-8 string tokenizer
+  * with gram length TEST_GRAM_LENGTH, backed by a HashedUTF8NGramTokenFactory.
+  *
+  * @param harness test harness handed to LSMInvertedIndexTestContext.create()
+  * @param invIndexType kind of inverted index under test
+  * @return the created test context
+  * @throws IOException propagated from LSMInvertedIndexTestContext.create()
+  * @throws IndexException if the index type is not handled, or propagated from create()
+  */
+ public static LSMInvertedIndexTestContext createHashedNGramInvIndexTestContext(LSMInvertedIndexTestHarness harness,
+         InvertedIndexType invIndexType) throws IOException, IndexException {
+     ISerializerDeserializer[] serdes = getHashedIndexFieldSerdes(invIndexType);
+     IBinaryTokenizerFactory gramTokenizerFactory = new NGramUTF8StringBinaryTokenizerFactory(TEST_GRAM_LENGTH,
+             true, true, false, new HashedUTF8NGramTokenFactory());
+     // NOTE(review): serdes.length - 1 is presumably the token field count -- confirm against create().
+     return LSMInvertedIndexTestContext.create(harness, serdes, serdes.length - 1, gramTokenizerFactory,
+             invIndexType);
+ }
+
+ /**
+  * Bulk-loads the index of the given test context. The expected index is first built in a temporary
+  * sorted set by inserting numDocs generated documents one-by-one; its check tuples are then fed, in
+  * sorted order, to a bulk loader of the actual index and finally added to the context's check tuples.
+  *
+  * @param testCtx test context providing the index, the field serdes and the check tuples
+  * @param tupleGen generator producing the documents to load
+  * @param numDocs number of documents to generate and load
+  * @throws IndexException propagated from the index or the bulk loader
+  * @throws IOException propagated from tuple generation or serialization
+  */
+ public static void bulkLoadInvIndex(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, int numDocs)
+         throws IndexException, IOException {
+     // Build the expected index first by inserting the documents one-by-one.
+     SortedSet<CheckTuple> expectedIndex = new TreeSet<CheckTuple>();
+     for (int docNum = 0; docNum < numDocs; docNum++) {
+         testCtx.insertCheckTuples(tupleGen.next(), expectedIndex);
+     }
+     ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
+
+     // Feed the expected index, in its sorted order, to the bulk loader of the actual index.
+     IIndexBulkLoader bulkLoader = testCtx.getIndex().createBulkLoader(1.0f, false, numDocs);
+     ArrayTupleBuilder tupleBuilder = new ArrayTupleBuilder(testCtx.getFieldSerdes().length);
+     ArrayTupleReference serializedTuple = new ArrayTupleReference();
+     for (CheckTuple expected : expectedIndex) {
+         OrderedIndexTestUtils.createTupleFromCheckTuple(expected, tupleBuilder, serializedTuple, fieldSerdes);
+         bulkLoader.add(serializedTuple);
+     }
+     bulkLoader.end();
+
+     // Add all check tuples from the temporary index to the test context.
+     testCtx.getCheckTuples().addAll(expectedIndex);
+ }
+
+ public static void insertIntoInvIndex(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, int numDocs)
+ throws IOException, IndexException {
+ // InMemoryInvertedIndex only supports insert.
+ for (int i = 0; i < numDocs; i++) {
+ ITupleReference tuple = tupleGen.next();
+ testCtx.getIndexAccessor().insert(tuple);
+ testCtx.insertCheckTuples(tuple, testCtx.getCheckTuples());
+ }
+ }
+
+ /**
+  * Deletes up to numDocsToDelete randomly chosen documents of the context's document corpus from both
+  * the actual index and the expected check tuples. Each deleted document is removed from the corpus;
+  * the loop stops early once the corpus is empty.
+  *
+  * @param testCtx test context providing the document corpus, the index accessor and the check tuples
+  * @param rnd random source used to pick the documents to delete
+  * @param numDocsToDelete maximum number of documents to delete
+  * @throws HyracksDataException propagated from the index accessor or the check-tuple deletion
+  * @throws IndexException propagated from the index accessor
+  */
+ public static void deleteFromInvIndex(LSMInvertedIndexTestContext testCtx, Random rnd, int numDocsToDelete)
+         throws HyracksDataException, IndexException {
+     List<ITupleReference> documentCorpus = testCtx.getDocumentCorpus();
+     for (int i = 0; i < numDocsToDelete && !documentCorpus.isEmpty(); i++) {
+         int size = documentCorpus.size();
+         // Take the remainder BEFORE the absolute value: Math.abs(Integer.MIN_VALUE) is still negative,
+         // so abs-then-mod could produce a negative index. The remainder lies in (-size, size), where
+         // Math.abs() is always safe. For every other random value the chosen index is unchanged.
+         int tupleIndex = Math.abs(rnd.nextInt() % size);
+         ITupleReference deleteTuple = documentCorpus.get(tupleIndex);
+         testCtx.getIndexAccessor().delete(deleteTuple);
+         testCtx.deleteCheckTuples(deleteTuple, testCtx.getCheckTuples());
+         // Swap tupleIndex with last element, then drop the last slot so no other element has to shift.
+         documentCorpus.set(tupleIndex, documentCorpus.get(size - 1));
+         documentCorpus.remove(size - 1);
+     }
+ }
+
+ /**
+  * Compares actual and expected indexes using the rangeSearch() method of the inverted-index accessor.
+  * The range-search cursor and the expected check tuples are walked in lockstep; the test fails on the
+  * first entry that differs, or when either side still has entries after the other is exhausted.
+  *
+  * @param testCtx test context providing the actual index, the expected check tuples and the field serdes
+  * @throws HyracksDataException propagated from the cursor or tuple serialization
+  * @throws IndexException propagated from the index accessor
+  */
+ public static void compareActualAndExpectedIndexesRangeSearch(LSMInvertedIndexTestContext testCtx)
+         throws HyracksDataException, IndexException {
+     IInvertedIndex index = (IInvertedIndex) testCtx.getIndex();
+     final int numTokenFields = index.getTokenTypeTraits().length;
+     final int numInvListFields = index.getInvListTypeTraits().length;
+     IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) index.createAccessor(
+             NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+     IIndexCursor actualCursor = accessor.createRangeSearchCursor();
+     MultiComparator tokenCmp = MultiComparator.create(index.getTokenCmpFactories());
+
+     // Comparator over whole entries: token comparators first, then the inverted-list comparators.
+     IBinaryComparatorFactory[] entryCmpFactories = new IBinaryComparatorFactory[numTokenFields
+             + numInvListFields];
+     for (int field = 0; field < entryCmpFactories.length; field++) {
+         entryCmpFactories[field] = field < numTokenFields ? index.getTokenCmpFactories()[field]
+                 : index.getInvListCmpFactories()[field - numTokenFields];
+     }
+     MultiComparator entryCmp = MultiComparator.create(entryCmpFactories);
+
+     // Null low and high keys (presumably an unbounded scan -- confirm against RangePredicate).
+     RangePredicate scanPred = new RangePredicate(null, null, true, true, tokenCmp, tokenCmp);
+     accessor.rangeSearch(actualCursor, scanPred);
+
+     // Helpers for generating a serialized inverted-list element from a CheckTuple from the expected index.
+     ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
+     ArrayTupleBuilder expectedBuilder = new ArrayTupleBuilder(fieldSerdes.length);
+     ArrayTupleReference expectedTuple = new ArrayTupleReference();
+
+     Iterator<CheckTuple> expectedIter = testCtx.getCheckTuples().iterator();
+
+     // Compare index elements.
+     try {
+         while (actualCursor.hasNext() && expectedIter.hasNext()) {
+             actualCursor.next();
+             ITupleReference actualTuple = actualCursor.getTuple();
+             CheckTuple expectedCheckTuple = expectedIter.next();
+             OrderedIndexTestUtils.createTupleFromCheckTuple(expectedCheckTuple, expectedBuilder, expectedTuple,
+                     fieldSerdes);
+             if (entryCmp.compare(actualTuple, expectedTuple) != 0) {
+                 fail("Index entries differ for token '" + expectedCheckTuple.getField(0) + "'.");
+             }
+         }
+         // Whichever side still has entries left determines the failure message.
+         if (expectedIter.hasNext()) {
+             fail("Indexes do not match. Actual index is missing entries.");
+         }
+         if (actualCursor.hasNext()) {
+             fail("Indexes do not match. Actual index contains too many entries.");
+         }
+     } finally {
+         actualCursor.close();
+     }
+ }
+
+ /**
+  * Compares actual and expected indexes by comparing their inverted-lists one by one. Exercises the
+  * openInvertedListCursor() method of the inverted-index accessor.
+  * For every token returned by testCtx.getAllTokens(), the test fails if the actual and expected
+  * inverted lists differ in size or in any element.
+  *
+  * @param testCtx test context providing the actual index, the expected check tuples and all tokens
+  * @throws HyracksDataException propagated from the cursor or tuple serialization
+  * @throws IndexException propagated from the index accessor
+  */
+ @SuppressWarnings("unchecked")
+ public static void compareActualAndExpectedIndexes(LSMInvertedIndexTestContext testCtx)
+         throws HyracksDataException, IndexException {
+     IInvertedIndex index = (IInvertedIndex) testCtx.getIndex();
+     ISerializerDeserializer[] fieldSerdes = testCtx.getFieldSerdes();
+     MultiComparator invListCmp = MultiComparator.create(index.getInvListCmpFactories());
+     IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) testCtx.getIndexAccessor();
+     final int numTokenFields = index.getTokenTypeTraits().length;
+     final int numInvListFields = index.getInvListTypeTraits().length;
+
+     // Search key for finding an inverted-list in the actual index.
+     ArrayTupleBuilder searchKeyBuilder = new ArrayTupleBuilder(numTokenFields);
+     ArrayTupleReference searchKey = new ArrayTupleReference();
+     // Cursor over inverted list from actual index.
+     IInvertedListCursor actualListCursor = accessor.createInvertedListCursor();
+
+     // Helpers for generating a serialized inverted-list element from a CheckTuple from the expected index.
+     ArrayTupleBuilder expectedBuilder = new ArrayTupleBuilder(fieldSerdes.length);
+     // Includes the token fields.
+     ArrayTupleReference expectedWithToken = new ArrayTupleReference();
+     // Field permutation and permuting tuple reference to strip away token fields from expectedWithToken.
+     int[] invListFields = new int[numInvListFields];
+     for (int i = 0; i < invListFields.length; i++) {
+         invListFields[i] = numTokenFields + i;
+     }
+     PermutingTupleReference expectedElement = new PermutingTupleReference(invListFields);
+
+     // Iterate over all tokens that were inserted into the indexes. Find the inverted-lists in actual and
+     // expected indexes, and compare them.
+     for (Comparable token : testCtx.getAllTokens()) {
+         // Position inverted-list iterator on expected index.
+         CheckTuple lowKey = new CheckTuple(numTokenFields, numTokenFields);
+         lowKey.appendField(token);
+         CheckTuple highKey = new CheckTuple(numTokenFields, numTokenFields);
+         highKey.appendField(token);
+
+         SortedSet<CheckTuple> expectedList = OrderedIndexTestUtils.getPrefixExpectedSubset(
+                 testCtx.getCheckTuples(), lowKey, highKey);
+         Iterator<CheckTuple> expectedListIter = expectedList.iterator();
+
+         // Position inverted-list cursor in actual index.
+         OrderedIndexTestUtils.createTupleFromCheckTuple(lowKey, searchKeyBuilder, searchKey, fieldSerdes);
+         accessor.openInvertedListCursor(actualListCursor, searchKey);
+
+         if (actualListCursor.size() != expectedList.size()) {
+             fail("Actual and expected inverted lists for token '" + token.toString()
+                     + "' have different sizes. Actual size: " + actualListCursor.size() + ". Expected size: "
+                     + expectedList.size() + ".");
+         }
+         // Compare inverted-list elements; pages stay pinned only for the duration of the comparison.
+         actualListCursor.pinPages();
+         try {
+             for (int position = 0; actualListCursor.hasNext() && expectedListIter.hasNext(); position++) {
+                 actualListCursor.next();
+                 ITupleReference actualElement = actualListCursor.getTuple();
+                 CheckTuple expectedCheckTuple = expectedListIter.next();
+                 OrderedIndexTestUtils.createTupleFromCheckTuple(expectedCheckTuple, expectedBuilder,
+                         expectedWithToken, fieldSerdes);
+                 expectedElement.reset(expectedWithToken);
+                 if (invListCmp.compare(actualElement, expectedElement) != 0) {
+                     fail("Inverted lists of token '" + token + "' differ at position " + position + ".");
+                 }
+             }
+         } finally {
+             actualListCursor.unpinPages();
+         }
+     }
+ }
+
+ /**
+  * Determine the expected results with the simple ScanCount algorithm.
+  * Maps the inverted-index type to a partitioned/non-partitioned flag and delegates to the overload
+  * taking that flag; all other arguments are passed through unchanged.
+  *
+  * @param invIndexType kind of inverted index under test; the PARTITIONED_* types count as partitioned
+  * @throws IOException propagated from the delegate
+  */
+ public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples,
+         ITupleReference searchDocument, IBinaryTokenizer tokenizer, ISerializerDeserializer tokenSerde,
+         IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults, InvertedIndexType invIndexType)
+         throws IOException {
+     final boolean partitioned;
+     switch (invIndexType) {
+         case PARTITIONED_INMEMORY:
+         case PARTITIONED_ONDISK:
+         case PARTITIONED_LSM:
+             partitioned = true;
+             break;
+         default:
+             // INMEMORY, ONDISK, LSM and any other type are treated as non-partitioned.
+             partitioned = false;
+             break;
+     }
+     getExpectedResults(scanCountArray, checkTuples, searchDocument, tokenizer, tokenSerde, searchModifier,
+             expectedResults, partitioned);
+ }
+
+ /**
+  * Computes the expected search results by scan counting over the expected index. Field 0 of the
+  * search document is tokenized; for every token, the matching range of check tuples is looked up and
+  * the counter of each element found there is incremented. Every element whose counter reaches the
+  * search modifier's occurrence threshold is reported.
+  *
+  * @param scanCountArray scratch counters indexed by inverted-list element; zeroed on entry
+  * @param checkTuples expected index contents
+  * @param searchDocument query tuple whose field 0 is tokenized
+  * @param tokenizer tokenizer applied to the query field
+  * @param tokenSerde serde used to deserialize each serialized token
+  * @param searchModifier supplies the occurrence threshold and, for partitioned indexes, the bounds on
+  *        the number of tokens
+  * @param expectedResults output list; cleared, then filled with the qualifying elements in ascending order
+  * @param isPartitioned whether the check tuples carry a number-of-tokens field between token and element
+  * @throws IOException propagated from token serialization or deserialization
+  */
+ @SuppressWarnings("unchecked")
+ public static void getExpectedResults(int[] scanCountArray, TreeSet<CheckTuple> checkTuples,
+         ITupleReference searchDocument, IBinaryTokenizer tokenizer, ISerializerDeserializer tokenSerde,
+         IInvertedIndexSearchModifier searchModifier, List<Integer> expectedResults, boolean isPartitioned)
+         throws IOException {
+     // Reset scan count array and the output list.
+     Arrays.fill(scanCountArray, 0);
+     expectedResults.clear();
+
+     GrowableArray tokenData = new GrowableArray();
+     tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0),
+             searchDocument.getFieldLength(0));
+     // First tokenizer pass: only count the query tokens.
+     int numQueryTokens = 0;
+     for (; tokenizer.hasNext(); numQueryTokens++) {
+         tokenizer.next();
+     }
+     // Negative bounds mean that no length filtering is applied.
+     short numTokensLowerBound = -1;
+     short numTokensUpperBound = -1;
+     if (isPartitioned) {
+         numTokensLowerBound = searchModifier.getNumTokensLowerBound((short) numQueryTokens);
+         numTokensUpperBound = searchModifier.getNumTokensUpperBound((short) numQueryTokens);
+     }
+     // Position of the inverted-list element within a check tuple.
+     final int invListElementField = isPartitioned ? 2 : 1;
+     int occurrenceThreshold = searchModifier.getOccurrenceThreshold(numQueryTokens);
+
+     // Second tokenizer pass: look up each token's inverted list and count its elements.
+     tokenizer.reset(searchDocument.getFieldData(0), searchDocument.getFieldStart(0),
+             searchDocument.getFieldLength(0));
+     while (tokenizer.hasNext()) {
+         tokenizer.next();
+         IToken token = tokenizer.getToken();
+         tokenData.reset();
+         token.serializeToken(tokenData);
+         DataInput tokenIn = new DataInputStream(new ByteArrayInputStream(tokenData.getByteArray(), 0,
+                 tokenData.getLength()));
+         Comparable tokenObj = (Comparable) tokenSerde.deserialize(tokenIn);
+         CheckTuple lowKey = createTokenKey(tokenObj, numTokensLowerBound);
+         CheckTuple highKey = createTokenKey(tokenObj, numTokensUpperBound);
+
+         // Get view over check tuples containing inverted-list corresponding to token.
+         SortedSet<CheckTuple> invList = OrderedIndexTestUtils.getPrefixExpectedSubset(checkTuples, lowKey, highKey);
+         // Iterate over inverted list and update scan count array.
+         for (CheckTuple invListEntry : invList) {
+             Integer element = (Integer) invListEntry.getField(invListElementField);
+             scanCountArray[element]++;
+         }
+     }
+
+     // Run through scan count array, and see whether elements satisfy the given occurrence threshold.
+     expectedResults.clear();
+     for (int element = 0; element < scanCountArray.length; element++) {
+         if (scanCountArray[element] >= occurrenceThreshold) {
+             expectedResults.add(element);
+         }
+     }
+ }
+
+ /**
+  * Builds a lookup key for the expected index: the token alone when numTokensBound is negative (index
+  * is not partitioned, or no length filtering is possible for the search modifier), otherwise the
+  * token followed by the bound (index is length partitioned and the modifier supports length filtering).
+  */
+ @SuppressWarnings("unchecked")
+ private static CheckTuple createTokenKey(Comparable token, short numTokensBound) {
+     final boolean lengthFiltered = numTokensBound >= 0;
+     final int numKeyFields = lengthFiltered ? 2 : 1;
+     CheckTuple key = new CheckTuple(numKeyFields, numKeyFields);
+     key.appendField(token);
+     if (lengthFiltered) {
+         key.appendField(Short.valueOf(numTokensBound));
+     }
+     return key;
+ }
+
+ /**
+  * Runs numDocQueries + numRandomQueries searches against the index of the given test context and
+  * checks each result against the expected results computed by getExpectedResults().
+  * A query is a randomly picked document of the context's corpus while both the query number is below
+  * numDocQueries and below the corpus size; otherwise it is the next tuple from tupleGen. Only field 0
+  * of the query tuple is used. Queries that raise an OccurrenceThresholdPanicException, either during
+  * the search or while the cursor is consumed, are skipped. The test fails on the first mismatching
+  * result, or when the actual and expected results differ in number.
+  *
+  * @param testCtx test context providing the index, tokenizer factory, corpus and check tuples
+  * @param tupleGen generator used for the random queries
+  * @param rnd random source used to pick corpus documents
+  * @param numDocQueries number of queries to draw from the corpus
+  * @param numRandomQueries number of additional generated queries
+  * @param searchModifier search modifier used for both the actual search and the expected results
+  * @param scanCountArray scratch array handed to getExpectedResults()
+  * @throws IOException propagated from tuple generation or getExpectedResults()
+  * @throws IndexException propagated from the index accessor or cursor
+  */
+ public static void testIndexSearch(LSMInvertedIndexTestContext testCtx, TupleGenerator tupleGen, Random rnd,
+ int numDocQueries, int numRandomQueries, IInvertedIndexSearchModifier searchModifier, int[] scanCountArray)
+ throws IOException, IndexException {
+ IInvertedIndex invIndex = testCtx.invIndex;
+ IInvertedIndexAccessor accessor = (IInvertedIndexAccessor) invIndex.createAccessor(
+ NoOpOperationCallback.INSTANCE, NoOpOperationCallback.INSTANCE);
+ IBinaryTokenizer tokenizer = testCtx.getTokenizerFactory().createTokenizer();
+ InvertedIndexSearchPredicate searchPred = new InvertedIndexSearchPredicate(tokenizer, searchModifier);
+ List<ITupleReference> documentCorpus = testCtx.getDocumentCorpus();
+ // Project away the primary-key field.
+ int[] fieldPermutation = new int[] { 0 };
+ PermutingTupleReference searchDocument = new PermutingTupleReference(fieldPermutation);
+
+ // A single cursor is reused for all queries; it is reset before and closed after each one.
+ IIndexCursor resultCursor = accessor.createSearchCursor();
+ int numQueries = numDocQueries + numRandomQueries;
+ for (int i = 0; i < numQueries; i++) {
+ // If number of documents in the corpus is less than numDocQueries, then replace the remaining ones with random queries.
+ if (i >= numDocQueries || i >= documentCorpus.size()) {
+ // Generate a random query.
+ ITupleReference randomQuery = tupleGen.next();
+ searchDocument.reset(randomQuery);
+ } else {
+ // Pick a random document from the corpus to use as the search query.
+ // The remainder is taken before Math.abs(), so the index always lies in [0, size).
+ int queryIndex = Math.abs(rnd.nextInt() % documentCorpus.size());
+ searchDocument.reset(documentCorpus.get(queryIndex));
+ }
+
+ // Set query tuple in search predicate.
+ searchPred.setQueryTuple(searchDocument);
+ searchPred.setQueryFieldIndex(0);
+
+ resultCursor.reset();
+ boolean panic = false;
+ try {
+ accessor.search(resultCursor, searchPred);
+ } catch (OccurrenceThresholdPanicException e) {
+ // ignore panic queries.
+ panic = true;
+ }
+
+ // The cursor is closed in the finally block below whether or not the search panicked.
+ try {
+ if (!panic) {
+ // Consume cursor and deserialize results so we can sort them. Some search cursors may not deliver the result sorted (e.g., LSM search cursor).
+ ArrayList<Integer> actualResults = new ArrayList<Integer>();
+ try {
+ while (resultCursor.hasNext()) {
+ resultCursor.next();
+ ITupleReference resultTuple = resultCursor.getTuple();
+ // Field 0 of each result tuple is read as an int.
+ int actual = IntegerSerializerDeserializer.getInt(resultTuple.getFieldData(0),
+ resultTuple.getFieldStart(0));
+ actualResults.add(Integer.valueOf(actual));
+ }
+ } catch (OccurrenceThresholdPanicException e) {
+ // Ignore panic queries.
+ // The enclosing finally block still closes the cursor before the next query starts.
+ continue;
+ }
+ Collections.sort(actualResults);
+
+ // Get expected results.
+ // The serde of field 0 is passed as the token serde.
+ List<Integer> expectedResults = new ArrayList<Integer>();
+ LSMInvertedIndexTestUtils.getExpectedResults(scanCountArray, testCtx.getCheckTuples(),
+ searchDocument, tokenizer, testCtx.getFieldSerdes()[0], searchModifier, expectedResults,
+ testCtx.getInvertedIndexType());
+
+ // Walk both result lists in lockstep; leftovers on either side are reported below.
+ Iterator<Integer> expectedIter = expectedResults.iterator();
+ Iterator<Integer> actualIter = actualResults.iterator();
+ while (expectedIter.hasNext() && actualIter.hasNext()) {
+ int expected = expectedIter.next();
+ int actual = actualIter.next();
+ if (actual != expected) {
+ fail("Query results do not match. Encountered: " + actual + ". Expected: " + expected + "");
+ }
+ }
+ if (expectedIter.hasNext()) {
+ fail("Query results do not match. Actual results missing.");
+ }
+ if (actualIter.hasNext()) {
+ fail("Query results do not match. Actual contains too many results.");
+ }
+ }
+ } finally {
+ resultCursor.close();
+ }
+ }
+ }
+}